/*
 * kmp_barrier.cpp
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_wait_release.h"
#include "kmp_stats.h"
#include "kmp_itt.h"
#include "kmp_os.h"


#if KMP_MIC
#include <immintrin.h>
#define USE_NGO_STORES 1
#endif // KMP_MIC

#include "tsan_annotations.h"

#if KMP_MIC && USE_NGO_STORES
// ICV copying
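// On KMP_MIC the ngo_* macros copy a cache line of ICVs using the Knights Corner
// "no-read, non-globally-ordered" 512-bit stores (_mm512_storenrngo_pd). Those stores
// are weakly ordered, so ngo_sync() issues a locked add as a full fence before any
// thread released by the stored data is allowed to observe it.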
#define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src))
#define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_sync() __asm__ volatile ("lock; addl $0,0(%%rsp)" ::: "memory")
#else
#define ngo_load(src) ((void)0)
#define ngo_store_icvs(dst, src) copy_icvs((dst), (src))
#define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE)
#define ngo_sync() ((void)0)
#endif /* KMP_MIC && USE_NGO_STORES */

void __kmp_print_structure(void); // Forward declaration

// ---------------------------- Barrier Algorithms ----------------------------

// Linear Barrier
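// In the linear scheme each worker signals its own b_arrived flag with the master
// registered as the waiter; the master waits on every worker's flag in turn (folding in
// the reduction as it goes) and later releases each worker through its b_go flag.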
static void
__kmp_linear_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                            void (*reduce)(void *, void *)
                            USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather);
    register kmp_team_t *team = this_thr->th.th_team;
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_info_t **other_threads = team->t.t_threads;

    KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
    KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    // Barrier imbalance - save arrive time to the thread
    if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
    }
#endif
    // We now perform a linear reduction to signal that all of the threads have arrived.
    if (!KMP_MASTER_TID(tid)) {
        KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                      "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
                      __kmp_gtid_from_tid(0, team), team->t.t_id, 0, &thr_bar->b_arrived,
                      thr_bar->b_arrived, thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
        // Mark arrival to master thread
        /* After performing this write, a worker thread may not assume that the team is valid
           any more - it could be deallocated by the master thread at any time. */
        ANNOTATE_BARRIER_BEGIN(this_thr);
        kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[0]);
        flag.release();
    } else {
        register kmp_balign_team_t *team_bar = &team->t.t_bar[bt];
        register int nproc = this_thr->th.th_team_nproc;
        register int i;
        // Don't have to worry about sleep bit here or atomic since team setting
        register kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP;

        // Collect all the worker team member threads.
        for (i=1; i<nproc; ++i) {
#if KMP_CACHE_MANAGE
            // Prefetch next thread's arrived count
            if (i+1 < nproc)
                KMP_CACHE_PREFETCH(&other_threads[i+1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
            KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
                          "arrived(%p) == %llu\n", gtid, team->t.t_id, tid,
                          __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                          &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state));

            // Wait for worker thread to arrive
            kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state);
            flag.wait(this_thr, FALSE
                      USE_ITT_BUILD_ARG(itt_sync_obj) );
            ANNOTATE_BARRIER_END(other_threads[i]);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
            // Barrier imbalance - write min of the thread time and the other thread time to the thread.
            if (__kmp_forkjoin_frames_mode == 2) {
                this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                                       other_threads[i]->th.th_bar_min_time);
            }
#endif
            if (reduce) {
                KA_TRACE(100, ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n", gtid,
                               team->t.t_id, tid, __kmp_gtid_from_tid(i, team), team->t.t_id, i));
                ANNOTATE_REDUCE_AFTER(reduce);
                (*reduce)(this_thr->th.th_local.reduce_data,
                          other_threads[i]->th.th_local.reduce_data);
                ANNOTATE_REDUCE_BEFORE(reduce);
                ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
            }
        }
        // Don't have to worry about sleep bit here or atomic since team setting
        team_bar->b_arrived = new_state;
        KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
                      gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived, new_state));
    }
    KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}

static void
__kmp_linear_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                             int propagate_icvs
                             USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release);
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_team_t *team;

    if (KMP_MASTER_TID(tid)) {
        register unsigned int i;
        register kmp_uint32 nproc = this_thr->th.th_team_nproc;
        register kmp_info_t **other_threads;

        team = __kmp_threads[gtid]->th.th_team;
        KMP_DEBUG_ASSERT(team != NULL);
        other_threads = team->t.t_threads;

        KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
                      gtid, team->t.t_id, tid, bt));

        if (nproc > 1) {
#if KMP_BARRIER_ICV_PUSH
            {
                KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
                if (propagate_icvs) {
                    ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
                    for (i=1; i<nproc; ++i) {
                        __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i], team, i, FALSE);
                        ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
                                       &team->t.t_implicit_task_taskdata[0].td_icvs);
                    }
                    ngo_sync();
                }
            }
#endif // KMP_BARRIER_ICV_PUSH

            // Now, release all of the worker threads
            for (i=1; i<nproc; ++i) {
#if KMP_CACHE_MANAGE
                // Prefetch next thread's go flag
                if (i+1 < nproc)
                    KMP_CACHE_PREFETCH(&other_threads[i+1]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */
                KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
                              "go(%p): %u => %u\n", gtid, team->t.t_id, tid,
                              other_threads[i]->th.th_info.ds.ds_gtid, team->t.t_id, i,
                              &other_threads[i]->th.th_bar[bt].bb.b_go,
                              other_threads[i]->th.th_bar[bt].bb.b_go,
                              other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP));
                ANNOTATE_BARRIER_BEGIN(other_threads[i]);
                kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_go, other_threads[i]);
                flag.release();
            }
        }
    } else { // Wait for the MASTER thread to release us
        KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
                      gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
        kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
        flag.wait(this_thr, TRUE
                  USE_ITT_BUILD_ARG(itt_sync_obj) );
        ANNOTATE_BARRIER_END(this_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
        if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
            // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is disabled)
            itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
            // Cancel wait on previous parallel region...
            __kmp_itt_task_starting(itt_sync_obj);

            if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
                return;

            itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
            if (itt_sync_obj != NULL)
                // Call prepare as early as possible for "new" barrier
                __kmp_itt_task_finished(itt_sync_obj);
        } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
            // Early exit for reaping threads releasing forkjoin barrier
            if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
                return;
        // The worker thread may now assume that the team is valid.
#ifdef KMP_DEBUG
        tid = __kmp_tid_from_gtid(gtid);
        team = __kmp_threads[gtid]->th.th_team;
#endif
        KMP_DEBUG_ASSERT(team != NULL);
        TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
        KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
                      gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
        KMP_MB(); // Flush all pending memory write invalidates.
    }
    KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}

// Tree barrier
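// Threads form a branch_factor-ary tree: thread tid waits on children
// tid*branch_factor+1 .. tid*branch_factor+branch_factor, then signals its parent
// (tid-1)/branch_factor; the release walks the same tree from the master downward.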
static void
__kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                          void (*reduce)(void *, void *)
                          USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather);
    register kmp_team_t *team = this_thr->th.th_team;
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_info_t **other_threads = team->t.t_threads;
    register kmp_uint32 nproc = this_thr->th.th_team_nproc;
    register kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
    register kmp_uint32 branch_factor = 1 << branch_bits;
    register kmp_uint32 child;
    register kmp_uint32 child_tid;
    register kmp_uint64 new_state;

    KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
    KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    // Barrier imbalance - save arrive time to the thread
    if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
    }
#endif
    // Perform tree gather to wait until all threads have arrived; reduce any required data as we go
    child_tid = (tid << branch_bits) + 1;
    if (child_tid < nproc) {
        // Parent threads wait for all their children to arrive
        new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
        child = 1;
        do {
            register kmp_info_t *child_thr = other_threads[child_tid];
            register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
            // Prefetch next thread's arrived count
            if (child+1 <= branch_factor && child_tid+1 < nproc)
                KMP_CACHE_PREFETCH(&other_threads[child_tid+1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
            KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                          "arrived(%p) == %llu\n", gtid, team->t.t_id, tid,
                          __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid,
                          &child_bar->b_arrived, new_state));
            // Wait for child to arrive
            kmp_flag_64 flag(&child_bar->b_arrived, new_state);
            flag.wait(this_thr, FALSE
                      USE_ITT_BUILD_ARG(itt_sync_obj) );
            ANNOTATE_BARRIER_END(child_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
            // Barrier imbalance - write min of the thread time and a child time to the thread.
            if (__kmp_forkjoin_frames_mode == 2) {
                this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                                       child_thr->th.th_bar_min_time);
            }
#endif
            if (reduce) {
                KA_TRACE(100, ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                               gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                               team->t.t_id, child_tid));
                ANNOTATE_REDUCE_AFTER(reduce);
                (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
                ANNOTATE_REDUCE_BEFORE(reduce);
                ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
            }
            child++;
            child_tid++;
        }
        while (child <= branch_factor && child_tid < nproc);
    }

    if (!KMP_MASTER_TID(tid)) { // Worker threads
        register kmp_int32 parent_tid = (tid - 1) >> branch_bits;

        KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                      "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
                      __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid,
                      &thr_bar->b_arrived, thr_bar->b_arrived,
                      thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));

        // Mark arrival to parent thread
        /* After performing this write, a worker thread may not assume that the team is valid
           any more - it could be deallocated by the master thread at any time. */
        ANNOTATE_BARRIER_BEGIN(this_thr);
        kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[parent_tid]);
        flag.release();
    } else {
        // Need to update the team arrived pointer if we are the master thread
        if (nproc > 1) // New value was already computed above
            team->t.t_bar[bt].b_arrived = new_state;
        else
            team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
        KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
                      gtid, team->t.t_id, tid, team->t.t_id,
                      &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
    }
    KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}

static void
__kmp_tree_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                           int propagate_icvs
                           USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release);
    register kmp_team_t *team;
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_uint32 nproc;
    register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
    register kmp_uint32 branch_factor = 1 << branch_bits;
    register kmp_uint32 child;
    register kmp_uint32 child_tid;

    // Perform a tree release for all of the threads that have been gathered
    if (!KMP_MASTER_TID(tid)) { // Handle fork barrier workers who aren't part of a team yet
        KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n",
                      gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
        // Wait for parent thread to release us
        kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
        flag.wait(this_thr, TRUE
                  USE_ITT_BUILD_ARG(itt_sync_obj) );
        ANNOTATE_BARRIER_END(this_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
        if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
            // In fork barrier where we could not get the object reliably (or ITTNOTIFY is disabled)
            itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
            // Cancel wait on previous parallel region...
            __kmp_itt_task_starting(itt_sync_obj);

            if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
                return;

            itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
            if (itt_sync_obj != NULL)
                // Call prepare as early as possible for "new" barrier
                __kmp_itt_task_finished(itt_sync_obj);
        } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
            // Early exit for reaping threads releasing forkjoin barrier
            if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
                return;

        // The worker thread may now assume that the team is valid.
        team = __kmp_threads[gtid]->th.th_team;
        KMP_DEBUG_ASSERT(team != NULL);
        tid = __kmp_tid_from_gtid(gtid);

        TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
        KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
                      gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
        KMP_MB(); // Flush all pending memory write invalidates.
    } else {
        team = __kmp_threads[gtid]->th.th_team;
        KMP_DEBUG_ASSERT(team != NULL);
        KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
                      gtid, team->t.t_id, tid, bt));
    }
    nproc = this_thr->th.th_team_nproc;
    child_tid = (tid << branch_bits) + 1;

    if (child_tid < nproc) {
        register kmp_info_t **other_threads = team->t.t_threads;
        child = 1;
        // Parent threads release all their children
        do {
            register kmp_info_t *child_thr = other_threads[child_tid];
            register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
            // Prefetch next thread's go count
            if (child+1 <= branch_factor && child_tid+1 < nproc)
                KMP_CACHE_PREFETCH(&other_threads[child_tid+1]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */

#if KMP_BARRIER_ICV_PUSH
            {
                KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
                if (propagate_icvs) {
                    __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid],
                                             team, child_tid, FALSE);
                    copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
                              &team->t.t_implicit_task_taskdata[0].td_icvs);
                }
            }
#endif // KMP_BARRIER_ICV_PUSH
            KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u) "
                          "go(%p): %u => %u\n", gtid, team->t.t_id, tid,
                          __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                          child_tid, &child_bar->b_go, child_bar->b_go,
                          child_bar->b_go + KMP_BARRIER_STATE_BUMP));
            // Release child from barrier
            ANNOTATE_BARRIER_BEGIN(child_thr);
            kmp_flag_64 flag(&child_bar->b_go, child_thr);
            flag.release();
            child++;
            child_tid++;
        }
        while (child <= branch_factor && child_tid < nproc);
    }
    KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}


// Hyper Barrier
static void
__kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                           void (*reduce)(void *, void *)
                           USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather);
    register kmp_team_t *team = this_thr->th.th_team;
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_info_t **other_threads = team->t.t_threads;
    register kmp_uint64 new_state = KMP_BARRIER_UNUSED_STATE;
    register kmp_uint32 num_threads = this_thr->th.th_team_nproc;
    register kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
    register kmp_uint32 branch_factor = 1 << branch_bits;
    register kmp_uint32 offset;
    register kmp_uint32 level;

    KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));

    KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    // Barrier imbalance - save arrive time to the thread
    if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
    }
#endif
    /* Perform a hypercube-embedded tree gather to wait until all of the threads have
       arrived, and reduce any required data as we go. */
    kmp_flag_64 p_flag(&thr_bar->b_arrived);
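    /* Each pass of the loop below handles one "dimension" of the hypercube (branch_bits
       wide): a thread whose tid has a nonzero digit at the current level signals the
       subcube root (its tid with that digit cleared) and drops out of the gather, while
       threads with a zero digit collect their children at this level and move up. */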
    for (level=0, offset=1; offset<num_threads; level+=branch_bits, offset<<=branch_bits)
    {
        register kmp_uint32 child;
        register kmp_uint32 child_tid;

        if (((tid >> level) & (branch_factor - 1)) != 0) {
            register kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) - 1);

            KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                          "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
                          __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid,
                          &thr_bar->b_arrived, thr_bar->b_arrived,
                          thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
            // Mark arrival to parent thread
            /* After performing this write (in the last iteration of the enclosing for loop),
               a worker thread may not assume that the team is valid any more - it could be
               deallocated by the master thread at any time. */
            ANNOTATE_BARRIER_BEGIN(this_thr);
            p_flag.set_waiter(other_threads[parent_tid]);
            p_flag.release();
            break;
        }

        // Parent threads wait for children to arrive
        if (new_state == KMP_BARRIER_UNUSED_STATE)
            new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
        for (child=1, child_tid=tid+(1 << level); child<branch_factor && child_tid<num_threads;
             child++, child_tid+=(1 << level))
        {
            register kmp_info_t *child_thr = other_threads[child_tid];
            register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
            register kmp_uint32 next_child_tid = child_tid + (1 << level);
            // Prefetch next thread's arrived count
            if (child+1 < branch_factor && next_child_tid < num_threads)
                KMP_CACHE_PREFETCH(&other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
            KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                          "arrived(%p) == %llu\n", gtid, team->t.t_id, tid,
                          __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid,
                          &child_bar->b_arrived, new_state));
            // Wait for child to arrive
            kmp_flag_64 c_flag(&child_bar->b_arrived, new_state);
            c_flag.wait(this_thr, FALSE
                        USE_ITT_BUILD_ARG(itt_sync_obj) );
            ANNOTATE_BARRIER_END(child_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
            // Barrier imbalance - write min of the thread time and a child time to the thread.
            if (__kmp_forkjoin_frames_mode == 2) {
                this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                                       child_thr->th.th_bar_min_time);
            }
#endif
            if (reduce) {
                KA_TRACE(100, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                               gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                               team->t.t_id, child_tid));
                ANNOTATE_REDUCE_AFTER(reduce);
                (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
                ANNOTATE_REDUCE_BEFORE(reduce);
                ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
            }
        }
    }

    if (KMP_MASTER_TID(tid)) {
        // Need to update the team arrived pointer if we are the master thread
        if (new_state == KMP_BARRIER_UNUSED_STATE)
            team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
        else
            team->t.t_bar[bt].b_arrived = new_state;
        KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
                      gtid, team->t.t_id, tid, team->t.t_id,
                      &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
    }
    KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}

// The reverse versions seem to beat the forward versions overall
#define KMP_REVERSE_HYPER_BAR
static void
__kmp_hyper_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                            int propagate_icvs
                            USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release);
    register kmp_team_t *team;
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_info_t **other_threads;
    register kmp_uint32 num_threads;
    register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
    register kmp_uint32 branch_factor = 1 << branch_bits;
    register kmp_uint32 child;
    register kmp_uint32 child_tid;
    register kmp_uint32 offset;
    register kmp_uint32 level;

    /* Perform a hypercube-embedded tree release for all of the threads that have been gathered.
       If KMP_REVERSE_HYPER_BAR is defined (default) the threads are released in the reverse
       order of the corresponding gather, otherwise threads are released in the same order. */
    if (KMP_MASTER_TID(tid)) { // master
        team = __kmp_threads[gtid]->th.th_team;
        KMP_DEBUG_ASSERT(team != NULL);
        KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
                      gtid, team->t.t_id, tid, bt));
#if KMP_BARRIER_ICV_PUSH
        if (propagate_icvs) { // master already has ICVs in final destination; copy
            copy_icvs(&thr_bar->th_fixed_icvs, &team->t.t_implicit_task_taskdata[tid].td_icvs);
        }
#endif
    }
    else { // Handle fork barrier workers who aren't part of a team yet
        KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n",
                      gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
        // Wait for parent thread to release us
        kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
        flag.wait(this_thr, TRUE
                  USE_ITT_BUILD_ARG(itt_sync_obj) );
        ANNOTATE_BARRIER_END(this_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
        if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
            // In fork barrier where we could not get the object reliably
            itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
            // Cancel wait on previous parallel region...
            __kmp_itt_task_starting(itt_sync_obj);

            if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
                return;

            itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
            if (itt_sync_obj != NULL)
                // Call prepare as early as possible for "new" barrier
                __kmp_itt_task_finished(itt_sync_obj);
        } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
            // Early exit for reaping threads releasing forkjoin barrier
            if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
                return;

        // The worker thread may now assume that the team is valid.
        team = __kmp_threads[gtid]->th.th_team;
        KMP_DEBUG_ASSERT(team != NULL);
        tid = __kmp_tid_from_gtid(gtid);

        TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
        KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
                      gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
        KMP_MB(); // Flush all pending memory write invalidates.
    }
    num_threads = this_thr->th.th_team_nproc;
    other_threads = team->t.t_threads;

#ifdef KMP_REVERSE_HYPER_BAR
    // Count up to correct level for parent
    for (level=0, offset=1; offset<num_threads && (((tid>>level) & (branch_factor-1)) == 0);
         level+=branch_bits, offset<<=branch_bits);

    // Now go down from there
    for (level-=branch_bits, offset>>=branch_bits; offset != 0;
         level-=branch_bits, offset>>=branch_bits)
#else
    // Go down the tree, level by level
    for (level=0, offset=1; offset<num_threads; level+=branch_bits, offset<<=branch_bits)
#endif // KMP_REVERSE_HYPER_BAR
    {
#ifdef KMP_REVERSE_HYPER_BAR
        /* Now go in reverse order through the children, highest to lowest.
           Initial setting of child is conservative here. */
        child = num_threads >> ((level==0)?level:level-1);
        for (child=(child<branch_factor-1) ? child : branch_factor-1, child_tid=tid+(child<<level);
             child>=1; child--, child_tid-=(1<<level))
#else
        if (((tid >> level) & (branch_factor - 1)) != 0)
            // No need to go lower than this, since this is the level parent would be notified
            break;
        // Iterate through children on this level of the tree
        for (child=1, child_tid=tid+(1<<level); child<branch_factor && child_tid<num_threads;
             child++, child_tid+=(1<<level))
#endif // KMP_REVERSE_HYPER_BAR
        {
            if (child_tid >= num_threads) continue; // Child doesn't exist so keep going
            else {
                register kmp_info_t *child_thr = other_threads[child_tid];
                register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
                register kmp_uint32 next_child_tid = child_tid - (1 << level);
                // Prefetch next thread's go count
# ifdef KMP_REVERSE_HYPER_BAR
                if (child-1 >= 1 && next_child_tid < num_threads)
# else
                if (child+1 < branch_factor && next_child_tid < num_threads)
# endif // KMP_REVERSE_HYPER_BAR
                    KMP_CACHE_PREFETCH(&other_threads[next_child_tid]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */

#if KMP_BARRIER_ICV_PUSH
                if (propagate_icvs) // push my fixed ICVs to my child
                    copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
#endif // KMP_BARRIER_ICV_PUSH

                KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u) "
                              "go(%p): %u => %u\n", gtid, team->t.t_id, tid,
                              __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                              child_tid, &child_bar->b_go, child_bar->b_go,
                              child_bar->b_go + KMP_BARRIER_STATE_BUMP));
                // Release child from barrier
                ANNOTATE_BARRIER_BEGIN(child_thr);
                kmp_flag_64 flag(&child_bar->b_go, child_thr);
                flag.release();
            }
        }
    }
#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs && !KMP_MASTER_TID(tid)) { // copy ICVs locally to final dest
        __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
        copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &thr_bar->th_fixed_icvs);
    }
#endif
    KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}

// Hierarchical Barrier
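// The hierarchy mirrors the machine topology computed by __kmp_get_hierarchy(). For a
// non-nested (level 1) team with infinite blocktime, a parent and its leaf children
// communicate through single bytes of the parent's 64-bit "on-core" flags rather than
// through each thread's own b_go/b_arrived, so one cache line serves the whole group.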

// Initialize thread barrier data
/* Initializes/re-initializes the hierarchical barrier data stored on a thread. Performs the
   minimum amount of initialization required based on how the team has changed. Returns true if
   leaf children will require both on-core and traditional wake-up mechanisms. For example, if the
   team size increases, threads already in the team will respond to on-core wakeup on their parent
   thread, but threads newly added to the team will only be listening on their local b_go. */
static bool
__kmp_init_hierarchical_barrier_thread(enum barrier_type bt, kmp_bstate_t *thr_bar, kmp_uint32 nproc,
                                       int gtid, int tid, kmp_team_t *team)
{
    // Checks to determine if (re-)initialization is needed
    bool uninitialized = thr_bar->team == NULL;
    bool team_changed = team != thr_bar->team;
    bool team_sz_changed = nproc != thr_bar->nproc;
    bool tid_changed = tid != thr_bar->old_tid;
    bool retval = false;

    if (uninitialized || team_sz_changed) {
        __kmp_get_hierarchy(nproc, thr_bar);
    }

    if (uninitialized || team_sz_changed || tid_changed) {
        thr_bar->my_level = thr_bar->depth-1; // default for master
        thr_bar->parent_tid = -1; // default for master
        if (!KMP_MASTER_TID(tid)) { // if not master, find parent thread in hierarchy
            kmp_uint32 d=0;
            while (d<thr_bar->depth) { // find parent based on level of thread in hierarchy, and note level
                kmp_uint32 rem;
                if (d == thr_bar->depth-2) { // reached level right below the master
                    thr_bar->parent_tid = 0;
                    thr_bar->my_level = d;
                    break;
                }
                else if ((rem = tid%thr_bar->skip_per_level[d+1]) != 0) { // TODO: can we make this op faster?
                    // thread is not a subtree root at next level, so this is max
                    thr_bar->parent_tid = tid - rem;
                    thr_bar->my_level = d;
                    break;
                }
                ++d;
            }
        }
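        /* offset is this thread's byte index within its parent's 64-bit on-core flag words
           (b_arrived/b_go): the parent's on-core children each own one byte, counted down
           from byte 7, so a child can be signaled (or signal) by touching just its byte. */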
        thr_bar->offset = 7-(tid-thr_bar->parent_tid-1);
        thr_bar->old_tid = tid;
        thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
        thr_bar->team = team;
        thr_bar->parent_bar = &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
    }
    if (uninitialized || team_changed || tid_changed) {
        thr_bar->team = team;
        thr_bar->parent_bar = &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
        retval = true;
    }
    if (uninitialized || team_sz_changed || tid_changed) {
        thr_bar->nproc = nproc;
        thr_bar->leaf_kids = thr_bar->base_leaf_kids;
        if (thr_bar->my_level == 0) thr_bar->leaf_kids=0;
        if (thr_bar->leaf_kids && (kmp_uint32)tid+thr_bar->leaf_kids+1 > nproc)
            thr_bar->leaf_kids = nproc - tid - 1;
        thr_bar->leaf_state = 0;
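        /* leaf_state is the mask of bytes (one per on-core leaf child, again counted down
           from byte 7) that this thread expects those children to set in its b_arrived flag. */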
        for (int i=0; i<thr_bar->leaf_kids; ++i) ((char *)&(thr_bar->leaf_state))[7-i] = 1;
    }
    return retval;
}

static void
__kmp_hierarchical_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr,
                                  int gtid, int tid, void (*reduce) (void *, void *)
                                  USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather);
    register kmp_team_t *team = this_thr->th.th_team;
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_uint32 nproc = this_thr->th.th_team_nproc;
    register kmp_info_t **other_threads = team->t.t_threads;
    register kmp_uint64 new_state;

    int level = team->t.t_level;
#if OMP_40_ENABLED
    if (other_threads[0]->th.th_teams_microtask) // are we inside the teams construct?
        if (this_thr->th.th_teams_size.nteams > 1)
            ++level; // level was not increased in teams construct for team_of_masters
#endif
    if (level == 1) thr_bar->use_oncore_barrier = 1;
    else thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested

    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
    KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    // Barrier imbalance - save arrive time to the thread
    if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_arrive_time = __itt_get_timestamp();
    }
#endif

    (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid, team);

    if (thr_bar->my_level) { // not a leaf (my_level==0 means leaf)
        register kmp_int32 child_tid;
        new_state = (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
        if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) {
            if (thr_bar->leaf_kids) { // First, wait for leaf children to check-in on my b_arrived flag
                kmp_uint64 leaf_state = KMP_MASTER_TID(tid) ? thr_bar->b_arrived | thr_bar->leaf_state : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state;
                KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting for leaf kids\n",
                              gtid, team->t.t_id, tid));
                kmp_flag_64 flag(&thr_bar->b_arrived, leaf_state);
                flag.wait(this_thr, FALSE
                          USE_ITT_BUILD_ARG(itt_sync_obj) );
                if (reduce) {
                    ANNOTATE_REDUCE_AFTER(reduce);
                    for (child_tid=tid+1; child_tid<=tid+thr_bar->leaf_kids; ++child_tid) {
                        KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
                                       gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                                       team->t.t_id, child_tid));
                        ANNOTATE_BARRIER_END(other_threads[child_tid]);
                        (*reduce)(this_thr->th.th_local.reduce_data, other_threads[child_tid]->th.th_local.reduce_data);
                    }
                    ANNOTATE_REDUCE_BEFORE(reduce);
                    ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
                }
                (void) KMP_TEST_THEN_AND64((volatile kmp_int64 *)&thr_bar->b_arrived, ~(thr_bar->leaf_state)); // clear leaf_state bits
            }
            // Next, wait for higher level children on each child's b_arrived flag
            for (kmp_uint32 d=1; d<thr_bar->my_level; ++d) { // gather lowest level threads first, but skip 0
                kmp_uint32 last = tid+thr_bar->skip_per_level[d+1], skip = thr_bar->skip_per_level[d];
                if (last > nproc) last = nproc;
                for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) {
                    register kmp_info_t *child_thr = other_threads[child_tid];
                    register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
                    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
                                  "arrived(%p) == %llu\n",
                                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                                  team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
                    kmp_flag_64 flag(&child_bar->b_arrived, new_state);
                    flag.wait(this_thr, FALSE
                              USE_ITT_BUILD_ARG(itt_sync_obj) );
                    ANNOTATE_BARRIER_END(child_thr);
                    if (reduce) {
                        KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
                                       gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                                       team->t.t_id, child_tid));
                        ANNOTATE_REDUCE_AFTER(reduce);
                        (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
                        ANNOTATE_REDUCE_BEFORE(reduce);
                        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
                    }
                }
            }
        }
        else { // Blocktime is not infinite
            for (kmp_uint32 d=0; d<thr_bar->my_level; ++d) { // Gather lowest level threads first
                kmp_uint32 last = tid+thr_bar->skip_per_level[d+1], skip = thr_bar->skip_per_level[d];
                if (last > nproc) last = nproc;
                for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) {
                    register kmp_info_t *child_thr = other_threads[child_tid];
                    register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
                    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
                                  "arrived(%p) == %llu\n",
                                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                                  team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
                    kmp_flag_64 flag(&child_bar->b_arrived, new_state);
                    flag.wait(this_thr, FALSE
                              USE_ITT_BUILD_ARG(itt_sync_obj) );
                    ANNOTATE_BARRIER_END(child_thr);
                    if (reduce) {
                        KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
                                       gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                                       team->t.t_id, child_tid));
                        ANNOTATE_REDUCE_AFTER(reduce);
                        (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
                        ANNOTATE_REDUCE_BEFORE(reduce);
                        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
                    }
                }
            }
        }
    }
    // All subordinates are gathered; now release parent if not master thread

    if (!KMP_MASTER_TID(tid)) { // worker threads release parent in hierarchy
        KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                      "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
                      __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id, thr_bar->parent_tid,
                      &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar->b_arrived+KMP_BARRIER_STATE_BUMP));
        /* Mark arrival to parent: After performing this write, a worker thread may not assume that
           the team is valid any more - it could be deallocated by the master thread at any time. */
        if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME
            || !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived flag; release it
            ANNOTATE_BARRIER_BEGIN(this_thr);
            kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[thr_bar->parent_tid]);
            flag.release();
        }
        else { // Leaf does special release on the "offset" bits of parent's b_arrived flag
            thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
            kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived, thr_bar->offset);
            flag.set_waiter(other_threads[thr_bar->parent_tid]);
            flag.release();
        }
    } else { // Master thread needs to update the team's b_arrived value
        team->t.t_bar[bt].b_arrived = new_state;
        KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
                      gtid, team->t.t_id, tid, team->t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
    }
    // Is the team access below unsafe or just technically invalid?
    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}

static void
__kmp_hierarchical_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                                   int propagate_icvs
                                   USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release);
    register kmp_team_t *team;
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_uint32 nproc;
    bool team_change = false; // indicates on-core barrier shouldn't be used

    if (KMP_MASTER_TID(tid)) {
        team = __kmp_threads[gtid]->th.th_team;
        KMP_DEBUG_ASSERT(team != NULL);
        KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) master entered barrier type %d\n",
                      gtid, team->t.t_id, tid, bt));
    }
    else { // Worker threads
        // Wait for parent thread to release me
        if (!thr_bar->use_oncore_barrier || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME
            || thr_bar->my_level != 0 || thr_bar->team == NULL) {
            // Use traditional method of waiting on my own b_go flag
            thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG;
            kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
            flag.wait(this_thr, TRUE
                      USE_ITT_BUILD_ARG(itt_sync_obj) );
            ANNOTATE_BARRIER_END(this_thr);
            TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
        }
        else { // Thread barrier data is initialized, this is a leaf, blocktime is infinite, not nested
            // Wait on my "offset" bits on parent's b_go flag
            thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG;
            kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP, thr_bar->offset,
                                 bt, this_thr
                                 USE_ITT_BUILD_ARG(itt_sync_obj) );
            flag.wait(this_thr, TRUE);
            if (thr_bar->wait_flag == KMP_BARRIER_SWITCHING) { // Thread was switched to own b_go
                TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
            }
            else { // Reset my bits on parent's b_go flag
                ((char*)&(thr_bar->parent_bar->b_go))[thr_bar->offset] = 0;
            }
        }
        thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
        // Early exit for reaping threads releasing forkjoin barrier
        if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
            return;
        // The worker thread may now assume that the team is valid.
        team = __kmp_threads[gtid]->th.th_team;
        KMP_DEBUG_ASSERT(team != NULL);
        tid = __kmp_tid_from_gtid(gtid);

        KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
                      gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
        KMP_MB(); // Flush all pending memory write invalidates.
    }
    nproc = this_thr->th.th_team_nproc;
    int level = team->t.t_level;
#if OMP_40_ENABLED
    if (team->t.t_threads[0]->th.th_teams_microtask) { // are we inside the teams construct?
        if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && this_thr->th.th_teams_level == level)
            ++level; // level was not increased in teams construct for team_of_workers
        if (this_thr->th.th_teams_size.nteams > 1)
            ++level; // level was not increased in teams construct for team_of_masters
    }
#endif
    if (level == 1) thr_bar->use_oncore_barrier = 1;
    else thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested

    // If the team size has increased, we still communicate with old leaves via oncore barrier.
    unsigned short int old_leaf_kids = thr_bar->leaf_kids;
    kmp_uint64 old_leaf_state = thr_bar->leaf_state;
    team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid, team);
    // But if the entire team changes, we won't use oncore barrier at all
    if (team_change) old_leaf_kids = 0;

#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs) {
        __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
        if (KMP_MASTER_TID(tid)) { // master already has copy in final destination; copy
            copy_icvs(&thr_bar->th_fixed_icvs, &team->t.t_implicit_task_taskdata[tid].td_icvs);
        }
        else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) { // optimization for inf blocktime
            if (!thr_bar->my_level) // I'm a leaf in the hierarchy (my_level==0)
                // leaves (on-core children) pull parent's fixed ICVs directly to local ICV store
                copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                          &thr_bar->parent_bar->th_fixed_icvs);
            // non-leaves will get ICVs piggybacked with b_go via NGO store
        }
        else { // blocktime is not infinite; pull ICVs from parent's fixed ICVs
            if (thr_bar->my_level) // not a leaf; copy ICVs to my fixed ICVs child can access
                copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs);
            else // leaves copy parent's fixed ICVs directly to local ICV store
                copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                          &thr_bar->parent_bar->th_fixed_icvs);
        }
    }
#endif // KMP_BARRIER_ICV_PUSH

    // Now, release my children
    if (thr_bar->my_level) { // not a leaf
        register kmp_int32 child_tid;
        kmp_uint32 last;
        if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) {
            if (KMP_MASTER_TID(tid)) { // do a flat release
                // Set local b_go to bump children via NGO store of the cache line containing ICVs and b_go.
                thr_bar->b_go = KMP_BARRIER_STATE_BUMP;
                // Use ngo stores if available; b_go piggybacks in the last 8 bytes of the cache line
                ngo_load(&thr_bar->th_fixed_icvs);
                // This loops over all the threads skipping only the leaf nodes in the hierarchy
                for (child_tid=thr_bar->skip_per_level[1]; child_tid<(int)nproc; child_tid+=thr_bar->skip_per_level[1]) {
                    register kmp_bstate_t *child_bar = &team->t.t_threads[child_tid]->th.th_bar[bt].bb;
                    KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)"
                                  " go(%p): %u => %u\n",
                                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                                  team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
                                  child_bar->b_go + KMP_BARRIER_STATE_BUMP));
                    // Use ngo store (if available) to both store ICVs and release child via child's b_go
                    ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
                }
                ngo_sync();
            }
            TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
            // Now, release leaf children
            if (thr_bar->leaf_kids) { // if there are any
                // We test team_change on the off-chance that the level 1 team changed.
                if (team_change || old_leaf_kids < thr_bar->leaf_kids) { // some old leaf_kids, some new
                    if (old_leaf_kids) { // release old leaf kids
                        thr_bar->b_go |= old_leaf_state;
                    }
                    // Release new leaf kids
                    last = tid+thr_bar->skip_per_level[1];
                    if (last > nproc) last = nproc;
                    for (child_tid=tid+1+old_leaf_kids; child_tid<(int)last; ++child_tid) { // skip_per_level[0]=1
                        register kmp_info_t *child_thr = team->t.t_threads[child_tid];
                        register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
                        KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing"
                                      " T#%d(%d:%d) go(%p): %u => %u\n",
                                      gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                                      team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
                                      child_bar->b_go + KMP_BARRIER_STATE_BUMP));
                        // Release child using child's b_go flag
                        ANNOTATE_BARRIER_BEGIN(child_thr);
                        kmp_flag_64 flag(&child_bar->b_go, child_thr);
                        flag.release();
                    }
                }
                else { // Release all children at once with leaf_state bits on my own b_go flag
                    thr_bar->b_go |= thr_bar->leaf_state;
                }
            }
        }
        else { // Blocktime is not infinite; do a simple hierarchical release
            for (int d=thr_bar->my_level-1; d>=0; --d) { // Release highest level threads first
                last = tid+thr_bar->skip_per_level[d+1];
                kmp_uint32 skip = thr_bar->skip_per_level[d];
                if (last > nproc) last = nproc;
                for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) {
                    register kmp_info_t *child_thr = team->t.t_threads[child_tid];
                    register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
                    KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)"
                                  " go(%p): %u => %u\n",
                                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                                  team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
                                  child_bar->b_go + KMP_BARRIER_STATE_BUMP));
                    // Release child using child's b_go flag
                    ANNOTATE_BARRIER_BEGIN(child_thr);
                    kmp_flag_64 flag(&child_bar->b_go, child_thr);
                    flag.release();
                }
            }
        }
#if KMP_BARRIER_ICV_PUSH
        if (propagate_icvs && !KMP_MASTER_TID(tid)) // non-leaves copy ICVs from fixed ICVs to local dest
            copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &thr_bar->th_fixed_icvs);
#endif // KMP_BARRIER_ICV_PUSH
    }
    KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}

1079// ---------------------------- End of Barrier Algorithms ----------------------------
1080
1081// Internal function to do a barrier.
1082/* If is_split is true, do a split barrier, otherwise, do a plain barrier
1083 If reduce is non-NULL, do a split reduction barrier, otherwise, do a split barrier
1084 Returns 0 if master thread, 1 if worker thread. */
1085int
1086__kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
1087 void *reduce_data, void (*reduce)(void *, void *))
1088{
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001089 KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier);
Jonathan Peyton5375fe82016-11-14 21:13:44 +00001090 KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001091 register int tid = __kmp_tid_from_gtid(gtid);
1092 register kmp_info_t *this_thr = __kmp_threads[gtid];
1093 register kmp_team_t *team = this_thr->th.th_team;
1094 register int status = 0;
1095 ident_t *loc = __kmp_threads[gtid]->th.th_ident;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001096#if OMPT_SUPPORT
1097 ompt_task_id_t my_task_id;
1098 ompt_parallel_id_t my_parallel_id;
1099#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001100
1101 KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n",
1102 gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
1103
Jonas Hahnfeld35801a22017-02-15 08:14:22 +00001104 ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
Jonathan Peyton117a94f2015-06-29 17:28:57 +00001105#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001106 if (ompt_enabled) {
Jonathan Peyton117a94f2015-06-29 17:28:57 +00001107#if OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001108 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
1109 my_parallel_id = team->t.ompt_team_info.parallel_id;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001110
Jonathan Peyton117a94f2015-06-29 17:28:57 +00001111#if OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001112 if (this_thr->th.ompt_thread_info.state == ompt_state_wait_single) {
1113 if (ompt_callbacks.ompt_callback(ompt_event_single_others_end)) {
1114 ompt_callbacks.ompt_callback(ompt_event_single_others_end)(
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001115 my_parallel_id, my_task_id);
1116 }
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001117 }
1118#endif
1119 if (ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
1120 ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(
1121 my_parallel_id, my_task_id);
1122 }
Jonathan Peyton117a94f2015-06-29 17:28:57 +00001123#endif
1124 // It is OK to report the barrier state after the barrier begin callback.
1125 // According to the OMPT specification, a compliant implementation may
1126 // even delay reporting this state until the thread begins to wait at the barrier.
1127 this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001128 }
1129#endif
1130
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001131 if (! team->t.t_serialized) {
1132#if USE_ITT_BUILD
1133 // This value will be used in itt notify events below.
1134 void *itt_sync_obj = NULL;
1135# if USE_ITT_NOTIFY
1136 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1137 itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
1138# endif
1139#endif /* USE_ITT_BUILD */
1140 if (__kmp_tasking_mode == tskm_extra_barrier) {
1141 __kmp_tasking_barrier(team, this_thr, gtid);
1142 KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n",
1143 gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
1144 }
1145
1146 /* Copy the blocktime info to the thread, where __kmp_wait_template() can access it when
1147 the team struct is not guaranteed to exist. */
1148 // See note about the corresponding code in __kmp_join_barrier() being performance-critical.
1149 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
Jonathan Peytone1c7c132016-10-07 18:12:19 +00001150#if KMP_USE_MONITOR
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001151 this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1152 this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
Jonathan Peyton2208a8512017-01-27 17:54:31 +00001153#else
1154 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL();
1155#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001156 }
1157
1158#if USE_ITT_BUILD
1159 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1160 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
1161#endif /* USE_ITT_BUILD */
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001162#if USE_DEBUGGER
1163 // Let the debugger know: the thread has arrived at the barrier and is waiting.
1164 if (KMP_MASTER_TID(tid)) { // Master counter is stored in team structure.
1165 team->t.t_bar[bt].b_master_arrived += 1;
1166 } else {
1167 this_thr->th.th_bar[bt].bb.b_worker_arrived += 1;
1168 } // if
1169#endif /* USE_DEBUGGER */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001170 if (reduce != NULL) {
1171 //KMP_DEBUG_ASSERT( is_split == TRUE ); // #C69956
1172 this_thr->th.th_local.reduce_data = reduce_data;
1173 }
Jonathan Peytonb0b83c82015-11-09 16:28:32 +00001174
1175 if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec)
1176 __kmp_task_team_setup(this_thr, team, 0); // use 0 to set up only the current team if nthreads > 1
1177
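        // The gather algorithm (hyper, hierarchical, tree, or linear) is selected per barrier
        // type from __kmp_barrier_gather_pattern[], which is typically configured at startup
        // (e.g., via the KMP_*_BARRIER_PATTERN environment variables); linear is the fallback.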
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001178 switch (__kmp_barrier_gather_pattern[bt]) {
1179 case bp_hyper_bar: {
1180 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits to 0; use linear
1181 __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid, reduce
1182 USE_ITT_BUILD_ARG(itt_sync_obj) );
1183 break;
1184 }
1185 case bp_hierarchical_bar: {
1186 __kmp_hierarchical_barrier_gather(bt, this_thr, gtid, tid, reduce
1187 USE_ITT_BUILD_ARG(itt_sync_obj));
1188 break;
1189 }
1190 case bp_tree_bar: {
1191 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits to 0; use linear
1192 __kmp_tree_barrier_gather(bt, this_thr, gtid, tid, reduce
1193 USE_ITT_BUILD_ARG(itt_sync_obj) );
1194 break;
1195 }
1196 default: {
1197 __kmp_linear_barrier_gather(bt, this_thr, gtid, tid, reduce
1198 USE_ITT_BUILD_ARG(itt_sync_obj) );
1199 }
1200 }
1201
1202 KMP_MB(); // Flush all pending memory write invalidates.
1203
1204 if (KMP_MASTER_TID(tid)) {
1205 status = 0;
1206 if (__kmp_tasking_mode != tskm_immediate_exec) {
1207 __kmp_task_team_wait(this_thr, team
1208 USE_ITT_BUILD_ARG(itt_sync_obj) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001209 }
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001210#if USE_DEBUGGER
1211 // Let the debugger know: all threads have arrived and are starting to leave the barrier.
1212 team->t.t_bar[bt].b_team_arrived += 1;
1213#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001214
Olga Malyshevadbdcfa12017-04-04 13:56:50 +00001215#if OMP_40_ENABLED
1216 // Reset cancellation flag for worksharing constructs
1217 if(team->t.t_cancel_request == cancel_loop ||
1218 team->t.t_cancel_request == cancel_sections ) {
1219 team->t.t_cancel_request = cancel_noreq;
1220 }
1221#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001222#if USE_ITT_BUILD
1223 /* TODO: In case of split reduction barrier, master thread may send acquired event early,
1224 before the final summation into the shared variable is done (final summation can be a
1225 long operation for array reductions). */
1226 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1227 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1228#endif /* USE_ITT_BUILD */
1229#if USE_ITT_BUILD && USE_ITT_NOTIFY
Andrey Churbanov51aecb82015-05-06 19:22:36 +00001230 // Barrier - report frame end (only if active_level == 1)
1231 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode &&
1232#if OMP_40_ENABLED
1233 this_thr->th.th_teams_microtask == NULL &&
1234#endif
1235 team->t.t_active_level == 1)
1236 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001237 kmp_uint64 cur_time = __itt_get_timestamp();
Andrey Churbanov51aecb82015-05-06 19:22:36 +00001238 kmp_info_t **other_threads = team->t.t_threads;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001239 int nproc = this_thr->th.th_team_nproc;
1240 int i;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001241 switch(__kmp_forkjoin_frames_mode) {
1242 case 1:
1243 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
1244 this_thr->th.th_frame_time = cur_time;
1245 break;
Andrey Churbanov51aecb82015-05-06 19:22:36 +00001246 case 2: // AC 2015-01-19: currently does not work for hierarchical (to be fixed)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001247 __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, loc, nproc);
1248 break;
1249 case 3:
1250 if( __itt_metadata_add_ptr ) {
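                    // The imbalance metric accumulated below is the total wait time across the team:
                    //   delta = sum_i (cur_time - th_bar_arrive_time[i])
                    // starting from the master's own wait time and adding each worker's contribution.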
Andrey Churbanov51aecb82015-05-06 19:22:36 +00001251 // Initialize with master's wait time
1252 kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
Jonathan Peyton99ef4d02016-04-14 16:06:49 +00001253 // Set arrive time to zero to be able to check it in __kmp_invoke_task(); the same is done inside the loop below
1254 this_thr->th.th_bar_arrive_time = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001255 for (i=1; i<nproc; ++i) {
1256 delta += ( cur_time - other_threads[i]->th.th_bar_arrive_time );
Jonathan Peyton99ef4d02016-04-14 16:06:49 +00001257 other_threads[i]->th.th_bar_arrive_time = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001258 }
1259 __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, cur_time, delta, (kmp_uint64)( reduce != NULL));
1260 }
1261 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
1262 this_thr->th.th_frame_time = cur_time;
1263 break;
1264 }
1265 }
1266#endif /* USE_ITT_BUILD */
1267 } else {
1268 status = 1;
1269#if USE_ITT_BUILD
1270 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1271 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1272#endif /* USE_ITT_BUILD */
1273 }
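        // Workers (status == 1) always wait here to be released; the master releases them
        // immediately unless this is a split barrier, in which case the release is deferred
        // to __kmp_end_split_barrier().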
1274 if (status == 1 || ! is_split) {
1275 switch (__kmp_barrier_release_pattern[bt]) {
1276 case bp_hyper_bar: {
1277 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
1278 __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, FALSE
1279 USE_ITT_BUILD_ARG(itt_sync_obj) );
1280 break;
1281 }
1282 case bp_hierarchical_bar: {
1283 __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, FALSE
1284 USE_ITT_BUILD_ARG(itt_sync_obj) );
1285 break;
1286 }
1287 case bp_tree_bar: {
1288 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
1289 __kmp_tree_barrier_release(bt, this_thr, gtid, tid, FALSE
1290 USE_ITT_BUILD_ARG(itt_sync_obj) );
1291 break;
1292 }
1293 default: {
1294 __kmp_linear_barrier_release(bt, this_thr, gtid, tid, FALSE
1295 USE_ITT_BUILD_ARG(itt_sync_obj) );
1296 }
1297 }
1298 if (__kmp_tasking_mode != tskm_immediate_exec) {
1299 __kmp_task_team_sync(this_thr, team);
1300 }
1301 }
1302
1303#if USE_ITT_BUILD
1304 /* GEH: TODO: Move this under if-condition above and also include in
1305 __kmp_end_split_barrier(). This will more accurately represent the actual release time
1306 of the threads for split barriers. */
1307 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1308 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
1309#endif /* USE_ITT_BUILD */
1310 } else { // Team is serialized.
1311 status = 0;
1312 if (__kmp_tasking_mode != tskm_immediate_exec) {
Jonathan Peytondf6818b2016-06-14 17:57:47 +00001313#if OMP_45_ENABLED
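        // Serialized team (OpenMP 4.5+): proxy tasks may still be outstanding even though the
        // region is serialized, so wait for the task team here and then re-arm it below.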
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001314 if ( this_thr->th.th_task_team != NULL ) {
1315 void *itt_sync_obj = NULL;
1316#if USE_ITT_NOTIFY
1317 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
1318 itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
1319 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
1320 }
1321#endif
1322
Jonathan Peytonfe9a1d72015-08-26 19:58:48 +00001323 KMP_DEBUG_ASSERT(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == TRUE);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001324 __kmp_task_team_wait(this_thr, team
1325 USE_ITT_BUILD_ARG(itt_sync_obj));
Jonathan Peyton54127982015-11-04 21:37:48 +00001326 __kmp_task_team_setup(this_thr, team, 0);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001327
1328#if USE_ITT_BUILD
1329 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1330 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
1331#endif /* USE_ITT_BUILD */
1332 }
1333#else
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001334 // The task team should be NULL for serialized code (tasks will be executed immediately)
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001335 KMP_DEBUG_ASSERT(team->t.t_task_team[this_thr->th.th_task_state] == NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001336 KMP_DEBUG_ASSERT(this_thr->th.th_task_team == NULL);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001337#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001338 }
1339 }
1340 KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
1341 gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid), status));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001342
1343#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001344 if (ompt_enabled) {
Jonathan Peyton117a94f2015-06-29 17:28:57 +00001345#if OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001346 if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001347 ompt_callbacks.ompt_callback(ompt_event_barrier_end)(
1348 my_parallel_id, my_task_id);
1349 }
1350#endif
1351 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
1352 }
1353#endif
Jonas Hahnfeld35801a22017-02-15 08:14:22 +00001354 ANNOTATE_BARRIER_END(&team->t.t_bar);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001355
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001356 return status;
1357}
1358
1359
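/* Complete a split barrier begun by __kmp_barrier() with is_split=TRUE: the master performs
   the deferred release phase to signal the workers still waiting in the barrier.
   (Callers live outside this file.) */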
1360void
1361__kmp_end_split_barrier(enum barrier_type bt, int gtid)
1362{
Jonathan Peyton5375fe82016-11-14 21:13:44 +00001363 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier);
1364 KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001365 int tid = __kmp_tid_from_gtid(gtid);
1366 kmp_info_t *this_thr = __kmp_threads[gtid];
1367 kmp_team_t *team = this_thr->th.th_team;
1368
Jonas Hahnfeld35801a22017-02-15 08:14:22 +00001369 ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001370 if (!team->t.t_serialized) {
1371 if (KMP_MASTER_GTID(gtid)) {
1372 switch (__kmp_barrier_release_pattern[bt]) {
1373 case bp_hyper_bar: {
1374 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
1375 __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, FALSE
1376 USE_ITT_BUILD_ARG(NULL) );
1377 break;
1378 }
1379 case bp_hierarchical_bar: {
1380 __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, FALSE
1381 USE_ITT_BUILD_ARG(NULL));
1382 break;
1383 }
1384 case bp_tree_bar: {
1385 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
1386 __kmp_tree_barrier_release(bt, this_thr, gtid, tid, FALSE
1387 USE_ITT_BUILD_ARG(NULL) );
1388 break;
1389 }
1390 default: {
1391 __kmp_linear_barrier_release(bt, this_thr, gtid, tid, FALSE
1392 USE_ITT_BUILD_ARG(NULL) );
1393 }
1394 }
1395 if (__kmp_tasking_mode != tskm_immediate_exec) {
1396 __kmp_task_team_sync(this_thr, team);
1397 } // if
1398 }
1399 }
Jonas Hahnfeld35801a22017-02-15 08:14:22 +00001400 ANNOTATE_BARRIER_END(&team->t.t_bar);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001401}
1402
1403
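/* Join barrier at the end of a parallel region: all team threads gather here. Only the
   gather phase runs now; workers are released later from the fork barrier of the next
   region (or at shutdown), so workers must not reference the team structure afterwards. */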
1404void
1405__kmp_join_barrier(int gtid)
1406{
Jonathan Peyton5375fe82016-11-14 21:13:44 +00001407 KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001408 KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001409 register kmp_info_t *this_thr = __kmp_threads[gtid];
1410 register kmp_team_t *team;
1411 register kmp_uint nproc;
1412 kmp_info_t *master_thread;
1413 int tid;
1414#ifdef KMP_DEBUG
1415 int team_id;
1416#endif /* KMP_DEBUG */
1417#if USE_ITT_BUILD
1418 void *itt_sync_obj = NULL;
1419# if USE_ITT_NOTIFY
1420 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) // Don't call routine without need
1421 // Get object created at fork_barrier
1422 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1423# endif
1424#endif /* USE_ITT_BUILD */
1425 KMP_MB();
1426
1427 // Get current info
1428 team = this_thr->th.th_team;
1429 nproc = this_thr->th.th_team_nproc;
1430 KMP_DEBUG_ASSERT((int)nproc == team->t.t_nproc);
1431 tid = __kmp_tid_from_gtid(gtid);
1432#ifdef KMP_DEBUG
1433 team_id = team->t.t_id;
1434#endif /* KMP_DEBUG */
1435 master_thread = this_thr->th.th_team_master;
1436#ifdef KMP_DEBUG
1437 if (master_thread != team->t.t_threads[0]) {
1438 __kmp_print_structure();
1439 }
1440#endif /* KMP_DEBUG */
1441 KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0]);
1442 KMP_MB();
1443
1444 // Verify state
1445 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
1446 KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team));
1447 KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root));
1448 KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid]);
1449 KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n", gtid, team_id, tid));
1450
Jonas Hahnfeld35801a22017-02-15 08:14:22 +00001451 ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
Jonathan Peyton61118492016-05-20 19:03:38 +00001452#if OMPT_SUPPORT
Jonathan Peyton117a94f2015-06-29 17:28:57 +00001453#if OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001454 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001455 ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
1456 ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(
1457 team->t.ompt_team_info.parallel_id,
1458 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1459 }
Jonathan Peyton117a94f2015-06-29 17:28:57 +00001460#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001461 this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
1462#endif
1463
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001464 if (__kmp_tasking_mode == tskm_extra_barrier) {
1465 __kmp_tasking_barrier(team, this_thr, gtid);
1466 KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n", gtid, team_id, tid));
1467 }
1468# ifdef KMP_DEBUG
1469 if (__kmp_tasking_mode != tskm_immediate_exec) {
1470 KA_TRACE(20, ( "__kmp_join_barrier: T#%d, old team = %d, old task_team = %p, th_task_team = %p\n",
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001471 __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team[this_thr->th.th_task_state],
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001472 this_thr->th.th_task_team));
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001473 KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001474 }
1475# endif /* KMP_DEBUG */
1476
1477 /* Copy the blocktime info to the thread, where __kmp_wait_template() can access it when the
1478 team struct is not guaranteed to exist. Doing these loads causes a cache miss and slows
1479 down EPCC parallel by 2x. As a workaround, we do not perform the copy if blocktime=infinite,
1480 since the values are not used by __kmp_wait_template() in that case. */
1481 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
Jonathan Peytone1c7c132016-10-07 18:12:19 +00001482#if KMP_USE_MONITOR
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001483 this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1484 this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
Jonathan Peyton2208a8512017-01-27 17:54:31 +00001485#else
1486 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL();
1487#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001488 }
1489
1490#if USE_ITT_BUILD
1491 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1492 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
1493#endif /* USE_ITT_BUILD */
1494
1495 switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {
1496 case bp_hyper_bar: {
1497 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
1498 __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
1499 USE_ITT_BUILD_ARG(itt_sync_obj) );
1500 break;
1501 }
1502 case bp_hierarchical_bar: {
1503 __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
1504 USE_ITT_BUILD_ARG(itt_sync_obj) );
1505 break;
1506 }
1507 case bp_tree_bar: {
1508 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
1509 __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
1510 USE_ITT_BUILD_ARG(itt_sync_obj) );
1511 break;
1512 }
1513 default: {
1514 __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
1515 USE_ITT_BUILD_ARG(itt_sync_obj) );
1516 }
1517 }
1518
1519 /* From this point on, the team data structure may be deallocated at any time by the
1520 master thread - it is unsafe to reference it in any of the worker threads. Any per-team
1521 data items that need to be referenced before the end of the barrier should be moved to
1522 the kmp_task_team_t structs. */
1523 if (KMP_MASTER_TID(tid)) {
1524 if (__kmp_tasking_mode != tskm_immediate_exec) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001525 __kmp_task_team_wait(this_thr, team
1526 USE_ITT_BUILD_ARG(itt_sync_obj) );
1527 }
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001528#if KMP_STATS_ENABLED
1529 // Have the master thread flag the workers to indicate that they are now waiting for the
1530 // next parallel region. Also wake them up so they switch their timers to idle.
1531 for (int i=0; i<team->t.t_nproc; ++i) {
1532 kmp_info_t* team_thread = team->t.t_threads[i];
1533 if (team_thread == this_thr)
1534 continue;
1535 team_thread->th.th_stats->setIdleFlag();
1536 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && team_thread->th.th_sleep_loc != NULL)
1537 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(team_thread), team_thread->th.th_sleep_loc);
1538 }
1539#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001540#if USE_ITT_BUILD
1541 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1542 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1543#endif /* USE_ITT_BUILD */
1544
1545# if USE_ITT_BUILD && USE_ITT_NOTIFY
1546 // Join barrier - report frame end
Andrey Churbanov51aecb82015-05-06 19:22:36 +00001547 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode &&
1548#if OMP_40_ENABLED
1549 this_thr->th.th_teams_microtask == NULL &&
1550#endif
1551 team->t.t_active_level == 1)
1552 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001553 kmp_uint64 cur_time = __itt_get_timestamp();
1554 ident_t * loc = team->t.t_ident;
Andrey Churbanov51aecb82015-05-06 19:22:36 +00001555 kmp_info_t **other_threads = team->t.t_threads;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001556 int nproc = this_thr->th.th_team_nproc;
1557 int i;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001558 switch(__kmp_forkjoin_frames_mode) {
1559 case 1:
1560 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
1561 break;
1562 case 2:
1563 __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, loc, nproc);
1564 break;
1565 case 3:
1566 if( __itt_metadata_add_ptr ) {
Andrey Churbanov51aecb82015-05-06 19:22:36 +00001567 // Initialize with master's wait time
1568 kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
Jonathan Peyton99ef4d02016-04-14 16:06:49 +00001569 // Set arrive time to zero to be able to check it in __kmp_invoke_task(); the same is done inside the loop below
1570 this_thr->th.th_bar_arrive_time = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001571 for (i=1; i<nproc; ++i) {
1572 delta += ( cur_time - other_threads[i]->th.th_bar_arrive_time );
Jonathan Peyton99ef4d02016-04-14 16:06:49 +00001573 other_threads[i]->th.th_bar_arrive_time = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001574 }
1575 __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, cur_time, delta, 0);
1576 }
1577 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
1578 this_thr->th.th_frame_time = cur_time;
1579 break;
1580 }
1581 }
1582# endif /* USE_ITT_BUILD */
1583 }
1584#if USE_ITT_BUILD
1585 else {
1586 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1587 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1588 }
1589#endif /* USE_ITT_BUILD */
1590
1591#if KMP_DEBUG
1592 if (KMP_MASTER_TID(tid)) {
1593 KA_TRACE(15, ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
1594 gtid, team_id, tid, nproc));
1595 }
1596#endif /* KMP_DEBUG */
1597
1598 // TODO: now mark worker threads as done so they may be disbanded
1599 KMP_MB(); // Flush all pending memory write invalidates.
1600 KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001601
1602#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001603 if (ompt_enabled) {
Jonathan Peytoncab67cc2015-09-18 16:24:46 +00001604#if OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001605 if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001606 ompt_callbacks.ompt_callback(ompt_event_barrier_end)(
1607 team->t.ompt_team_info.parallel_id,
1608 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001609 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001610#endif
1611
1612 // return to default state
1613 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
1614 }
1615#endif
Jonas Hahnfeld35801a22017-02-15 08:14:22 +00001616 ANNOTATE_BARRIER_END(&team->t.t_bar);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001617}
1618
1619
1620// TODO release worker threads' fork barriers as we are ready instead of all at once
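// Fork barrier: the master sets up the task teams and releases the workers into the next
// parallel region (propagating ICVs per the configured scheme); workers wait here on their
// fork/join b_go flag until released, or until the library is shutting down.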
1621void
1622__kmp_fork_barrier(int gtid, int tid)
1623{
Jonathan Peyton5375fe82016-11-14 21:13:44 +00001624 KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001625 KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001626 kmp_info_t *this_thr = __kmp_threads[gtid];
1627 kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
1628#if USE_ITT_BUILD
1629 void * itt_sync_obj = NULL;
1630#endif /* USE_ITT_BUILD */
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00001631 if (team)
Jonas Hahnfeld35801a22017-02-15 08:14:22 +00001632 ANNOTATE_BARRIER_END(&team->t.t_bar);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001633
1634 KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n",
1635 gtid, (team != NULL) ? team->t.t_id : -1, tid));
1636
1637 // th_team pointer only valid for master thread here
1638 if (KMP_MASTER_TID(tid)) {
1639#if USE_ITT_BUILD && USE_ITT_NOTIFY
1640 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
1641 // Create itt barrier object
1642 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1);
1643 __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing
1644 }
1645#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1646
1647#ifdef KMP_DEBUG
1648 register kmp_info_t **other_threads = team->t.t_threads;
1649 register int i;
1650
1651 // Verify state
1652 KMP_MB();
1653
1654 for(i=1; i<team->t.t_nproc; ++i) {
1655 KA_TRACE(500, ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go == %u.\n",
1656 gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,
1657 team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,
1658 other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go));
1659 KMP_DEBUG_ASSERT((TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go)
1660 & ~(KMP_BARRIER_SLEEP_STATE))
1661 == KMP_INIT_BARRIER_STATE);
1662 KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team);
1663 }
1664#endif
1665
1666 if (__kmp_tasking_mode != tskm_immediate_exec) {
Jonathan Peyton54127982015-11-04 21:37:48 +00001667 __kmp_task_team_setup(this_thr, team, 0); // 0 indicates: set up the current task team if nthreads > 1
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001668 }
1669
1670 /* The master thread may have changed its blocktime between the join barrier and the
1671 fork barrier. Copy the blocktime info to the thread, where __kmp_wait_template() can
1672 access it when the team struct is not guaranteed to exist. */
1673 // See note about the corresponding code in __kmp_join_barrier() being performance-critical
1674 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
Jonathan Peytone1c7c132016-10-07 18:12:19 +00001675#if KMP_USE_MONITOR
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001676 this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1677 this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
Jonathan Peyton2208a8512017-01-27 17:54:31 +00001678#else
1679 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL();
1680#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001681 }
1682 } // master
1683
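    // Workers wait in the release routine (on their b_go flag) until the master signals them
    // with the configured release pattern; TRUE requests ICV propagation where the chosen
    // algorithm supports it.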
1684 switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {
1685 case bp_hyper_bar: {
1686 KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
1687 __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
1688 USE_ITT_BUILD_ARG(itt_sync_obj) );
1689 break;
1690 }
1691 case bp_hierarchical_bar: {
1692 __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
1693 USE_ITT_BUILD_ARG(itt_sync_obj) );
1694 break;
1695 }
1696 case bp_tree_bar: {
1697 KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
1698 __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
1699 USE_ITT_BUILD_ARG(itt_sync_obj) );
1700 break;
1701 }
1702 default: {
1703 __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
1704 USE_ITT_BUILD_ARG(itt_sync_obj) );
1705 }
1706 }
1707
1708 // Early exit for reaping threads releasing forkjoin barrier
1709 if (TCR_4(__kmp_global.g.g_done)) {
Jonathan Peyton54127982015-11-04 21:37:48 +00001710 this_thr->th.th_task_team = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001711
1712#if USE_ITT_BUILD && USE_ITT_NOTIFY
1713 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
1714 if (!KMP_MASTER_TID(tid)) {
1715 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1716 if (itt_sync_obj)
1717 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
1718 }
1719 }
1720#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1721 KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid));
1722 return;
1723 }
1724
1725 /* We can now assume that a valid team structure has been allocated by the master and
1726 propagated to all worker threads. The current thread, however, may not be part of the
1727 team, so we can't blindly assume that the team pointer is non-null. */
1728 team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team);
1729 KMP_DEBUG_ASSERT(team != NULL);
1730 tid = __kmp_tid_from_gtid(gtid);
1731
1732
1733#if KMP_BARRIER_ICV_PULL
1734 /* Master thread's copy of the ICVs was set up on the implicit taskdata in
1735 __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's implicit task has
1736 this data before this function is called. We cannot modify __kmp_fork_call() to look at
1737 the fixed ICVs in the master's thread struct, because it is not always the case that the
1738 threads arrays have been allocated when __kmp_fork_call() is executed. */
Jonathan Peyton45be4502015-08-11 21:36:41 +00001739 {
Jonathan Peyton5375fe82016-11-14 21:13:44 +00001740 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001741 if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs
1742 // Copy the initial ICVs from the master's thread struct to the implicit task for this tid.
1743 KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
1744 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
1745 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1746 &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs);
1747 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001748 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001749#endif // KMP_BARRIER_ICV_PULL
1750
1751 if (__kmp_tasking_mode != tskm_immediate_exec) {
1752 __kmp_task_team_sync(this_thr, team);
1753 }
1754
1755#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
1756 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
1757 if (proc_bind == proc_bind_intel) {
1758#endif
Andrey Churbanovf28f6132015-01-13 14:54:00 +00001759#if KMP_AFFINITY_SUPPORTED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001760 // Call dynamic affinity settings
1761 if(__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) {
1762 __kmp_balanced_affinity(tid, team->t.t_nproc);
1763 }
Andrey Churbanovf28f6132015-01-13 14:54:00 +00001764#endif // KMP_AFFINITY_SUPPORTED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001765#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
1766 }
Andrey Churbanov94e569e2015-03-10 09:19:47 +00001767 else if (proc_bind != proc_bind_false) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001768 if (this_thr->th.th_new_place == this_thr->th.th_current_place) {
1769 KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n",
1770 __kmp_gtid_from_thread(this_thr), this_thr->th.th_current_place));
1771 }
1772 else {
1773 __kmp_affinity_set_place(gtid);
1774 }
1775 }
1776#endif
1777
1778#if USE_ITT_BUILD && USE_ITT_NOTIFY
1779 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
1780 if (!KMP_MASTER_TID(tid)) {
1781 // Get correct barrier object
1782 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1783 __kmp_itt_barrier_finished(gtid, itt_sync_obj); // Workers call acquired
1784 } // (prepare called inside barrier_release)
1785 }
1786#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
Jonas Hahnfeld35801a22017-02-15 08:14:22 +00001787 ANNOTATE_BARRIER_END(&team->t.t_bar);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001788 KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid, team->t.t_id, tid));
1789}
1790
1791
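/* Choose how the new ICVs reach the workers of a (re)formed team: stash them for workers to
   pull in the fork barrier (KMP_BARRIER_ICV_PULL), rely on the push done during the
   fork-barrier release (KMP_BARRIER_ICV_PUSH), or copy them linearly to each worker's
   implicit task right here. */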
1792void
1793__kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc )
1794{
Jonathan Peyton5375fe82016-11-14 21:13:44 +00001795 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001796
1797 KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
1798 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
1799
1800 /* Master thread's copy of the ICVs was set up on the implicit taskdata in
1801 __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's implicit task has
1802 this data before this function is called. */
1803#if KMP_BARRIER_ICV_PULL
1804 /* Copy ICVs to master's thread structure into th_fixed_icvs (which remains untouched), where
1805 all of the worker threads can access them and make their own copies after the barrier. */
1806 KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be allocated at this point
1807 copy_icvs(&team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs, new_icvs);
1808 KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n",
1809 0, team->t.t_threads[0], team));
1810#elif KMP_BARRIER_ICV_PUSH
1811 // The ICVs will be propagated in the fork barrier, so nothing needs to be done here.
1812 KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n",
1813 0, team->t.t_threads[0], team));
1814#else
1815 // Copy the ICVs to each of the non-master threads. This takes O(nthreads) time.
1816 ngo_load(new_icvs);
1817 KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be allocated at this point
Jonathan Peyton91b78702015-06-08 19:39:07 +00001818 for (int f=1; f<new_nproc; ++f) { // Skip the master thread
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001819 // TODO: GEH - pass in better source location info since usually NULL here
1820 KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
1821 f, team->t.t_threads[f], team));
1822 __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE);
1823 ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
1824 KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
1825 f, team->t.t_threads[f], team));
1826 }
1827 ngo_sync();
1828#endif // KMP_BARRIER_ICV_PULL
1829}