/*
 * kmp_csupport.c -- kfront linkage support for OpenMP.
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "omp.h"        /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_error.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-internal.h"
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* flags will be used in future, e.g., to implement */
/* openmp_strict library restrictions */

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 *
 */
void
__kmpc_begin(ident_t *loc, kmp_int32 flags)
{
    // By default __kmp_ignore_mppbeg() returns TRUE.
    if (__kmp_ignore_mppbeg() == FALSE) {
        __kmp_internal_begin();

        KC_TRACE( 10, ("__kmpc_begin: called\n" ) );
    }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will not
 * do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to zero.
 */
void
__kmpc_end(ident_t *loc)
{
    // By default, __kmp_ignore_mppend() returns TRUE, which makes the __kmpc_end() call a no-op.
    // However, this can be overridden with the KMP_IGNORE_MPPEND environment variable.
    // If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() returns FALSE and __kmpc_end()
    // will unregister this root (it can cause library shut down).
    if (__kmp_ignore_mppend() == FALSE) {
        KC_TRACE( 10, ("__kmpc_end: called\n" ) );
        KA_TRACE( 30, ("__kmpc_end\n" ));

        __kmp_internal_end_thread( -1 );
    }
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is that
which would be returned by @ref omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).

*/
kmp_int32
__kmpc_global_thread_num(ident_t *loc)
{
    kmp_int32 gtid = __kmp_entry_gtid();

    KC_TRACE( 10, ("__kmpc_global_thread_num: T#%d\n", gtid ) );

    return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime.

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime. That is
not a number that can be determined by any OpenMP standard calls, since the library may be
called from more than one non-OpenMP thread, and this reflects the total over all such calls.
Similarly, because the runtime maintains underlying threads even when they are not active (since the cost
of creating and destroying OS threads is high), this call counts all such threads even if they are not
currently waiting for work.
*/
kmp_int32
__kmpc_global_num_threads(ident_t *loc)
{
    KC_TRACE( 10, ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_nth ) );

    return TCR_4(__kmp_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel construct.

*/
kmp_int32
__kmpc_bound_thread_num(ident_t *loc)
{
    KC_TRACE( 10, ("__kmpc_bound_thread_num: called\n" ) );
    return __kmp_tid_from_gtid( __kmp_entry_gtid() );
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32
__kmpc_bound_num_threads(ident_t *loc)
{
    KC_TRACE( 10, ("__kmpc_bound_num_threads: called\n" ) );

    return __kmp_entry_thread() -> th.th_team -> t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32
__kmpc_ok_to_fork(ident_t *loc)
{
#ifndef KMP_DEBUG

    return TRUE;

#else

    const char *semi2;
    const char *semi3;
    int line_no;

    if (__kmp_par_range == 0) {
        return TRUE;
    }
    semi2 = loc->psource;
    if (semi2 == NULL) {
        return TRUE;
    }
    semi2 = strchr(semi2, ';');
    if (semi2 == NULL) {
        return TRUE;
    }
    semi2 = strchr(semi2 + 1, ';');
    if (semi2 == NULL) {
        return TRUE;
    }
    if (__kmp_par_range_filename[0]) {
        const char *name = semi2 - 1;
        while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
            name--;
        }
        if ((*name == '/') || (*name == ';')) {
            name++;
        }
        if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
            return __kmp_par_range < 0;
        }
    }
    semi3 = strchr(semi2 + 1, ';');
    if (__kmp_par_range_routine[0]) {
        if ((semi3 != NULL) && (semi3 > semi2)
          && (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
            return __kmp_par_range < 0;
        }
    }
    if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
        if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
            return __kmp_par_range > 0;
        }
        return __kmp_par_range < 0;
    }
    return TRUE;

#endif /* KMP_DEBUG */

}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if not.
*/
kmp_int32
__kmpc_in_parallel( ident_t *loc )
{
    return __kmp_entry_thread() -> th.th_root -> r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
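
As an illustration only (a sketch of what a compiler might emit, not the exact output
of any particular compiler), a construct such as
@code
#pragma omp parallel num_threads(4)
@endcode
could be lowered to roughly
@code
__kmpc_push_num_threads( &loc, __kmpc_global_thread_num( &loc ), 4 );
__kmpc_fork_call( &loc, 0, (kmpc_micro) outlined_body );
@endcode
where `loc` and `outlined_body` stand for compiler-generated names used only in this example.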
*/
void
__kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads )
{
    KA_TRACE( 20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
      global_tid, num_threads ) );

    __kmp_push_num_threads( loc, global_tid, num_threads );
}

void
__kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid )
{
    KA_TRACE( 20, ("__kmpc_pop_num_threads: enter\n" ) );

    /* the num_threads are automatically popped */
}


#if OMP_40_ENABLED

void
__kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind )
{
    KA_TRACE( 20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n",
      global_tid, proc_bind ) );

    __kmp_push_proc_bind( loc, global_tid, (kmp_proc_bind_t)proc_bind );
}

#endif /* OMP_40_ENABLED */


/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
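
For illustration only (a sketch of a typical lowering, not the exact code any particular
compiler emits), a region such as
@code
int x = 0;
#pragma omp parallel shared(x)
{ body(&x); }
@endcode
might be outlined and invoked roughly as
@code
static void outlined( kmp_int32 *gtid, kmp_int32 *btid, int *x )
{
    body( x );   // body of the parallel region, executed by every thread of the team
}
...
__kmpc_fork_call( &loc, 1, (kmpc_micro) outlined, &x );
@endcode
where `loc`, `outlined` and `body` are names invented for this example.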
*/
void
__kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
{
    KMP_STOP_EXPLICIT_TIMER(OMP_serial);
    KMP_COUNT_BLOCK(OMP_PARALLEL);
    int gtid = __kmp_entry_gtid();
    // maybe to save thr_state is enough here
    {
        va_list ap;
        va_start( ap, microtask );

#if OMPT_SUPPORT
        kmp_info_t *master_th = __kmp_threads[ gtid ];
        kmp_team_t *parent_team = master_th->th.th_team;
        int tid = __kmp_tid_from_gtid( gtid );
        parent_team->t.t_implicit_task_taskdata[tid].
            ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
#endif

#if INCLUDE_SSC_MARKS
        SSC_MARK_FORKING();
#endif
        __kmp_fork_call( loc, gtid, fork_context_intel,
                argc,
#if OMPT_SUPPORT
                VOLATILE_CAST(void *) microtask,      // "unwrapped" task
#endif
                VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                &ap
#else
                ap
#endif
                );
#if INCLUDE_SSC_MARKS
        SSC_MARK_JOINING();
#endif
        __kmp_join_call( loc, gtid );

        va_end( ap );

#if OMPT_SUPPORT
        if (ompt_status & ompt_status_track) {
            parent_team->t.t_implicit_task_taskdata[tid].
                ompt_task_info.frame.reenter_runtime_frame = 0;
        }
#endif
    }
    KMP_START_EXPLICIT_TIMER(OMP_serial);
}

#if OMP_40_ENABLED
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads limit on the number of threads in each team (corresponds to the `thread_limit` clause)

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void
__kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads )
{
    KA_TRACE( 20, ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
      global_tid, num_teams, num_threads ) );

    __kmp_push_num_teams( loc, global_tid, num_teams, num_threads );
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void
__kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
{
    int gtid = __kmp_entry_gtid();
    kmp_info_t *this_thr = __kmp_threads[ gtid ];
    va_list ap;
    va_start( ap, microtask );

    // remember teams entry point and nesting level
    this_thr->th.th_teams_microtask = microtask;
    this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level; // AC: can be >0 on host

    // check if __kmpc_push_num_teams called, set default number of teams otherwise
    if ( this_thr->th.th_teams_size.nteams == 0 ) {
        __kmp_push_num_teams( loc, gtid, 0, 0 );
    }
    KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
    KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
    KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

    __kmp_fork_call( loc, gtid, fork_context_intel,
            argc,
#if OMPT_SUPPORT
            VOLATILE_CAST(void *) microtask,               // "unwrapped" task
#endif
            VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
            VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            &ap
#else
            ap
#endif
            );
    __kmp_join_call( loc, gtid );
    this_thr->th.th_teams_microtask = NULL;
    this_thr->th.th_teams_level = 0;
    *(kmp_int64*)(&this_thr->th.th_teams_size) = 0L;
    va_end( ap );
}
#endif /* OMP_40_ENABLED */


//
// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
//
int
__kmpc_invoke_task_func( int gtid )
{
    return __kmp_invoke_task_func( gtid );
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void
__kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
{
    __kmp_serialized_parallel(loc, global_tid); /* The implementation is now in kmp_runtime.c so that it can share static functions with
                                                 * kmp_fork_call since the tasks to be done are similar in each case.
                                                 */
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void
__kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
{
    kmp_internal_control_t *top;
    kmp_info_t *this_thr;
    kmp_team_t *serial_team;

    KC_TRACE( 10, ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid ) );

    /* skip all this code for autopar serialized loops since it results in
       unacceptable overhead */
    if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
        return;

    // Not autopar code
    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    this_thr = __kmp_threads[ global_tid ];
    serial_team = this_thr->th.th_serial_team;

    KMP_MB();
    KMP_DEBUG_ASSERT( serial_team );
    KMP_ASSERT( serial_team -> t.t_serialized );
    KMP_DEBUG_ASSERT( this_thr -> th.th_team == serial_team );
    KMP_DEBUG_ASSERT( serial_team != this_thr->th.th_root->r.r_root_team );
    KMP_DEBUG_ASSERT( serial_team -> t.t_threads );
    KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr );

    /* If necessary, pop the internal control stack values and replace the team values */
    top = serial_team -> t.t_control_stack_top;
    if ( top && top -> serial_nesting_level == serial_team -> t.t_serialized ) {
        copy_icvs( &serial_team -> t.t_threads[0] -> th.th_current_task -> td_icvs, top );
        serial_team -> t.t_control_stack_top = top -> next;
        __kmp_free(top);
    }

    //if( serial_team -> t.t_serialized > 1 )
    serial_team -> t.t_level--;

    /* pop dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
    {
        dispatch_private_info_t * disp_buffer = serial_team->t.t_dispatch->th_disp_buffer;
        serial_team->t.t_dispatch->th_disp_buffer =
            serial_team->t.t_dispatch->th_disp_buffer->next;
        __kmp_free( disp_buffer );
    }

    -- serial_team -> t.t_serialized;
    if ( serial_team -> t.t_serialized == 0 ) {

        /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
        if ( __kmp_inherit_fp_control && serial_team->t.t_fp_control_saved ) {
            __kmp_clear_x87_fpu_status_word();
            __kmp_load_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word );
            __kmp_load_mxcsr( &serial_team->t.t_mxcsr );
        }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

        this_thr -> th.th_team = serial_team -> t.t_parent;
        this_thr -> th.th_info.ds.ds_tid = serial_team -> t.t_master_tid;

        /* restore values cached in the thread */
        this_thr -> th.th_team_nproc = serial_team -> t.t_parent -> t.t_nproc; /* JPH */
        this_thr -> th.th_team_master = serial_team -> t.t_parent -> t.t_threads[0]; /* JPH */
        this_thr -> th.th_team_serialized = this_thr -> th.th_team -> t.t_serialized;

        /* TODO the below shouldn't need to be adjusted for serialized teams */
        this_thr -> th.th_dispatch = & this_thr -> th.th_team ->
            t.t_dispatch[ serial_team -> t.t_master_tid ];

        __kmp_pop_current_task_from_thread( this_thr );

        KMP_ASSERT( this_thr -> th.th_current_task -> td_flags.executing == 0 );
        this_thr -> th.th_current_task -> td_flags.executing = 1;

        if ( __kmp_tasking_mode != tskm_immediate_exec ) {
            // Copy the task team from the new child / old parent team to the thread.
            this_thr->th.th_task_team = this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
            KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n",
                            global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) );
        }
    } else {
        if ( __kmp_tasking_mode != tskm_immediate_exec ) {
            KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d decreasing nesting depth of serial team %p to %d\n",
                            global_tid, serial_team, serial_team -> t.t_serialized ) );
        }
    }

#if USE_ITT_BUILD
    kmp_uint64 cur_time = 0;
#if USE_ITT_NOTIFY
    if ( __itt_get_timestamp_ptr ) {
        cur_time = __itt_get_timestamp();
    }
#endif /* USE_ITT_NOTIFY */
    if ( this_thr->th.th_team->t.t_level == 0
#if OMP_40_ENABLED
        && this_thr->th.th_teams_microtask == NULL
#endif
    ) {
        // Report the barrier
        this_thr->th.th_ident = loc;
        if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
            ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
        {
            __kmp_itt_frame_submit( global_tid, this_thr->th.th_frame_time_serialized,
                                    cur_time, 0, loc, this_thr->th.th_team_nproc, 0 );
            if ( __kmp_forkjoin_frames_mode == 3 )
                // Since the barrier frame for a serialized region is equal to the region, we use the same begin timestamp as for the barrier.
                __kmp_itt_frame_submit( global_tid, serial_team->t.t_region_time,
                                        cur_time, 0, loc, this_thr->th.th_team_nproc, 2 );
        } else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
            ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
            // Mark the end of the "parallel" region for VTune. Only use one of the frame notification schemes at the moment.
            __kmp_itt_region_joined( global_tid, 1 );
    }
#endif /* USE_ITT_BUILD */

    if ( __kmp_env_consistency_check )
        __kmp_pop_parallel( global_tid, NULL );
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
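
For illustration only (a sketch of a typical lowering, not the exact code any particular
compiler emits), a directive such as
@code
#pragma omp flush
@endcode
might simply become
@code
__kmpc_flush( &loc );
@endcode
where `loc` stands for a compiler-generated ident_t describing the source location.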
*/
void
__kmpc_flush(ident_t *loc)
{
    KC_TRACE( 10, ("__kmpc_flush: called\n" ) );

    /* need explicit __mf() here since use volatile instead in library */
    KMP_MB();       /* Flush all pending memory write invalidates. */

    #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
        #if KMP_MIC
            // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
            // We shouldn't need it, though, since the ABI rules require that
            // * If the compiler generates NGO stores it also generates the fence
            // * If users hand-code NGO stores they should insert the fence
            // therefore no incomplete unordered stores should be visible.
        #else
            // C74404
            // This is to address non-temporal store instructions (sfence needed).
            // The clflush instruction is also addressed (mfence needed).
            // Probably the non-temporal load movntdqa instruction should also be addressed.
            // mfence is an SSE2 instruction. Do not execute it if the CPU is not SSE2.
            if ( ! __kmp_cpuinfo.initialized ) {
                __kmp_query_cpuid( & __kmp_cpuinfo );
            }; // if
            if ( ! __kmp_cpuinfo.sse2 ) {
                // CPU cannot execute SSE2 instructions.
            } else {
                #if KMP_COMPILER_ICC || KMP_COMPILER_MSVC
                _mm_mfence();
                #else
                __sync_synchronize();
                #endif // KMP_COMPILER_ICC
            }; // if
        #endif // KMP_MIC
    #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64)
        // Nothing to see here; move along.
    #elif KMP_ARCH_PPC64
        // Nothing needed here (we have a real MB above).
        #if KMP_OS_CNK
            // The flushing thread needs to yield here; this prevents a
            // busy-waiting thread from saturating the pipeline. flush is
            // often used in loops like this:
            //   while (!flag) {
            //       #pragma omp flush(flag)
            //   }
            // and adding the yield here is good for at least a 10x speedup
            // when running >2 threads per core (on the NAS LU benchmark).
            __kmp_yield(TRUE);
        #endif
    #else
        #error Unknown or unsupported architecture
    #endif

}

/* -------------------------------------------------------------------------- */

/* -------------------------------------------------------------------------- */

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void
__kmpc_barrier(ident_t *loc, kmp_int32 global_tid)
{
    KMP_COUNT_BLOCK(OMP_BARRIER);
    KMP_TIME_BLOCK(OMP_barrier);
    int explicit_barrier_flag;
    KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) );

    if (! TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

    if ( __kmp_env_consistency_check ) {
        if ( loc == 0 ) {
            KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
        }; // if

        __kmp_check_barrier( global_tid, ct_barrier, loc );
    }

    __kmp_threads[ global_tid ]->th.th_ident = loc;
    // TODO: explicit barrier_wait_id:
    // this function is called when 'barrier' directive is present or
    // implicit barrier at the end of a worksharing construct.
    // 1) better to add a per-thread barrier counter to a thread data structure
    // 2) set to 0 when a new team is created
    // 4) no sync is required

    __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
}

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
*/
kmp_int32
__kmpc_master(ident_t *loc, kmp_int32 global_tid)
{
    KMP_COUNT_BLOCK(OMP_MASTER);
    int status = 0;

    KC_TRACE( 10, ("__kmpc_master: called T#%d\n", global_tid ) );

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    if( KMP_MASTER_GTID( global_tid ))
        status = 1;

#if OMPT_SUPPORT && OMPT_TRACE
    if (status) {
        kmp_info_t *this_thr = __kmp_threads[ global_tid ];
        kmp_team_t *team = this_thr -> th.th_team;
        if ((ompt_status == ompt_status_track_callback) &&
            ompt_callbacks.ompt_callback(ompt_event_master_begin)) {
            int tid = __kmp_tid_from_gtid( global_tid );
            ompt_callbacks.ompt_callback(ompt_event_master_begin)(
                team->t.ompt_team_info.parallel_id,
                team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
        }
    }
#endif

    if ( __kmp_env_consistency_check ) {
#if KMP_USE_DYNAMIC_LOCK
        if (status)
            __kmp_push_sync( global_tid, ct_master, loc, NULL, 0 );
        else
            __kmp_check_sync( global_tid, ct_master, loc, NULL, 0 );
#else
        if (status)
            __kmp_push_sync( global_tid, ct_master, loc, NULL );
        else
            __kmp_check_sync( global_tid, ct_master, loc, NULL );
#endif
    }

    return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the thread
that executes the <tt>master</tt> region.
*/
void
__kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
{
    KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );

    KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));

#if OMPT_SUPPORT && OMPT_TRACE
    kmp_info_t *this_thr = __kmp_threads[ global_tid ];
    kmp_team_t *team = this_thr -> th.th_team;
    if ((ompt_status == ompt_status_track_callback) &&
        ompt_callbacks.ompt_callback(ompt_event_master_end)) {
        int tid = __kmp_tid_from_gtid( global_tid );
        ompt_callbacks.ompt_callback(ompt_event_master_end)(
            team->t.ompt_team_info.parallel_id,
            team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
    }
#endif

    if ( __kmp_env_consistency_check ) {
        if( global_tid < 0 )
            KMP_WARNING( ThreadIdentInvalid );

        if( KMP_MASTER_GTID( global_tid ))
            __kmp_pop_sync( global_tid, ct_master, loc );
    }
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
*/
void
__kmpc_ordered( ident_t * loc, kmp_int32 gtid )
{
    int cid = 0;
    kmp_info_t *th;
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    KC_TRACE( 10, ("__kmpc_ordered: called T#%d\n", gtid ));

    if (! TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

#if USE_ITT_BUILD
    __kmp_itt_ordered_prep( gtid );
    // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

    th = __kmp_threads[ gtid ];

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_status & ompt_status_track) {
        /* OMPT state update */
        th->th.ompt_thread_info.wait_id = (uint64_t) loc;
        th->th.ompt_thread_info.state = ompt_state_wait_ordered;

        /* OMPT event callback */
        if ((ompt_status == ompt_status_track_callback) &&
            ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) {
            ompt_callbacks.ompt_callback(ompt_event_wait_ordered)(
                th->th.ompt_thread_info.wait_id);
        }
    }
#endif

    if ( th -> th.th_dispatch -> th_deo_fcn != 0 )
        (*th->th.th_dispatch->th_deo_fcn)( & gtid, & cid, loc );
    else
        __kmp_parallel_deo( & gtid, & cid, loc );

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_status & ompt_status_track) {
        /* OMPT state update */
        th->th.ompt_thread_info.state = ompt_state_work_parallel;
        th->th.ompt_thread_info.wait_id = 0;

        /* OMPT event callback */
        if ((ompt_status == ompt_status_track_callback) &&
            ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) {
            ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)(
                th->th.ompt_thread_info.wait_id);
        }
    }
#endif

#if USE_ITT_BUILD
    __kmp_itt_ordered_start( gtid );
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void
__kmpc_end_ordered( ident_t * loc, kmp_int32 gtid )
{
    int cid = 0;
    kmp_info_t *th;

    KC_TRACE( 10, ("__kmpc_end_ordered: called T#%d\n", gtid ) );

#if USE_ITT_BUILD
    __kmp_itt_ordered_end( gtid );
    // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

    th = __kmp_threads[ gtid ];

    if ( th -> th.th_dispatch -> th_dxo_fcn != 0 )
        (*th->th.th_dispatch->th_dxo_fcn)( & gtid, & cid, loc );
    else
        __kmp_parallel_dxo( & gtid, & cid, loc );

#if OMPT_SUPPORT && OMPT_BLAME
    if ((ompt_status == ompt_status_track_callback) &&
        ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
        ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
            th->th.ompt_thread_info.wait_id);
    }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline kmp_indirect_lock_t *
__kmp_get_indirect_csptr(kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid, kmp_dyna_lockseq_t seq)
{
    // Code from __kmp_get_critical_section_ptr
    // This function returns an indirect lock object instead of a user lock.
    kmp_indirect_lock_t **lck, *ret;
    lck = (kmp_indirect_lock_t **)crit;
    ret = (kmp_indirect_lock_t *)TCR_PTR(*lck);
    if (ret == NULL) {
        void *idx;
        kmp_indirect_locktag_t tag = DYNA_GET_I_TAG(seq);
        kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
        ret = ilk;
        DYNA_I_LOCK_FUNC(ilk, init)(ilk->lock);
        DYNA_SET_I_LOCK_LOCATION(ilk, loc);
        DYNA_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
        KA_TRACE(20, ("__kmp_get_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
        __kmp_itt_critical_creating(ilk->lock, loc);
#endif
        int status = KMP_COMPARE_AND_STORE_PTR(lck, 0, ilk);
        if (status == 0) {
#if USE_ITT_BUILD
            __kmp_itt_critical_destroyed(ilk->lock);
#endif
            // Postponing destroy, to avoid costly dispatch here.
            //DYNA_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
            ret = (kmp_indirect_lock_t *)TCR_PTR(*lck);
            KMP_DEBUG_ASSERT(ret != NULL);
        }
    }
    return ret;
}

// Fast-path acquire tas lock
#define DYNA_ACQUIRE_TAS_LOCK(lock, gtid) { \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    if (l->lk.poll != DYNA_LOCK_FREE(tas) || \
        ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas))) { \
        kmp_uint32 spins; \
        KMP_FSYNC_PREPARE(l); \
        KMP_INIT_YIELD(spins); \
        if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
            KMP_YIELD(TRUE); \
        } else { \
            KMP_YIELD_SPIN(spins); \
        } \
        while (l->lk.poll != DYNA_LOCK_FREE(tas) || \
               ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas))) { \
            if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
                KMP_YIELD(TRUE); \
            } else { \
                KMP_YIELD_SPIN(spins); \
            } \
        } \
    } \
    KMP_FSYNC_ACQUIRED(l); \
}

// Fast-path test tas lock
#define DYNA_TEST_TAS_LOCK(lock, gtid, rc) { \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    rc = l->lk.poll == DYNA_LOCK_FREE(tas) && \
         KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas)); \
}

// Fast-path release tas lock
#define DYNA_RELEASE_TAS_LOCK(lock, gtid) { \
    TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, DYNA_LOCK_FREE(tas)); \
    KMP_MB(); \
}

#if DYNA_HAS_FUTEX

# include <unistd.h>
# include <sys/syscall.h>
# ifndef FUTEX_WAIT
# define FUTEX_WAIT 0
# endif
# ifndef FUTEX_WAKE
# define FUTEX_WAKE 1
# endif

// Fast-path acquire futex lock
#define DYNA_ACQUIRE_FUTEX_LOCK(lock, gtid) { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    kmp_int32 gtid_code = (gtid+1) << 1; \
    KMP_MB(); \
    KMP_FSYNC_PREPARE(ftx); \
    kmp_int32 poll_val; \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), DYNA_LOCK_FREE(futex), \
                       DYNA_LOCK_BUSY(gtid_code, futex))) != DYNA_LOCK_FREE(futex)) { \
        kmp_int32 cond = DYNA_LOCK_STRIP(poll_val) & 1; \
        if (!cond) { \
            if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, poll_val | DYNA_LOCK_BUSY(1, futex))) { \
                continue; \
            } \
            poll_val |= DYNA_LOCK_BUSY(1, futex); \
        } \
        kmp_int32 rc; \
        if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, NULL, NULL, 0)) != 0) { \
            continue; \
        } \
        gtid_code |= 1; \
    } \
    KMP_FSYNC_ACQUIRED(ftx); \
}

// Fast-path test futex lock
#define DYNA_TEST_FUTEX_LOCK(lock, gtid, rc) { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), DYNA_LOCK_FREE(futex), DYNA_LOCK_BUSY(gtid+1, futex) << 1)) { \
        KMP_FSYNC_ACQUIRED(ftx); \
        rc = TRUE; \
    } else { \
        rc = FALSE; \
    } \
}

// Fast-path release futex lock
#define DYNA_RELEASE_FUTEX_LOCK(lock, gtid) { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    KMP_MB(); \
    KMP_FSYNC_RELEASING(ftx); \
    kmp_int32 poll_val = KMP_XCHG_FIXED32(&(ftx->lk.poll), DYNA_LOCK_FREE(futex)); \
    if (DYNA_LOCK_STRIP(poll_val) & 1) { \
        syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, DYNA_LOCK_BUSY(1, futex), NULL, NULL, 0); \
    } \
    KMP_MB(); \
    KMP_YIELD(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
}

#endif // DYNA_HAS_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p
__kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid )
{
    kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

    //
    // Because of the double-check, the following load
    // doesn't need to be volatile.
    //
    kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );

    if ( lck == NULL ) {
        void * idx;

        // Allocate & initialize the lock.
        // Remember allocated locks in table in order to free them in __kmp_cleanup()
        lck = __kmp_user_lock_allocate( &idx, gtid, kmp_lf_critical_section );
        __kmp_init_user_lock_with_checks( lck );
        __kmp_set_user_lock_location( lck, loc );
#if USE_ITT_BUILD
        __kmp_itt_critical_creating( lck );
            // __kmp_itt_critical_creating() should be called *before* the first usage of underlying
            // lock. It is the only place where we can guarantee it. There are chances the lock will be
            // destroyed with no usage, but it is not a problem, because this is not a real event seen
            // by user but rather setting name for object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

        //
        // Use a cmpxchg instruction to slam the start of the critical
        // section with the lock pointer. If another thread beat us
        // to it, deallocate the lock, and use the lock that the other
        // thread allocated.
        //
        int status = KMP_COMPARE_AND_STORE_PTR( lck_pp, 0, lck );

        if ( status == 0 ) {
            // Deallocate the lock and reload the value.
#if USE_ITT_BUILD
            __kmp_itt_critical_destroyed( lck );
                // Let ITT know the lock is destroyed and the same memory location may be reused for
                // another purpose.
#endif /* USE_ITT_BUILD */
            __kmp_destroy_user_lock_with_checks( lck );
            __kmp_user_lock_free( &idx, gtid, lck );
            lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
            KMP_DEBUG_ASSERT( lck != NULL );
        }
    }
    return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
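
As an illustration only (a sketch of a typical lowering, not the exact code any particular
compiler emits), a construct such as
@code
#pragma omp critical (my_name)
{ body(); }
@endcode
might be lowered roughly as
@code
static kmp_critical_name crit_my_name;   // compiler-generated, zero-initialized
...
__kmpc_critical( &loc, gtid, &crit_my_name );
body();
__kmpc_end_critical( &loc, gtid, &crit_my_name );
@endcode
where `loc`, `gtid`, `body` and `crit_my_name` are names invented for this example.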
*/
void
__kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
    KMP_COUNT_BLOCK(OMP_CRITICAL);

    kmp_user_lock_p lck;

    KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );

#if KMP_USE_DYNAMIC_LOCK
    // Assumption: all direct locks fit in OMP_CRITICAL_SIZE.
    // The global sequence __kmp_user_lock_seq is used unless compiler pushes a value.
    if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
        lck = (kmp_user_lock_p)crit;
        // The thread that reaches here first needs to tag the lock word.
        if (*((kmp_dyna_lock_t *)lck) == 0) {
            KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)lck, 0, DYNA_GET_D_TAG(__kmp_user_lock_seq));
        }
        if (__kmp_env_consistency_check) {
            __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
        }
# if USE_ITT_BUILD
        __kmp_itt_critical_acquiring(lck);
# endif
# if DYNA_USE_FAST_TAS
        if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
            DYNA_ACQUIRE_TAS_LOCK(lck, global_tid);
        } else
# elif DYNA_USE_FAST_FUTEX
        if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
            DYNA_ACQUIRE_FUTEX_LOCK(lck, global_tid);
        } else
# endif
        {
            DYNA_D_LOCK_FUNC(lck, set)((kmp_dyna_lock_t *)lck, global_tid);
        }
    } else {
        kmp_indirect_lock_t *ilk = __kmp_get_indirect_csptr(crit, loc, global_tid, __kmp_user_lock_seq);
        lck = ilk->lock;
        if (__kmp_env_consistency_check) {
            __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
        }
# if USE_ITT_BUILD
        __kmp_itt_critical_acquiring(lck);
# endif
        DYNA_I_LOCK_FUNC(ilk, set)(lck, global_tid);
    }

#else // KMP_USE_DYNAMIC_LOCK

    //TODO: add THR_OVHD_STATE

    KMP_CHECK_USER_LOCK_INIT();

    if ( ( __kmp_user_lock_kind == lk_tas )
      && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
#endif
    else { // ticket, queuing or drdpa
        lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
    }

    if ( __kmp_env_consistency_check )
        __kmp_push_sync( global_tid, ct_critical, loc, lck );

    /* since the critical directive binds to all threads, not just
     * the current team we have to check this even if we are in a
     * serialized team */
    /* also, even if we are the uber thread, we still have to conduct the lock,
     * as we have to contend with sibling threads */

#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring( lck );
#endif /* USE_ITT_BUILD */
    // Value of 'crit' should be good for using as a critical_id of the critical section directive.
    __kmp_acquire_user_lock_with_checks( lck, global_tid );

#endif // KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
    __kmp_itt_critical_acquired( lck );
#endif /* USE_ITT_BUILD */

    KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
} // __kmpc_critical

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void
__kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
{
    kmp_user_lock_p lck;

    KC_TRACE( 10, ("__kmpc_end_critical: called T#%d\n", global_tid ));

#if KMP_USE_DYNAMIC_LOCK
    if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
        lck = (kmp_user_lock_p)crit;
        KMP_ASSERT(lck != NULL);
        if (__kmp_env_consistency_check) {
            __kmp_pop_sync(global_tid, ct_critical, loc);
        }
# if USE_ITT_BUILD
        __kmp_itt_critical_releasing( lck );
# endif
# if DYNA_USE_FAST_TAS
        if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
            DYNA_RELEASE_TAS_LOCK(lck, global_tid);
        } else
# elif DYNA_USE_FAST_FUTEX
        if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
            DYNA_RELEASE_FUTEX_LOCK(lck, global_tid);
        } else
# endif
        {
            DYNA_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
        }
    } else {
        kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
        KMP_ASSERT(ilk != NULL);
        lck = ilk->lock;
        if (__kmp_env_consistency_check) {
            __kmp_pop_sync(global_tid, ct_critical, loc);
        }
# if USE_ITT_BUILD
        __kmp_itt_critical_releasing( lck );
# endif
        DYNA_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
    }

#else // KMP_USE_DYNAMIC_LOCK

    if ( ( __kmp_user_lock_kind == lk_tas )
      && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
#endif
    else { // ticket, queuing or drdpa
        lck = (kmp_user_lock_p) TCR_PTR(*((kmp_user_lock_p *)crit));
    }

    KMP_ASSERT(lck != NULL);

    if ( __kmp_env_consistency_check )
        __kmp_pop_sync( global_tid, ct_critical, loc );

#if USE_ITT_BUILD
    __kmp_itt_critical_releasing( lck );
#endif /* USE_ITT_BUILD */
    // Value of 'crit' should be good for using as a critical_id of the critical section directive.
    __kmp_release_user_lock_with_checks( lck, global_tid );

#if OMPT_SUPPORT && OMPT_BLAME
    if ((ompt_status == ompt_status_track_callback) &&
        ompt_callbacks.ompt_callback(ompt_event_release_critical)) {
        ompt_callbacks.ompt_callback(ompt_event_release_critical)(
            (uint64_t) lck);
    }
#endif

#endif // KMP_USE_DYNAMIC_LOCK

    KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside this function.
*/
kmp_int32
__kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid)
{
    int status;

    KC_TRACE( 10, ("__kmpc_barrier_master: called T#%d\n", global_tid ) );

    if (! TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

    if ( __kmp_env_consistency_check )
        __kmp_check_barrier( global_tid, ct_barrier, loc );

#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    status = __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL );

    return (status != 0) ? 0 : 1;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void
__kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid)
{
    KC_TRACE( 10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid ));

    __kmp_end_split_barrier ( bs_plain_barrier, global_tid );
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the construct has nowait semantics:
no thread waits for the completion of the <tt>master</tt> block.
*/
kmp_int32
__kmpc_barrier_master_nowait( ident_t * loc, kmp_int32 global_tid )
{
    kmp_int32 ret;

    KC_TRACE( 10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid ));

    if (! TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

    if ( __kmp_env_consistency_check ) {
        if ( loc == 0 ) {
            KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
        }
        __kmp_check_barrier( global_tid, ct_barrier, loc );
    }

#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );

    ret = __kmpc_master (loc, global_tid);

    if ( __kmp_env_consistency_check ) {
        /* there's no __kmpc_end_master called; so the (stats) */
        /* actions of __kmpc_end_master are done here */

        if ( global_tid < 0 ) {
            KMP_WARNING( ThreadIdentInvalid );
        }
        if (ret) {
            /* only one thread should do the pop since only */
            /* one did the push (see __kmpc_master()) */

            __kmp_pop_sync( global_tid, ct_master, loc );
        }
    }

    return (ret);
}

/* The BARRIER for a SINGLE process section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number
@return One if this thread should execute the single construct, zero otherwise.

Test whether to execute a <tt>single</tt> construct.
There are no implicit barriers in the two "single" calls; rather, the compiler should
introduce an explicit barrier if it is required.
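
For illustration only (a sketch of a typical lowering, not the exact code any particular
compiler emits), a construct such as
@code
#pragma omp single
{ body(); }
@endcode
might become roughly
@code
if ( __kmpc_single( &loc, gtid ) ) {
    body();
    __kmpc_end_single( &loc, gtid );
}
__kmpc_barrier( &loc, gtid );   // implicit barrier of the single construct (omitted with nowait)
@endcode
where `loc`, `gtid` and `body` are names invented for this example.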
*/

kmp_int32
__kmpc_single(ident_t *loc, kmp_int32 global_tid)
{
    KMP_COUNT_BLOCK(OMP_SINGLE);
    kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );

#if OMPT_SUPPORT && OMPT_TRACE
    kmp_info_t *this_thr = __kmp_threads[ global_tid ];
    kmp_team_t *team = this_thr -> th.th_team;
    int tid = __kmp_tid_from_gtid( global_tid );

    if ((ompt_status == ompt_status_track_callback)) {
        if (rc) {
            if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) {
                ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)(
                    team->t.ompt_team_info.parallel_id,
                    team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id,
                    team->t.ompt_team_info.microtask);
            }
        } else {
            if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) {
                ompt_callbacks.ompt_callback(ompt_event_single_others_begin)(
                    team->t.ompt_team_info.parallel_id,
                    team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
            }
            this_thr->th.ompt_thread_info.state = ompt_state_wait_single;
        }
    }
#endif

    return rc;
}

/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number

Mark the end of a <tt>single</tt> construct. This function should
only be called by the thread that executed the block of code protected
by the `single` construct.
*/
void
__kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
{
    __kmp_exit_single( global_tid );

#if OMPT_SUPPORT && OMPT_TRACE
    kmp_info_t *this_thr = __kmp_threads[ global_tid ];
    kmp_team_t *team = this_thr -> th.th_team;
    int tid = __kmp_tid_from_gtid( global_tid );

    if ((ompt_status == ompt_status_track_callback) &&
        ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) {
        ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)(
            team->t.ompt_team_info.parallel_id,
            team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
    }
#endif
}

1417/*!
1418@ingroup WORK_SHARING
1419@param loc Source location
1420@param global_tid Global thread id
1421
1422Mark the end of a statically scheduled loop.
1423*/
1424void
1425__kmpc_for_static_fini( ident_t *loc, kmp_int32 global_tid )
1426{
1427 KE_TRACE( 10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1428
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001429#if OMPT_SUPPORT && OMPT_TRACE
1430 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
1431 kmp_team_t *team = this_thr -> th.th_team;
1432 int tid = __kmp_tid_from_gtid( global_tid );
1433
1434 if ((ompt_status == ompt_status_track_callback) &&
1435 ompt_callbacks.ompt_callback(ompt_event_loop_end)) {
1436 ompt_callbacks.ompt_callback(ompt_event_loop_end)(
1437 team->t.ompt_team_info.parallel_id,
1438 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1439 }
1440#endif
1441
Jim Cownie5e8470a2013-09-27 10:38:44 +00001442 if ( __kmp_env_consistency_check )
1443 __kmp_pop_workshare( global_tid, ct_pdo, loc );
1444}
1445
1446/*
1447 * User routines which take C-style arguments (call by value)
1448 * different from the Fortran equivalent routines
1449 */
1450
1451void
1452ompc_set_num_threads( int arg )
1453{
1454// !!!!! TODO: check the per-task binding
1455 __kmp_set_num_threads( arg, __kmp_entry_gtid() );
1456}
1457
1458void
1459ompc_set_dynamic( int flag )
1460{
1461 kmp_info_t *thread;
1462
1463 /* For the thread-private implementation of the internal controls */
1464 thread = __kmp_entry_thread();
1465
1466 __kmp_save_internal_controls( thread );
1467
1468 set__dynamic( thread, flag ? TRUE : FALSE );
1469}
1470
1471void
1472ompc_set_nested( int flag )
1473{
1474 kmp_info_t *thread;
1475
1476 /* For the thread-private internal controls implementation */
1477 thread = __kmp_entry_thread();
1478
1479 __kmp_save_internal_controls( thread );
1480
1481 set__nested( thread, flag ? TRUE : FALSE );
1482}
1483
Jim Cownie5e8470a2013-09-27 10:38:44 +00001484void
1485ompc_set_max_active_levels( int max_active_levels )
1486{
1487 /* TO DO */
1488 /* we want per-task implementation of this internal control */
1489
1490 /* For the per-thread internal controls implementation */
1491 __kmp_set_max_active_levels( __kmp_entry_gtid(), max_active_levels );
1492}
1493
1494void
1495ompc_set_schedule( omp_sched_t kind, int modifier )
1496{
1497// !!!!! TODO: check the per-task binding
1498 __kmp_set_schedule( __kmp_entry_gtid(), ( kmp_sched_t ) kind, modifier );
1499}
1500
1501int
1502ompc_get_ancestor_thread_num( int level )
1503{
1504 return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), level );
1505}
1506
1507int
1508ompc_get_team_size( int level )
1509{
1510 return __kmp_get_team_size( __kmp_entry_gtid(), level );
1511}
1512
Jim Cownie5e8470a2013-09-27 10:38:44 +00001513void
1514kmpc_set_stacksize( int arg )
1515{
1516 // __kmp_aux_set_stacksize initializes the library if needed
1517 __kmp_aux_set_stacksize( arg );
1518}
1519
1520void
1521kmpc_set_stacksize_s( size_t arg )
1522{
1523 // __kmp_aux_set_stacksize initializes the library if needed
1524 __kmp_aux_set_stacksize( arg );
1525}
1526
1527void
1528kmpc_set_blocktime( int arg )
1529{
1530 int gtid, tid;
1531 kmp_info_t *thread;
1532
1533 gtid = __kmp_entry_gtid();
1534 tid = __kmp_tid_from_gtid(gtid);
1535 thread = __kmp_thread_from_gtid(gtid);
1536
1537 __kmp_aux_set_blocktime( arg, thread, tid );
1538}
1539
1540void
1541kmpc_set_library( int arg )
1542{
1543 // __kmp_user_set_library initializes the library if needed
1544 __kmp_user_set_library( (enum library_type)arg );
1545}
1546
1547void
1548kmpc_set_defaults( char const * str )
1549{
1550 // __kmp_aux_set_defaults initializes the library if needed
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001551 __kmp_aux_set_defaults( str, KMP_STRLEN( str ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001552}
1553
Jim Cownie5e8470a2013-09-27 10:38:44 +00001554int
1555kmpc_set_affinity_mask_proc( int proc, void **mask )
1556{
Alp Toker98758b02014-03-02 04:12:06 +00001557#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001558 return -1;
1559#else
1560 if ( ! TCR_4(__kmp_init_middle) ) {
1561 __kmp_middle_initialize();
1562 }
1563 return __kmp_aux_set_affinity_mask_proc( proc, mask );
1564#endif
1565}
1566
1567int
1568kmpc_unset_affinity_mask_proc( int proc, void **mask )
1569{
Alp Toker98758b02014-03-02 04:12:06 +00001570#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001571 return -1;
1572#else
1573 if ( ! TCR_4(__kmp_init_middle) ) {
1574 __kmp_middle_initialize();
1575 }
1576 return __kmp_aux_unset_affinity_mask_proc( proc, mask );
1577#endif
1578}
1579
1580int
1581kmpc_get_affinity_mask_proc( int proc, void **mask )
1582{
Alp Toker98758b02014-03-02 04:12:06 +00001583#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001584 return -1;
1585#else
1586 if ( ! TCR_4(__kmp_init_middle) ) {
1587 __kmp_middle_initialize();
1588 }
1589 return __kmp_aux_get_affinity_mask_proc( proc, mask );
1590#endif
1591}
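
/*
A sketch of the user-visible affinity-mask API that these entries implement, assuming the
kmp_*affinity* wrappers declared in this runtime's <omp.h> (kmp_create_affinity_mask,
kmp_set_affinity, and friends). On platforms without affinity support the calls fail
gracefully, mirroring the -1 returned by the stubs above.

@code
#include <omp.h>
#include <stdio.h>

int main(void)
{
    kmp_affinity_mask_t mask;
    kmp_create_affinity_mask(&mask);

    // Restrict the calling thread to logical processors 0 and 2.
    kmp_set_affinity_mask_proc(0, &mask);              // cf. kmpc_set_affinity_mask_proc()
    kmp_set_affinity_mask_proc(2, &mask);
    if (kmp_set_affinity(&mask) != 0)
        printf("affinity not supported or mask rejected\n");

    printf("proc 2 in mask: %d\n",
           kmp_get_affinity_mask_proc(2, &mask));      // cf. kmpc_get_affinity_mask_proc()

    kmp_destroy_affinity_mask(&mask);
    return 0;
}
@endcode
*/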
1592
Jim Cownie5e8470a2013-09-27 10:38:44 +00001593
1594/* -------------------------------------------------------------------------- */
1595/*!
1596@ingroup THREADPRIVATE
1597@param loc source location information
1598@param gtid global thread number
1599@param cpy_size size of the cpy_data buffer
1600@param cpy_data pointer to data to be copied
1601@param cpy_func helper function to call for copying data
1602@param didit flag variable: 1=single thread; 0=not single thread
1603
1604__kmpc_copyprivate implements the interface for the private data broadcast needed for
1605the copyprivate clause associated with a single region in an OpenMP<sup>*</sup> program (both C and Fortran).
1606All threads participating in the parallel region call this routine.
1607One of the threads (called the single thread) should have the <tt>didit</tt> variable set to 1
1608and all other threads should have that variable set to 0.
1609All threads pass a pointer to a data buffer (cpy_data) that they have built.
1610
1611The OpenMP specification forbids the use of nowait on the single region when a copyprivate
1612clause is present. However, @ref __kmpc_copyprivate implements a barrier internally to avoid
1613race conditions, so the code generation for the single region should avoid generating a barrier
1614after the call to @ref __kmpc_copyprivate.
1615
1616The <tt>gtid</tt> parameter is the global thread id for the current thread.
1617The <tt>loc</tt> parameter is a pointer to source location information.
1618
1619Internal implementation: The single thread will first copy its descriptor address (cpy_data)
1620to a team-private location, then each of the other threads will call the function pointed to by
1621the parameter cpy_func, which copies the data from the single thread's cpy_data buffer into the calling thread's own cpy_data buffer.
1622
1623The cpy_func routine used for the copy and the contents of the data area defined by cpy_data
1624and cpy_size may be built in any fashion that will allow the copy to be done. For instance,
1625the cpy_data buffer can hold the actual data to be copied or it may hold a list of pointers
1626to the data. The cpy_func routine must interpret the cpy_data buffer appropriately.
1627
1628The interface to cpy_func is as follows:
1629@code
1630void cpy_func( void *destination, void *source )
1631@endcode
1632where void *destination is the cpy_data pointer for the thread being copied to
1633and void *source is the cpy_data pointer for the thread being copied from.
1634*/
1635void
1636__kmpc_copyprivate( ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit )
1637{
1638 void **data_ptr;
1639
1640 KC_TRACE( 10, ("__kmpc_copyprivate: called T#%d\n", gtid ));
1641
1642 KMP_MB();
1643
1644 data_ptr = & __kmp_team_from_gtid( gtid )->t.t_copypriv_data;
1645
1646 if ( __kmp_env_consistency_check ) {
1647 if ( loc == 0 ) {
1648 KMP_WARNING( ConstructIdentInvalid );
1649 }
1650 }
1651
1652 /* ToDo: Optimize the following two barriers into some kind of split barrier */
1653
1654 if (didit) *data_ptr = cpy_data;
1655
1656 /* This barrier is not a barrier region boundary */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001657#if USE_ITT_NOTIFY
1658 __kmp_threads[gtid]->th.th_ident = loc;
1659#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001660 __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1661
1662 if (! didit) (*cpy_func)( cpy_data, *data_ptr );
1663
1664 /* Consider next barrier the user-visible barrier for barrier region boundaries */
1665 /* Nesting checks are already handled by the single construct checks */
1666
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001667#if USE_ITT_NOTIFY
1668 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. tasks can overwrite the location)
1669#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001670 __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1671}
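
/*
A hedged sketch of the call sequence a compiler might emit for a single construct with a
copyprivate(x) clause on an int, following the contract documented above. The helper name
my_cpy_func, the literal value, and the way loc/gtid are obtained are illustrative
assumptions; real compilers differ in detail. Types come from kmp.h.

@code
// Copy from the single thread's buffer (source) into the calling thread's buffer (destination).
static void my_cpy_func(void *destination, void *source)
{
    *(int *)destination = *(int *)source;
}

// Roughly:  #pragma omp single copyprivate(x)  { x = 42; }  inside a parallel region.
static void single_copyprivate_sketch(ident_t *loc, kmp_int32 gtid, int *x)
{
    kmp_int32 didit = 0;
    if (__kmpc_single(loc, gtid)) {   // one thread executes the single body
        *x = 42;
        didit = 1;
        __kmpc_end_single(loc, gtid);
    }
    // No explicit barrier here: __kmpc_copyprivate synchronizes internally.
    __kmpc_copyprivate(loc, gtid, sizeof(int), x, my_cpy_func, didit);
}
@endcode
*/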
1672
1673/* -------------------------------------------------------------------------- */
1674
1675#define INIT_LOCK __kmp_init_user_lock_with_checks
1676#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
1677#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
1678#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
1679#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
1680#define ACQUIRE_NESTED_LOCK_TIMED __kmp_acquire_nested_user_lock_with_checks_timed
1681#define RELEASE_LOCK __kmp_release_user_lock_with_checks
1682#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
1683#define TEST_LOCK __kmp_test_user_lock_with_checks
1684#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
1685#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
1686#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
1687
1688
1689/*
1690 * TODO: Make check abort messages use location info & pass it
1691 * into with_checks routines
1692 */
1693
1694/* initialize the lock */
1695void
1696__kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001697#if KMP_USE_DYNAMIC_LOCK
1698 KMP_DEBUG_ASSERT(__kmp_init_serial);
1699 if (__kmp_env_consistency_check && user_lock == NULL) {
1700 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
1701 }
1702 if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
1703 DYNA_INIT_D_LOCK(user_lock, __kmp_user_lock_seq);
1704# if USE_ITT_BUILD
1705 __kmp_itt_lock_creating((kmp_user_lock_p)user_lock, NULL);
1706# endif
1707 } else {
1708 DYNA_INIT_I_LOCK(user_lock, __kmp_user_lock_seq);
1709 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(user_lock);
1710 DYNA_SET_I_LOCK_LOCATION(ilk, loc);
1711# if USE_ITT_BUILD
1712 __kmp_itt_lock_creating(ilk->lock, loc);
1713# endif
1714 }
1715
1716#else // KMP_USE_DYNAMIC_LOCK
1717
Jim Cownie5e8470a2013-09-27 10:38:44 +00001718 static char const * const func = "omp_init_lock";
1719 kmp_user_lock_p lck;
1720 KMP_DEBUG_ASSERT( __kmp_init_serial );
1721
1722 if ( __kmp_env_consistency_check ) {
1723 if ( user_lock == NULL ) {
1724 KMP_FATAL( LockIsUninitialized, func );
1725 }
1726 }
1727
1728 KMP_CHECK_USER_LOCK_INIT();
1729
1730 if ( ( __kmp_user_lock_kind == lk_tas )
1731 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1732 lck = (kmp_user_lock_p)user_lock;
1733 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001734#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001735 else if ( ( __kmp_user_lock_kind == lk_futex )
1736 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1737 lck = (kmp_user_lock_p)user_lock;
1738 }
1739#endif
1740 else {
Jim Cownie181b4bb2013-12-23 17:28:57 +00001741 lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001742 }
1743 INIT_LOCK( lck );
1744 __kmp_set_user_lock_location( lck, loc );
1745
1746#if USE_ITT_BUILD
1747 __kmp_itt_lock_creating( lck );
1748#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001749
1750#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001751} // __kmpc_init_lock
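
/*
For reference, the user-level pattern these simple-lock entry points exist to serve.
Whether a given compiler lowers the omp_*_lock calls to the __kmpc_*_lock entries (with an
ident_t) or to the plain library entry points is an implementation detail, so the "cf."
comments are only indicative.

@code
#include <omp.h>
#include <stdio.h>

int main(void)
{
    omp_lock_t lock;
    int counter = 0;

    omp_init_lock(&lock);            // cf. __kmpc_init_lock()
    #pragma omp parallel num_threads(4)
    {
        omp_set_lock(&lock);         // cf. __kmpc_set_lock()
        ++counter;                   // protected update
        omp_unset_lock(&lock);       // cf. __kmpc_unset_lock()
    }
    omp_destroy_lock(&lock);         // cf. __kmpc_destroy_lock()

    printf("counter = %d\n", counter);
    return 0;
}
@endcode
*/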
1752
1753/* initialize the lock */
1754void
1755__kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001756#if KMP_USE_DYNAMIC_LOCK
1757
1758 KMP_DEBUG_ASSERT(__kmp_init_serial);
1759 if (__kmp_env_consistency_check && user_lock == NULL) {
1760 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
1761 }
1762 // Invoke init function after converting to nested version.
1763 kmp_dyna_lockseq_t nested_seq;
1764 switch (__kmp_user_lock_seq) {
1765 case lockseq_tas: nested_seq = lockseq_nested_tas; break;
1766#if DYNA_HAS_FUTEX
1767 case lockseq_futex: nested_seq = lockseq_nested_futex; break;
1768#endif
1769 case lockseq_ticket: nested_seq = lockseq_nested_ticket; break;
1770 case lockseq_queuing: nested_seq = lockseq_nested_queuing; break;
1771 case lockseq_drdpa: nested_seq = lockseq_nested_drdpa; break;
1772 default: nested_seq = lockseq_nested_queuing; break;
1773 // Use nested queuing lock for lock kinds without "nested" implementation.
1774 }
1775 DYNA_INIT_I_LOCK(user_lock, nested_seq);
1776 // All nested locks are indirect locks.
1777 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(user_lock);
1778 DYNA_SET_I_LOCK_LOCATION(ilk, loc);
1779# if USE_ITT_BUILD
1780 __kmp_itt_lock_creating(ilk->lock, loc);
1781# endif
1782
1783#else // KMP_USE_DYNAMIC_LOCK
1784
Jim Cownie5e8470a2013-09-27 10:38:44 +00001785 static char const * const func = "omp_init_nest_lock";
1786 kmp_user_lock_p lck;
1787 KMP_DEBUG_ASSERT( __kmp_init_serial );
1788
1789 if ( __kmp_env_consistency_check ) {
1790 if ( user_lock == NULL ) {
1791 KMP_FATAL( LockIsUninitialized, func );
1792 }
1793 }
1794
1795 KMP_CHECK_USER_LOCK_INIT();
1796
1797 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1798 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1799 lck = (kmp_user_lock_p)user_lock;
1800 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001801#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001802 else if ( ( __kmp_user_lock_kind == lk_futex )
1803 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1804 <= OMP_NEST_LOCK_T_SIZE ) ) {
1805 lck = (kmp_user_lock_p)user_lock;
1806 }
1807#endif
1808 else {
Jim Cownie181b4bb2013-12-23 17:28:57 +00001809 lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001810 }
1811
1812 INIT_NESTED_LOCK( lck );
1813 __kmp_set_user_lock_location( lck, loc );
1814
1815#if USE_ITT_BUILD
1816 __kmp_itt_lock_creating( lck );
1817#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001818
1819#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001820} // __kmpc_init_nest_lock
1821
1822void
1823__kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001824#if KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001825
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001826# if USE_ITT_BUILD
1827 kmp_user_lock_p lck;
1828 if (DYNA_EXTRACT_D_TAG(user_lock) == 0) {
1829 lck = ((kmp_indirect_lock_t *)DYNA_LOOKUP_I_LOCK(user_lock))->lock;
1830 } else {
1831 lck = (kmp_user_lock_p)user_lock;
1832 }
1833 __kmp_itt_lock_destroyed(lck);
1834# endif
1835 DYNA_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
1836#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001837 kmp_user_lock_p lck;
1838
1839 if ( ( __kmp_user_lock_kind == lk_tas )
1840 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1841 lck = (kmp_user_lock_p)user_lock;
1842 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001843#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001844 else if ( ( __kmp_user_lock_kind == lk_futex )
1845 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1846 lck = (kmp_user_lock_p)user_lock;
1847 }
1848#endif
1849 else {
1850 lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_lock" );
1851 }
1852
1853#if USE_ITT_BUILD
1854 __kmp_itt_lock_destroyed( lck );
1855#endif /* USE_ITT_BUILD */
1856 DESTROY_LOCK( lck );
1857
1858 if ( ( __kmp_user_lock_kind == lk_tas )
1859 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1860 ;
1861 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001862#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001863 else if ( ( __kmp_user_lock_kind == lk_futex )
1864 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1865 ;
1866 }
1867#endif
1868 else {
1869 __kmp_user_lock_free( user_lock, gtid, lck );
1870 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001871#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001872} // __kmpc_destroy_lock
1873
1874/* destroy the lock */
1875void
1876__kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001877#if KMP_USE_DYNAMIC_LOCK
1878
1879# if USE_ITT_BUILD
1880 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(user_lock);
1881 __kmp_itt_lock_destroyed(ilk->lock);
1882# endif
1883 DYNA_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
1884
1885#else // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001886
1887 kmp_user_lock_p lck;
1888
1889 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1890 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1891 lck = (kmp_user_lock_p)user_lock;
1892 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001893#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001894 else if ( ( __kmp_user_lock_kind == lk_futex )
1895 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1896 <= OMP_NEST_LOCK_T_SIZE ) ) {
1897 lck = (kmp_user_lock_p)user_lock;
1898 }
1899#endif
1900 else {
1901 lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_nest_lock" );
1902 }
1903
1904#if USE_ITT_BUILD
1905 __kmp_itt_lock_destroyed( lck );
1906#endif /* USE_ITT_BUILD */
1907
1908 DESTROY_NESTED_LOCK( lck );
1909
1910 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1911 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1912 ;
1913 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001914#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001915 else if ( ( __kmp_user_lock_kind == lk_futex )
1916 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1917 <= OMP_NEST_LOCK_T_SIZE ) ) {
1918 ;
1919 }
1920#endif
1921 else {
1922 __kmp_user_lock_free( user_lock, gtid, lck );
1923 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001924#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001925} // __kmpc_destroy_nest_lock
1926
1927void
1928__kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001929 KMP_COUNT_BLOCK(OMP_set_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001930#if KMP_USE_DYNAMIC_LOCK
1931 int tag = DYNA_EXTRACT_D_TAG(user_lock);
1932# if USE_ITT_BUILD
1933 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); // itt function will get to the right lock object.
1934# endif
1935# if DYNA_USE_FAST_TAS
1936 if (tag == locktag_tas && !__kmp_env_consistency_check) {
1937 DYNA_ACQUIRE_TAS_LOCK(user_lock, gtid);
1938 } else
1939# elif DYNA_USE_FAST_FUTEX
1940 if (tag == locktag_futex && !__kmp_env_consistency_check) {
1941 DYNA_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
1942 } else
1943# endif
1944 {
1945 __kmp_direct_set_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid);
1946 }
1947# if USE_ITT_BUILD
1948 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
1949# endif
1950
1951#else // KMP_USE_DYNAMIC_LOCK
1952
Jim Cownie5e8470a2013-09-27 10:38:44 +00001953 kmp_user_lock_p lck;
1954
1955 if ( ( __kmp_user_lock_kind == lk_tas )
1956 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1957 lck = (kmp_user_lock_p)user_lock;
1958 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001959#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001960 else if ( ( __kmp_user_lock_kind == lk_futex )
1961 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1962 lck = (kmp_user_lock_p)user_lock;
1963 }
1964#endif
1965 else {
1966 lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" );
1967 }
1968
1969#if USE_ITT_BUILD
1970 __kmp_itt_lock_acquiring( lck );
1971#endif /* USE_ITT_BUILD */
1972
1973 ACQUIRE_LOCK( lck, gtid );
1974
1975#if USE_ITT_BUILD
1976 __kmp_itt_lock_acquired( lck );
1977#endif /* USE_ITT_BUILD */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001978
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001979#endif // KMP_USE_DYNAMIC_LOCK
1980}
Jim Cownie5e8470a2013-09-27 10:38:44 +00001981
1982void
1983__kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001984#if KMP_USE_DYNAMIC_LOCK
1985
1986# if USE_ITT_BUILD
1987 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
1988# endif
1989 DYNA_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
1990# if USE_ITT_BUILD
1991 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
1992#endif
1993
1994#else // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001995 kmp_user_lock_p lck;
1996
1997 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1998 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1999 lck = (kmp_user_lock_p)user_lock;
2000 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002001#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002002 else if ( ( __kmp_user_lock_kind == lk_futex )
2003 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2004 <= OMP_NEST_LOCK_T_SIZE ) ) {
2005 lck = (kmp_user_lock_p)user_lock;
2006 }
2007#endif
2008 else {
2009 lck = __kmp_lookup_user_lock( user_lock, "omp_set_nest_lock" );
2010 }
2011
2012#if USE_ITT_BUILD
2013 __kmp_itt_lock_acquiring( lck );
2014#endif /* USE_ITT_BUILD */
2015
2016 ACQUIRE_NESTED_LOCK( lck, gtid );
2017
2018#if USE_ITT_BUILD
2019 __kmp_itt_lock_acquired( lck );
2020#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002021#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002022}
2023
2024void
2025__kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2026{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002027#if KMP_USE_DYNAMIC_LOCK
2028
2029 int tag = DYNA_EXTRACT_D_TAG(user_lock);
2030# if USE_ITT_BUILD
2031 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2032# endif
2033# if DYNA_USE_FAST_TAS
2034 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2035 DYNA_RELEASE_TAS_LOCK(user_lock, gtid);
2036 } else
2037# elif DYNA_USE_FAST_FUTEX
2038 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2039 DYNA_RELEASE_FUTEX_LOCK(user_lock, gtid);
2040 } else
2041# endif
2042 {
2043 __kmp_direct_unset_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2044 }
2045
2046#else // KMP_USE_DYNAMIC_LOCK
2047
Jim Cownie5e8470a2013-09-27 10:38:44 +00002048 kmp_user_lock_p lck;
2049
2050 /* Can't use serial interval since not block structured */
2051 /* release the lock */
2052
2053 if ( ( __kmp_user_lock_kind == lk_tas )
2054 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002055#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002056 // "fast" path implemented to fix customer performance issue
2057#if USE_ITT_BUILD
2058 __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
2059#endif /* USE_ITT_BUILD */
2060 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2061 KMP_MB();
2062 return;
2063#else
2064 lck = (kmp_user_lock_p)user_lock;
2065#endif
2066 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002067#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002068 else if ( ( __kmp_user_lock_kind == lk_futex )
2069 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2070 lck = (kmp_user_lock_p)user_lock;
2071 }
2072#endif
2073 else {
2074 lck = __kmp_lookup_user_lock( user_lock, "omp_unset_lock" );
2075 }
2076
2077#if USE_ITT_BUILD
2078 __kmp_itt_lock_releasing( lck );
2079#endif /* USE_ITT_BUILD */
2080
2081 RELEASE_LOCK( lck, gtid );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002082
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002083#if OMPT_SUPPORT && OMPT_BLAME
2084 if ((ompt_status == ompt_status_track_callback) &&
2085 ompt_callbacks.ompt_callback(ompt_event_release_lock)) {
2086 ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t) lck);
2087 }
2088#endif
2089
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002090#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002091}
2092
2093/* release the lock */
2094void
2095__kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2096{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002097#if KMP_USE_DYNAMIC_LOCK
2098
2099# if USE_ITT_BUILD
2100 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2101# endif
2102 DYNA_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2103
2104#else // KMP_USE_DYNAMIC_LOCK
2105
Jim Cownie5e8470a2013-09-27 10:38:44 +00002106 kmp_user_lock_p lck;
2107
2108 /* Can't use serial interval since not block structured */
2109
2110 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2111 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002112#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002113 // "fast" path implemented to fix customer performance issue
2114 kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock;
2115#if USE_ITT_BUILD
2116 __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
2117#endif /* USE_ITT_BUILD */
2118 if ( --(tl->lk.depth_locked) == 0 ) {
2119 TCW_4(tl->lk.poll, 0);
2120 }
2121 KMP_MB();
2122 return;
2123#else
2124 lck = (kmp_user_lock_p)user_lock;
2125#endif
2126 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002127#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002128 else if ( ( __kmp_user_lock_kind == lk_futex )
2129 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2130 <= OMP_NEST_LOCK_T_SIZE ) ) {
2131 lck = (kmp_user_lock_p)user_lock;
2132 }
2133#endif
2134 else {
2135 lck = __kmp_lookup_user_lock( user_lock, "omp_unset_nest_lock" );
2136 }
2137
2138#if USE_ITT_BUILD
2139 __kmp_itt_lock_releasing( lck );
2140#endif /* USE_ITT_BUILD */
2141
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002142 int release_status = RELEASE_NESTED_LOCK( lck, gtid );
2143#if OMPT_SUPPORT && OMPT_BLAME
2144 if (ompt_status == ompt_status_track_callback) {
2145 if (release_status == KMP_LOCK_RELEASED) {
2146 if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)) {
2147 ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)(
2148 (uint64_t) lck);
2149 }
2150 } else if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)) {
2151 ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)(
2152 (uint64_t) lck);
2153 }
2154 }
2155#endif
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002156
2157#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002158}
2159
2160/* try to acquire the lock */
2161int
2162__kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2163{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002164 KMP_COUNT_BLOCK(OMP_test_lock);
2165 KMP_TIME_BLOCK(OMP_test_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002166
2167#if KMP_USE_DYNAMIC_LOCK
2168 int rc;
2169 int tag = DYNA_EXTRACT_D_TAG(user_lock);
2170# if USE_ITT_BUILD
2171 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2172# endif
2173# if DYNA_USE_FAST_TAS
2174 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2175 DYNA_TEST_TAS_LOCK(user_lock, gtid, rc);
2176 } else
2177# elif DYNA_USE_FAST_FUTEX
2178 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2179 DYNA_TEST_FUTEX_LOCK(user_lock, gtid, rc);
2180 } else
2181# endif
2182 {
2183 rc = __kmp_direct_test_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2184 }
2185 if (rc) {
2186# if USE_ITT_BUILD
2187 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2188# endif
2189 return FTN_TRUE;
2190 } else {
2191# if USE_ITT_BUILD
2192 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2193# endif
2194 return FTN_FALSE;
2195 }
2196
2197#else // KMP_USE_DYNAMIC_LOCK
2198
Jim Cownie5e8470a2013-09-27 10:38:44 +00002199 kmp_user_lock_p lck;
2200 int rc;
2201
2202 if ( ( __kmp_user_lock_kind == lk_tas )
2203 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2204 lck = (kmp_user_lock_p)user_lock;
2205 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002206#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002207 else if ( ( __kmp_user_lock_kind == lk_futex )
2208 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2209 lck = (kmp_user_lock_p)user_lock;
2210 }
2211#endif
2212 else {
2213 lck = __kmp_lookup_user_lock( user_lock, "omp_test_lock" );
2214 }
2215
2216#if USE_ITT_BUILD
2217 __kmp_itt_lock_acquiring( lck );
2218#endif /* USE_ITT_BUILD */
2219
2220 rc = TEST_LOCK( lck, gtid );
2221#if USE_ITT_BUILD
2222 if ( rc ) {
2223 __kmp_itt_lock_acquired( lck );
2224 } else {
2225 __kmp_itt_lock_cancelled( lck );
2226 }
2227#endif /* USE_ITT_BUILD */
2228 return ( rc ? FTN_TRUE : FTN_FALSE );
2229
2230 /* Can't use serial interval since not block structured */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002231
2232#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002233}
2234
2235/* try to acquire the lock */
2236int
2237__kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2238{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002239#if KMP_USE_DYNAMIC_LOCK
2240 int rc;
2241# if USE_ITT_BUILD
2242 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2243# endif
2244 rc = DYNA_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
2245# if USE_ITT_BUILD
2246 if (rc) {
2247 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2248 } else {
2249 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2250 }
2251# endif
2252 return rc;
2253
2254#else // KMP_USE_DYNAMIC_LOCK
2255
Jim Cownie5e8470a2013-09-27 10:38:44 +00002256 kmp_user_lock_p lck;
2257 int rc;
2258
2259 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2260 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2261 lck = (kmp_user_lock_p)user_lock;
2262 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002263#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002264 else if ( ( __kmp_user_lock_kind == lk_futex )
2265 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2266 <= OMP_NEST_LOCK_T_SIZE ) ) {
2267 lck = (kmp_user_lock_p)user_lock;
2268 }
2269#endif
2270 else {
2271 lck = __kmp_lookup_user_lock( user_lock, "omp_test_nest_lock" );
2272 }
2273
2274#if USE_ITT_BUILD
2275 __kmp_itt_lock_acquiring( lck );
2276#endif /* USE_ITT_BUILD */
2277
2278 rc = TEST_NESTED_LOCK( lck, gtid );
2279#if USE_ITT_BUILD
2280 if ( rc ) {
2281 __kmp_itt_lock_acquired( lck );
2282 } else {
2283 __kmp_itt_lock_cancelled( lck );
2284 }
2285#endif /* USE_ITT_BUILD */
2286 return rc;
2287
2288 /* Can't use serial interval since not block structured */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002289
2290#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002291}
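
/*
The nestable-lock entries above allow re-acquisition by the thread that already owns the
lock. A small user-level sketch using the standard omp_nest_lock API, with the
corresponding internal entries noted informally in comments.

@code
#include <omp.h>

static omp_nest_lock_t nlock;

// Recursion is safe: the owning thread may re-acquire a nestable lock.
static void update(int depth)
{
    omp_set_nest_lock(&nlock);        // cf. __kmpc_set_nest_lock()
    if (depth > 0)
        update(depth - 1);            // re-acquires the same lock
    omp_unset_nest_lock(&nlock);      // cf. __kmpc_unset_nest_lock()
}

int main(void)
{
    omp_init_nest_lock(&nlock);       // cf. __kmpc_init_nest_lock()
    #pragma omp parallel num_threads(4)
    update(3);
    omp_destroy_nest_lock(&nlock);    // cf. __kmpc_destroy_nest_lock()
    return 0;
}
@endcode
*/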
2292
2293
2294/*--------------------------------------------------------------------------------------------------------------------*/
2295
2296/*
2297 * Interface to fast scalable reduce methods routines
2298 */
2299
2300// Keep the selected method in a thread-local structure for cross-function use: it is read again in the __kmpc_end_reduce* functions.
2301// An alternative would be to re-determine the method in the __kmpc_end_reduce* functions (a new prototype would be required then).
2302// AT: which solution is better?
2303#define __KMP_SET_REDUCTION_METHOD(gtid,rmethod) \
2304 ( ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) = ( rmethod ) )
2305
2306#define __KMP_GET_REDUCTION_METHOD(gtid) \
2307 ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method )
2308
2309// description of the packed_reduction_method variable: look at the macros in kmp.h
2310
2311
2312// used in a critical section reduce block
2313static __forceinline void
2314__kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
2315
2316    // This lock was visible to a customer and to the thread profiler as a serial overhead span
2317    // (although it is used for an internal purpose only).
2318    // Why was it visible in the previous implementation?
2319    // Should we keep it visible in the new reduce block?
2320 kmp_user_lock_p lck;
2321
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002322#if KMP_USE_DYNAMIC_LOCK
2323
2324 if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
2325 lck = (kmp_user_lock_p)crit;
2326 if (*((kmp_dyna_lock_t *)lck) == 0) {
2327 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)lck, 0, DYNA_GET_D_TAG(__kmp_user_lock_seq));
2328 }
2329 KMP_DEBUG_ASSERT(lck != NULL);
2330 if (__kmp_env_consistency_check) {
2331 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
2332 }
2333 DYNA_D_LOCK_FUNC(lck, set)((kmp_dyna_lock_t *)lck, global_tid);
2334 } else {
2335 kmp_indirect_lock_t *ilk = __kmp_get_indirect_csptr(crit, loc, global_tid, __kmp_user_lock_seq);
2336 KMP_DEBUG_ASSERT(ilk != NULL);
2337 if (__kmp_env_consistency_check) {
2338 __kmp_push_sync(global_tid, ct_critical, loc, ilk->lock, __kmp_user_lock_seq);
2339 }
2340 DYNA_I_LOCK_FUNC(ilk, set)(ilk->lock, global_tid);
2341 }
2342
2343#else // KMP_USE_DYNAMIC_LOCK
2344
Jim Cownie5e8470a2013-09-27 10:38:44 +00002345 // We know that the fast reduction code is only emitted by Intel compilers
2346 // with 32 byte critical sections. If there isn't enough space, then we
2347 // have to use a pointer.
2348 if ( __kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE ) {
2349 lck = (kmp_user_lock_p)crit;
2350 }
2351 else {
2352 lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
2353 }
2354 KMP_DEBUG_ASSERT( lck != NULL );
2355
2356 if ( __kmp_env_consistency_check )
2357 __kmp_push_sync( global_tid, ct_critical, loc, lck );
2358
2359 __kmp_acquire_user_lock_with_checks( lck, global_tid );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002360
2361#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002362}
2363
2364// used in a critical section reduce block
2365static __forceinline void
2366__kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
2367
2368 kmp_user_lock_p lck;
2369
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002370#if KMP_USE_DYNAMIC_LOCK
2371
2372 if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
2373 lck = (kmp_user_lock_p)crit;
2374 if (__kmp_env_consistency_check)
2375 __kmp_pop_sync(global_tid, ct_critical, loc);
2376 DYNA_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
2377 } else {
2378 kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
2379 if (__kmp_env_consistency_check)
2380 __kmp_pop_sync(global_tid, ct_critical, loc);
2381 DYNA_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
2382 }
2383
2384#else // KMP_USE_DYNAMIC_LOCK
2385
Jim Cownie5e8470a2013-09-27 10:38:44 +00002386 // We know that the fast reduction code is only emitted by Intel compilers with 32 byte critical
2387 // sections. If there isn't enough space, then we have to use a pointer.
2388 if ( __kmp_base_user_lock_size > 32 ) {
2389 lck = *( (kmp_user_lock_p *) crit );
2390 KMP_ASSERT( lck != NULL );
2391 } else {
2392 lck = (kmp_user_lock_p) crit;
2393 }
2394
2395 if ( __kmp_env_consistency_check )
2396 __kmp_pop_sync( global_tid, ct_critical, loc );
2397
2398 __kmp_release_user_lock_with_checks( lck, global_tid );
2399
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002400#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002401} // __kmp_end_critical_section_reduce_block
2402
2403
2404/* 2.a.i. Reduce Block without a terminating barrier */
2405/*!
2406@ingroup SYNCHRONIZATION
2407@param loc source location information
2408@param global_tid global thread number
2409@param num_vars number of items (variables) to be reduced
2410@param reduce_size size of data in bytes to be reduced
2411@param reduce_data pointer to data to be reduced
2412@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data
2413@param lck pointer to the unique lock data structure
2414@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed
2415
2416The nowait version is used for a reduce clause with the nowait argument.
2417*/
2418kmp_int32
2419__kmpc_reduce_nowait(
2420 ident_t *loc, kmp_int32 global_tid,
2421 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
2422 kmp_critical_name *lck ) {
2423
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002424 KMP_COUNT_BLOCK(REDUCE_nowait);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002425 int retval;
2426 PACKED_REDUCTION_METHOD_T packed_reduction_method;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002427#if OMP_40_ENABLED
2428 kmp_team_t *team;
2429 kmp_info_t *th;
2430 int teams_swapped = 0, task_state;
2431#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002432 KA_TRACE( 10, ( "__kmpc_reduce_nowait() enter: called T#%d\n", global_tid ) );
2433
2434 // why do we need this initialization here at all?
2435    // The reduction clause cannot be used as a stand-alone directive.
2436
2437 // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed
2438 // possible detection of false-positive race by the threadchecker ???
2439 if( ! TCR_4( __kmp_init_parallel ) )
2440 __kmp_parallel_initialize();
2441
2442 // check correctness of reduce block nesting
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002443#if KMP_USE_DYNAMIC_LOCK
2444 if ( __kmp_env_consistency_check )
2445 __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
2446#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002447 if ( __kmp_env_consistency_check )
2448 __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002449#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002450
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002451#if OMP_40_ENABLED
2452 th = __kmp_thread_from_gtid(global_tid);
2453 if( th->th.th_teams_microtask ) { // AC: check if we are inside the teams construct?
2454 team = th->th.th_team;
2455 if( team->t.t_level == th->th.th_teams_level ) {
2456 // this is reduction at teams construct
2457 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
2458 // Let's swap teams temporarily for the reduction barrier
2459 teams_swapped = 1;
2460 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2461 th->th.th_team = team->t.t_parent;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002462 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002463 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002464 task_state = th->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002465 th->th.th_task_state = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002466 }
2467 }
2468#endif // OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002469
2470 // packed_reduction_method value will be reused by __kmp_end_reduce* function, the value should be kept in a variable
2471 // the variable should be either a construct-specific or thread-specific property, not a team specific property
2472 // (a thread can reach the next reduce block on the next construct, reduce method may differ on the next construct)
2473 // an ident_t "loc" parameter could be used as a construct-specific property (what if loc == 0?)
2474 // (if both construct-specific and team-specific variables were shared, then unness extra syncs should be needed)
2475 // a thread-specific variable is better regarding two issues above (next construct and extra syncs)
2476 // a thread-specific "th_local.reduction_method" variable is used currently
2477 // each thread executes 'determine' and 'set' lines (no need to execute by one thread, to avoid unness extra syncs)
2478
2479 packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2480 __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2481
2482 if( packed_reduction_method == critical_reduce_block ) {
2483
2484 __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2485 retval = 1;
2486
2487 } else if( packed_reduction_method == empty_reduce_block ) {
2488
2489 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2490 retval = 1;
2491
2492 } else if( packed_reduction_method == atomic_reduce_block ) {
2493
2494 retval = 2;
2495
2496        // all threads should do this pop here (because __kmpc_end_reduce_nowait() won't be called by the code gen)
2497        // (this is not ideal: the checking block has been closed by this 'pop', but the atomic operation
2498        // has not been executed yet; it will run slightly later, literally on the next instruction)
2499 if ( __kmp_env_consistency_check )
2500 __kmp_pop_sync( global_tid, ct_reduce, loc );
2501
2502 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2503
2504        //AT: performance issue: a real barrier here
2505        //AT: (if the master is slow, the other threads are blocked here waiting for the master to arrive and release them)
2506        //AT: (this is not what a customer would expect when specifying the NOWAIT clause)
2507        //AT: (specifying NOWAIT won't improve performance here, which will be confusing to a customer)
2508        //AT: another implementation of *barrier_gather*nowait() (or some other design) might be faster
2509        //    and more in line with the sense of NOWAIT
2510        //AT: TO DO: run the EPCC benchmark and compare times
2511
2512 // this barrier should be invisible to a customer and to the thread profiler
2513 // (it's neither a terminating barrier nor customer's code, it's used for an internal purpose)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002514#if USE_ITT_NOTIFY
2515 __kmp_threads[global_tid]->th.th_ident = loc;
2516#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002517 retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, FALSE, reduce_size, reduce_data, reduce_func );
2518 retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2519
2520 // all other workers except master should do this pop here
2521 // ( none of other workers will get to __kmpc_end_reduce_nowait() )
2522 if ( __kmp_env_consistency_check ) {
2523 if( retval == 0 ) {
2524 __kmp_pop_sync( global_tid, ct_reduce, loc );
2525 }
2526 }
2527
2528 } else {
2529
2530 // should never reach this block
2531 KMP_ASSERT( 0 ); // "unexpected method"
2532
2533 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002534#if OMP_40_ENABLED
2535 if( teams_swapped ) {
2536 // Restore thread structure
2537 th->th.th_info.ds.ds_tid = 0;
2538 th->th.th_team = team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002539 th->th.th_team_nproc = team->t.t_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002540 th->th.th_task_team = team->t.t_task_team[task_state];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002541 th->th.th_task_state = task_state;
2542 }
2543#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002544 KA_TRACE( 10, ( "__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2545
2546 return retval;
2547}
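
/*
A hedged sketch of the calling pattern a compiler could generate for a reduction(+:sum)
clause with nowait, based only on the result codes documented above (1: finish the
reduction and call __kmpc_end_reduce_nowait, 2: combine atomically, 0: nothing left to do
for this thread). The struct, the helper names, and the way loc/gtid are obtained are
illustrative assumptions; real codegen differs in detail. Types come from kmp.h.

@code
typedef struct { int sum; } red_data_t;

// Fold rhs_data into lhs_data, as required of reduce_func.
static void my_reduce_func(void *lhs_data, void *rhs_data)
{
    ((red_data_t *)lhs_data)->sum += ((red_data_t *)rhs_data)->sum;
}

static kmp_critical_name red_crit;    // zero-initialized lock placeholder

static void reduce_nowait_sketch(ident_t *loc, kmp_int32 gtid, int my_sum, int *shared_sum)
{
    red_data_t data = { my_sum };
    switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(data), &data,
                                 my_reduce_func, &red_crit)) {
    case 1:   // fold this thread's (possibly already tree-combined) data into the shared result
        *shared_sum += data.sum;
        __kmpc_end_reduce_nowait(loc, gtid, &red_crit);
        break;
    case 2: { // every thread updates the shared result atomically
        #pragma omp atomic
        *shared_sum += data.sum;
        break;
    }
    default:  // 0: this thread's contribution was already folded in during the barrier
        break;
    }
}
@endcode
*/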
2548
2549/*!
2550@ingroup SYNCHRONIZATION
2551@param loc source location information
2552@param global_tid global thread id.
2553@param lck pointer to the unique lock data structure
2554
2555Finish the execution of a reduce nowait.
2556*/
2557void
2558__kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2559
2560 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2561
2562 KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid ) );
2563
2564 packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2565
2566 if( packed_reduction_method == critical_reduce_block ) {
2567
2568 __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2569
2570 } else if( packed_reduction_method == empty_reduce_block ) {
2571
2572 // usage: if team size == 1, no synchronization is required ( on Intel platforms only )
2573
2574 } else if( packed_reduction_method == atomic_reduce_block ) {
2575
2576 // neither master nor other workers should get here
2577 // (code gen does not generate this call in case 2: atomic reduce block)
2578 // actually it's better to remove this elseif at all;
2579 // after removal this value will checked by the 'else' and will assert
2580
2581 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2582
2583 // only master gets here
2584
2585 } else {
2586
2587 // should never reach this block
2588 KMP_ASSERT( 0 ); // "unexpected method"
2589
2590 }
2591
2592 if ( __kmp_env_consistency_check )
2593 __kmp_pop_sync( global_tid, ct_reduce, loc );
2594
2595 KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2596
2597 return;
2598}
2599
2600/* 2.a.ii. Reduce Block with a terminating barrier */
2601
2602/*!
2603@ingroup SYNCHRONIZATION
2604@param loc source location information
2605@param global_tid global thread number
2606@param num_vars number of items (variables) to be reduced
2607@param reduce_size size of data in bytes to be reduced
2608@param reduce_data pointer to data to be reduced
2609@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data
2610@param lck pointer to the unique lock data structure
2611@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed
2612
2613A blocking reduce that includes an implicit barrier.
2614*/
2615kmp_int32
2616__kmpc_reduce(
2617 ident_t *loc, kmp_int32 global_tid,
2618 kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2619 void (*reduce_func)(void *lhs_data, void *rhs_data),
2620 kmp_critical_name *lck )
2621{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002622 KMP_COUNT_BLOCK(REDUCE_wait);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002623 int retval;
2624 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2625
2626 KA_TRACE( 10, ( "__kmpc_reduce() enter: called T#%d\n", global_tid ) );
2627
2628 // why do we need this initialization here at all?
2629    // The reduction clause cannot be a stand-alone directive.
2630
2631 // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed
2632 // possible detection of false-positive race by the threadchecker ???
2633 if( ! TCR_4( __kmp_init_parallel ) )
2634 __kmp_parallel_initialize();
2635
2636 // check correctness of reduce block nesting
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002637#if KMP_USE_DYNAMIC_LOCK
2638 if ( __kmp_env_consistency_check )
2639 __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
2640#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002641 if ( __kmp_env_consistency_check )
2642 __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002643#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002644
Jim Cownie5e8470a2013-09-27 10:38:44 +00002645 packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2646 __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2647
2648 if( packed_reduction_method == critical_reduce_block ) {
2649
2650 __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2651 retval = 1;
2652
2653 } else if( packed_reduction_method == empty_reduce_block ) {
2654
2655 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2656 retval = 1;
2657
2658 } else if( packed_reduction_method == atomic_reduce_block ) {
2659
2660 retval = 2;
2661
2662 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2663
2664 //case tree_reduce_block:
2665 // this barrier should be visible to a customer and to the thread profiler
2666 // (it's a terminating barrier on constructs if NOWAIT not specified)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002667#if USE_ITT_NOTIFY
2668 __kmp_threads[global_tid]->th.th_ident = loc; // needed for correct notification of frames
2669#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002670 retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, TRUE, reduce_size, reduce_data, reduce_func );
2671 retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2672
2673 // all other workers except master should do this pop here
2674 // ( none of other workers except master will enter __kmpc_end_reduce() )
2675 if ( __kmp_env_consistency_check ) {
2676 if( retval == 0 ) { // 0: all other workers; 1: master
2677 __kmp_pop_sync( global_tid, ct_reduce, loc );
2678 }
2679 }
2680
2681 } else {
2682
2683 // should never reach this block
2684 KMP_ASSERT( 0 ); // "unexpected method"
2685
2686 }
2687
2688 KA_TRACE( 10, ( "__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2689
2690 return retval;
2691}
2692
2693/*!
2694@ingroup SYNCHRONIZATION
2695@param loc source location information
2696@param global_tid global thread id.
2697@param lck pointer to the unique lock data structure
2698
2699Finish the execution of a blocking reduce.
2700The <tt>lck</tt> pointer must be the same as that used in the corresponding start function.
2701*/
2702void
2703__kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2704
2705 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2706
2707 KA_TRACE( 10, ( "__kmpc_end_reduce() enter: called T#%d\n", global_tid ) );
2708
2709 packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2710
2711 // this barrier should be visible to a customer and to the thread profiler
2712 // (it's a terminating barrier on constructs if NOWAIT not specified)
2713
2714 if( packed_reduction_method == critical_reduce_block ) {
2715
2716 __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2717
2718 // TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002719#if USE_ITT_NOTIFY
2720 __kmp_threads[global_tid]->th.th_ident = loc;
2721#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002722 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2723
2724 } else if( packed_reduction_method == empty_reduce_block ) {
2725
2726 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2727
2728 // TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002729#if USE_ITT_NOTIFY
2730 __kmp_threads[global_tid]->th.th_ident = loc;
2731#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002732 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2733
2734 } else if( packed_reduction_method == atomic_reduce_block ) {
2735
2736 // TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002737#if USE_ITT_NOTIFY
2738 __kmp_threads[global_tid]->th.th_ident = loc;
2739#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002740 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2741
2742 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2743
2744 // only master executes here (master releases all other workers)
2745 __kmp_end_split_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid );
2746
2747 } else {
2748
2749 // should never reach this block
2750 KMP_ASSERT( 0 ); // "unexpected method"
2751
2752 }
2753
2754 if ( __kmp_env_consistency_check )
2755 __kmp_pop_sync( global_tid, ct_reduce, loc );
2756
2757 KA_TRACE( 10, ( "__kmpc_end_reduce() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2758
2759 return;
2760}
2761
2762#undef __KMP_GET_REDUCTION_METHOD
2763#undef __KMP_SET_REDUCTION_METHOD
2764
2765/*-- end of interface to fast scalable reduce routines ---------------------------------------------------------------*/
2766
2767kmp_uint64
2768__kmpc_get_taskid() {
2769
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002770 kmp_int32 gtid;
2771 kmp_info_t * thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002772
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002773 gtid = __kmp_get_gtid();
2774 if ( gtid < 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002775 return 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002776 }; // if
2777 thread = __kmp_thread_from_gtid( gtid );
2778 return thread->th.th_current_task->td_task_id;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002779
2780} // __kmpc_get_taskid
2781
2782
2783kmp_uint64
2784__kmpc_get_parent_taskid() {
2785
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002786 kmp_int32 gtid;
2787 kmp_info_t * thread;
2788 kmp_taskdata_t * parent_task;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002789
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002790 gtid = __kmp_get_gtid();
2791 if ( gtid < 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002792 return 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002793 }; // if
2794 thread = __kmp_thread_from_gtid( gtid );
2795 parent_task = thread->th.th_current_task->td_parent;
2796 return ( parent_task == NULL ? 0 : parent_task->td_task_id );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002797
2798} // __kmpc_get_parent_taskid
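
/*
A small tracing sketch built on the two queries above, e.g. for instrumentation linked
against this runtime. The extern declarations (and the use of unsigned long long as a
stand-in for kmp_uint64) are assumptions for code built outside this translation unit;
the printed values are the runtime's internal 64-bit task ids, not OpenMP task numbers.

@code
#include <stdio.h>

extern unsigned long long __kmpc_get_taskid(void);
extern unsigned long long __kmpc_get_parent_taskid(void);

static void trace_task_edge(const char *where)
{
    printf("%s: task %llu (parent %llu)\n",
           where, __kmpc_get_taskid(), __kmpc_get_parent_taskid());
}

// Typical use inside user code:
//     #pragma omp task
//     trace_task_edge("child task");
@endcode
*/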
2799
2800void __kmpc_place_threads(int nC, int nT, int nO)
2801{
Jim Cownie5e8470a2013-09-27 10:38:44 +00002802 if ( ! __kmp_init_serial ) {
2803 __kmp_serial_initialize();
2804 }
2805 __kmp_place_num_cores = nC;
2806 __kmp_place_num_threads_per_core = nT;
2807 __kmp_place_core_offset = nO;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002808}
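
/*
__kmpc_place_threads simply records the requested number of cores, threads per core, and
core offset for later use by affinity initialization. A hedged sketch of a direct call;
the assumption here is that it must run before the first parallel region (i.e., before the
topology is consumed) for the values to take effect, and that such calls are normally
generated from environment/compiler support rather than written by hand.

@code
static void restrict_placement_sketch(void)
{
    // 4 cores, 2 hardware threads per core, starting at core offset 0
    __kmpc_place_threads(4, 2, 0);
}
@endcode
*/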
2809
2810// end of file //
2811