Jim Cownie5e8470a2013-09-27 10:38:44 +00001/*
2 * kmp_csupport.c -- kfront linkage support for OpenMP.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15
16#include "omp.h" /* extern "C" declarations of user-visible routines */
17#include "kmp.h"
18#include "kmp_i18n.h"
19#include "kmp_itt.h"
20#include "kmp_error.h"
Jim Cownie4cc4bb42014-10-07 16:25:50 +000021#include "kmp_stats.h"
Jim Cownie5e8470a2013-09-27 10:38:44 +000022
Andrey Churbanovd7d088f2015-04-29 16:42:24 +000023#if OMPT_SUPPORT
24#include "ompt-internal.h"
25#include "ompt-specific.h"
26#endif
27
Jim Cownie5e8470a2013-09-27 10:38:44 +000028#define MAX_MESSAGE 512
29
30/* ------------------------------------------------------------------------ */
31/* ------------------------------------------------------------------------ */
32
33/* flags will be used in future, e.g., to implement */
34/* openmp_strict library restrictions */
35
36/*!
37 * @ingroup STARTUP_SHUTDOWN
38 * @param loc in source location information
39 * @param flags in for future use (currently ignored)
40 *
41 * Initialize the runtime library. This call is optional; if it is not made then
Jim Cownie4cc4bb42014-10-07 16:25:50 +000042 * it will be implicitly called by attempts to use other library functions.
Jim Cownie5e8470a2013-09-27 10:38:44 +000043 *
44 */
45void
46__kmpc_begin(ident_t *loc, kmp_int32 flags)
47{
48 // By default __kmp_ignore_mppbeg() returns TRUE.
49 if (__kmp_ignore_mppbeg() == FALSE) {
50 __kmp_internal_begin();
51
52 KC_TRACE( 10, ("__kmpc_begin: called\n" ) );
53 }
54}
55
56/*!
57 * @ingroup STARTUP_SHUTDOWN
58 * @param loc source location information
59 *
60 * Shut down the runtime library. This is also optional, and even if called will not
61 * do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to zero.
62 */
63void
64__kmpc_end(ident_t *loc)
65{
66 // By default, __kmp_ignore_mppend() returns TRUE, which makes the __kmpc_end() call a no-op.
67 // However, this can be overridden with the KMP_IGNORE_MPPEND environment variable.
68 // If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() returns FALSE and __kmpc_end()
69 // will unregister this root (which can cause library shutdown).
70 if (__kmp_ignore_mppend() == FALSE) {
71 KC_TRACE( 10, ("__kmpc_end: called\n" ) );
72 KA_TRACE( 30, ("__kmpc_end\n" ));
73
74 __kmp_internal_end_thread( -1 );
75 }
76}
77
78/*!
79@ingroup THREAD_STATES
80@param loc Source location information.
81@return The global thread index of the active thread.
82
83This function can be called in any context.
84
85If the runtime has only been entered at the outermost level from a
86single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is that
Jonathan Peyton81f9cd12015-05-22 22:37:22 +000087which would be returned by omp_get_thread_num() in the outermost
Jim Cownie5e8470a2013-09-27 10:38:44 +000088active parallel construct. (Or zero if there is no active parallel
89construct, since the master thread is necessarily thread zero).
90
91If multiple non-OpenMP threads all enter an OpenMP construct then this
92will be a unique thread identifier among all the threads created by
93the OpenMP runtime (but the value cannot be defined in terms of
94OpenMP thread ids returned by omp_get_thread_num()).
95
96*/
97kmp_int32
98__kmpc_global_thread_num(ident_t *loc)
99{
100 kmp_int32 gtid = __kmp_entry_gtid();
101
102 KC_TRACE( 10, ("__kmpc_global_thread_num: T#%d\n", gtid ) );
103
104 return gtid;
105}
106
107/*!
108@ingroup THREAD_STATES
109@param loc Source location information.
110@return The number of threads under control of the OpenMP<sup>*</sup> runtime
111
112This function can be called in any context.
113It returns the total number of threads under the control of the OpenMP runtime. That is
114not a number that can be determined by any OpenMP standard calls, since the library may be
115called from more than one non-OpenMP thread, and this reflects the total over all such calls.
116Similarly, the runtime maintains underlying threads even when they are not active (since the cost
117of creating and destroying OS threads is high); this call counts all such threads even if they are not
118waiting for work.
119*/
120kmp_int32
121__kmpc_global_num_threads(ident_t *loc)
122{
123 KC_TRACE( 10, ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_nth ) );
124
125 return TCR_4(__kmp_nth);
126}
127
128/*!
129@ingroup THREAD_STATES
130@param loc Source location information.
131@return The thread number of the calling thread in the innermost active parallel construct.
132
133*/
134kmp_int32
135__kmpc_bound_thread_num(ident_t *loc)
136{
137 KC_TRACE( 10, ("__kmpc_bound_thread_num: called\n" ) );
138 return __kmp_tid_from_gtid( __kmp_entry_gtid() );
139}
140
141/*!
142@ingroup THREAD_STATES
143@param loc Source location information.
144@return The number of threads in the innermost active parallel construct.
145*/
146kmp_int32
147__kmpc_bound_num_threads(ident_t *loc)
148{
149 KC_TRACE( 10, ("__kmpc_bound_num_threads: called\n" ) );
150
151 return __kmp_entry_thread() -> th.th_team -> t.t_nproc;
152}
153
154/*!
155 * @ingroup DEPRECATED
156 * @param loc location description
157 *
158 * This function need not be called. It always returns TRUE.
159 */
160kmp_int32
161__kmpc_ok_to_fork(ident_t *loc)
162{
163#ifndef KMP_DEBUG
164
165 return TRUE;
166
167#else
168
169 const char *semi2;
170 const char *semi3;
171 int line_no;
172
173 if (__kmp_par_range == 0) {
174 return TRUE;
175 }
176 semi2 = loc->psource;
177 if (semi2 == NULL) {
178 return TRUE;
179 }
180 semi2 = strchr(semi2, ';');
181 if (semi2 == NULL) {
182 return TRUE;
183 }
184 semi2 = strchr(semi2 + 1, ';');
185 if (semi2 == NULL) {
186 return TRUE;
187 }
188 if (__kmp_par_range_filename[0]) {
189 const char *name = semi2 - 1;
190 while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
191 name--;
192 }
193 if ((*name == '/') || (*name == ';')) {
194 name++;
195 }
196 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
197 return __kmp_par_range < 0;
198 }
199 }
200 semi3 = strchr(semi2 + 1, ';');
201 if (__kmp_par_range_routine[0]) {
202 if ((semi3 != NULL) && (semi3 > semi2)
203 && (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
204 return __kmp_par_range < 0;
205 }
206 }
Andrey Churbanov74bf17b2015-04-02 13:27:08 +0000207 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000208 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
209 return __kmp_par_range > 0;
210 }
211 return __kmp_par_range < 0;
212 }
213 return TRUE;
214
215#endif /* KMP_DEBUG */
216
217}
218
219/*!
220@ingroup THREAD_STATES
221@param loc Source location information.
222@return 1 if this thread is executing inside an active parallel region, zero if not.
223*/
224kmp_int32
225__kmpc_in_parallel( ident_t *loc )
226{
227 return __kmp_entry_thread() -> th.th_root -> r.r_active;
228}
229
230/*!
231@ingroup PARALLEL
232@param loc source location information
233@param global_tid global thread number
234@param num_threads number of threads requested for this parallel construct
235
236Set the number of threads to be used by the next fork spawned by this thread.
237This call is only required if the parallel construct has a `num_threads` clause.
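
As an illustrative sketch only (placeholder names; real compiler output differs in detail),
a `num_threads` clause is lowered into a call to this function immediately before the
matching fork:
@code
// #pragma omp parallel num_threads(4)
kmp_int32 gtid = __kmpc_global_thread_num(&loc);       // loc: an ident_t for this source location
__kmpc_push_num_threads(&loc, gtid, 4);
__kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_body);  // outlined_body: compiler-generated microtask
@endcode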
238*/
239void
240__kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads )
241{
242 KA_TRACE( 20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
243 global_tid, num_threads ) );
244
245 __kmp_push_num_threads( loc, global_tid, num_threads );
246}
247
248void
249__kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid )
250{
251 KA_TRACE( 20, ("__kmpc_pop_num_threads: enter\n" ) );
252
253 /* the num_threads are automatically popped */
254}
255
256
257#if OMP_40_ENABLED
258
259void
260__kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind )
261{
262 KA_TRACE( 20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n",
263 global_tid, proc_bind ) );
264
265 __kmp_push_proc_bind( loc, global_tid, (kmp_proc_bind_t)proc_bind );
266}
267
268#endif /* OMP_40_ENABLED */
269
270
271/*!
272@ingroup PARALLEL
273@param loc source location information
274@param argc total number of arguments in the ellipsis
275@param microtask pointer to callback routine consisting of outlined parallel construct
276@param ... pointers to shared variables that aren't global
277
278Do the actual fork and call the microtask in the relevant number of threads.
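
A hedged sketch of how a compiler might use this entry point (the names here are
placeholders and the exact lowering varies): the microtask receives pointers to the
global and bound thread ids followed by the shared-variable pointers passed through
the ellipsis, and argc counts those trailing pointers.
@code
// Outlined body for "#pragma omp parallel" with one shared variable x.
void outlined_body(kmp_int32 *gtid, kmp_int32 *btid, int *x)
{
    *x += 1;               // body of the parallel region
}

// Call site (argc == 1 because one shared-variable pointer follows the microtask):
int x = 0;
__kmpc_fork_call(&loc, 1, (kmpc_micro)outlined_body, &x);
@endcode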
279*/
280void
281__kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
282{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000283 KMP_STOP_EXPLICIT_TIMER(OMP_serial);
284 KMP_COUNT_BLOCK(OMP_PARALLEL);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000285 int gtid = __kmp_entry_gtid();
286 // maybe saving thr_state is enough here
287 {
288 va_list ap;
289 va_start( ap, microtask );
290
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000291#if OMPT_SUPPORT
292 kmp_info_t *master_th = __kmp_threads[ gtid ];
293 kmp_team_t *parent_team = master_th->th.th_team;
294 int tid = __kmp_tid_from_gtid( gtid );
295 parent_team->t.t_implicit_task_taskdata[tid].
296 ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
297#endif
298
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000299#if INCLUDE_SSC_MARKS
300 SSC_MARK_FORKING();
301#endif
302 __kmp_fork_call( loc, gtid, fork_context_intel,
Jim Cownie5e8470a2013-09-27 10:38:44 +0000303 argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000304#if OMPT_SUPPORT
305 VOLATILE_CAST(void *) microtask, // "unwrapped" task
306#endif
307 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
Jim Cownie5e8470a2013-09-27 10:38:44 +0000308 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
309/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +0000310#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +0000311 &ap
312#else
313 ap
314#endif
315 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000316#if INCLUDE_SSC_MARKS
317 SSC_MARK_JOINING();
318#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000319 __kmp_join_call( loc, gtid );
320
321 va_end( ap );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000322
323#if OMPT_SUPPORT
324 if (ompt_status & ompt_status_track) {
325 parent_team->t.t_implicit_task_taskdata[tid].
326 ompt_task_info.frame.reenter_runtime_frame = 0;
327 }
328#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000329 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000330 KMP_START_EXPLICIT_TIMER(OMP_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000331}
332
333#if OMP_40_ENABLED
334/*!
335@ingroup PARALLEL
336@param loc source location information
337@param global_tid global thread number
338@param num_teams number of teams requested for the teams construct
Jonathan Peyton81f9cd12015-05-22 22:37:22 +0000339@param num_threads number of threads per team requested for the teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +0000340
341Set the number of teams to be used by the teams construct.
342This call is only required if the teams construct has a `num_teams` clause
343or a `thread_limit` clause (or both).
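
A rough sketch of the lowering for a teams construct with both clauses
(placeholder names; simplified arguments):
@code
// #pragma omp teams num_teams(8) thread_limit(4)
kmp_int32 gtid = __kmpc_global_thread_num(&loc);
__kmpc_push_num_teams(&loc, gtid, 8, 4);
__kmpc_fork_teams(&loc, 0, (kmpc_micro)teams_outlined_body);
@endcode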
344*/
345void
346__kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads )
347{
348 KA_TRACE( 20, ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
349 global_tid, num_teams, num_threads ) );
350
351 __kmp_push_num_teams( loc, global_tid, num_teams, num_threads );
352}
353
354/*!
355@ingroup PARALLEL
356@param loc source location information
357@param argc total number of arguments in the ellipsis
358@param microtask pointer to callback routine consisting of outlined teams construct
359@param ... pointers to shared variables that aren't global
360
361Do the actual fork and call the microtask in the relevant number of threads.
362*/
363void
364__kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
365{
366 int gtid = __kmp_entry_gtid();
367 kmp_info_t *this_thr = __kmp_threads[ gtid ];
368 va_list ap;
369 va_start( ap, microtask );
370
371 // remember teams entry point and nesting level
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000372 this_thr->th.th_teams_microtask = microtask;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000373 this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level; // AC: can be >0 on host
374
375 // check if __kmpc_push_num_teams called, set default number of teams otherwise
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000376 if ( this_thr->th.th_teams_size.nteams == 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000377 __kmp_push_num_teams( loc, gtid, 0, 0 );
378 }
379 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000380 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
381 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000382
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000383 __kmp_fork_call( loc, gtid, fork_context_intel,
Jim Cownie5e8470a2013-09-27 10:38:44 +0000384 argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000385#if OMPT_SUPPORT
386 VOLATILE_CAST(void *) microtask, // "unwrapped" task
387#endif
388 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
Jim Cownie5e8470a2013-09-27 10:38:44 +0000389 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
Andrey Churbanovcbda8682015-01-13 14:43:35 +0000390#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +0000391 &ap
392#else
393 ap
394#endif
395 );
396 __kmp_join_call( loc, gtid );
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000397 this_thr->th.th_teams_microtask = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000398 this_thr->th.th_teams_level = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000399 *(kmp_int64*)(&this_thr->th.th_teams_size) = 0L;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000400 va_end( ap );
401}
402#endif /* OMP_40_ENABLED */
403
404
405//
406// I don't think this function should ever have been exported.
407// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
408// openmp code ever called it, but it's been exported from the RTL for so
409// long that I'm afraid to remove the definition.
410//
411int
412__kmpc_invoke_task_func( int gtid )
413{
414 return __kmp_invoke_task_func( gtid );
415}
416
417/*!
418@ingroup PARALLEL
419@param loc source location information
420@param global_tid global thread number
421
422Enter a serialized parallel construct. This interface is used to handle a
423conditional parallel region, like this,
424@code
425#pragma omp parallel if (condition)
426@endcode
427when the condition is false.
428*/
429void
430__kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
431{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000432 __kmp_serialized_parallel(loc, global_tid); /* The implementation is now in kmp_runtime.c so that it can share static functions with
433 * kmp_fork_call since the tasks to be done are similar in each case.
434 */
Jim Cownie5e8470a2013-09-27 10:38:44 +0000435}
436
437/*!
438@ingroup PARALLEL
439@param loc source location information
440@param global_tid global thread number
441
442Leave a serialized parallel construct.
443*/
444void
445__kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
446{
447 kmp_internal_control_t *top;
448 kmp_info_t *this_thr;
449 kmp_team_t *serial_team;
450
451 KC_TRACE( 10, ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid ) );
452
453 /* skip all this code for autopar serialized loops since it results in
454 unacceptable overhead */
455 if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
456 return;
457
458 // Not autopar code
459 if( ! TCR_4( __kmp_init_parallel ) )
460 __kmp_parallel_initialize();
461
462 this_thr = __kmp_threads[ global_tid ];
463 serial_team = this_thr->th.th_serial_team;
464
Andrey Churbanov535b6fa2015-05-07 17:41:51 +0000465 #if OMP_41_ENABLED
466 kmp_task_team_t * task_team = this_thr->th.th_task_team;
467
468 // we need to wait for the proxy tasks before finishing the thread
469 if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks )
470 __kmp_task_team_wait(this_thr, serial_team, NULL ); // is an ITT object needed here?
471 #endif
472
Jim Cownie5e8470a2013-09-27 10:38:44 +0000473 KMP_MB();
474 KMP_DEBUG_ASSERT( serial_team );
475 KMP_ASSERT( serial_team -> t.t_serialized );
476 KMP_DEBUG_ASSERT( this_thr -> th.th_team == serial_team );
477 KMP_DEBUG_ASSERT( serial_team != this_thr->th.th_root->r.r_root_team );
478 KMP_DEBUG_ASSERT( serial_team -> t.t_threads );
479 KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr );
480
481 /* If necessary, pop the internal control stack values and replace the team values */
482 top = serial_team -> t.t_control_stack_top;
483 if ( top && top -> serial_nesting_level == serial_team -> t.t_serialized ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000484 copy_icvs( &serial_team -> t.t_threads[0] -> th.th_current_task -> td_icvs, top );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000485 serial_team -> t.t_control_stack_top = top -> next;
486 __kmp_free(top);
487 }
488
Jim Cownie5e8470a2013-09-27 10:38:44 +0000489 //if( serial_team -> t.t_serialized > 1 )
490 serial_team -> t.t_level--;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000491
492 /* pop dispatch buffers stack */
493 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
494 {
495 dispatch_private_info_t * disp_buffer = serial_team->t.t_dispatch->th_disp_buffer;
496 serial_team->t.t_dispatch->th_disp_buffer =
497 serial_team->t.t_dispatch->th_disp_buffer->next;
498 __kmp_free( disp_buffer );
499 }
500
501 -- serial_team -> t.t_serialized;
502 if ( serial_team -> t.t_serialized == 0 ) {
503
504 /* return to the parallel section */
505
506#if KMP_ARCH_X86 || KMP_ARCH_X86_64
507 if ( __kmp_inherit_fp_control && serial_team->t.t_fp_control_saved ) {
508 __kmp_clear_x87_fpu_status_word();
509 __kmp_load_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word );
510 __kmp_load_mxcsr( &serial_team->t.t_mxcsr );
511 }
512#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
513
514 this_thr -> th.th_team = serial_team -> t.t_parent;
515 this_thr -> th.th_info.ds.ds_tid = serial_team -> t.t_master_tid;
516
517 /* restore values cached in the thread */
518 this_thr -> th.th_team_nproc = serial_team -> t.t_parent -> t.t_nproc; /* JPH */
519 this_thr -> th.th_team_master = serial_team -> t.t_parent -> t.t_threads[0]; /* JPH */
520 this_thr -> th.th_team_serialized = this_thr -> th.th_team -> t.t_serialized;
521
522 /* TODO the below shouldn't need to be adjusted for serialized teams */
523 this_thr -> th.th_dispatch = & this_thr -> th.th_team ->
524 t.t_dispatch[ serial_team -> t.t_master_tid ];
525
Jim Cownie5e8470a2013-09-27 10:38:44 +0000526 __kmp_pop_current_task_from_thread( this_thr );
527
528 KMP_ASSERT( this_thr -> th.th_current_task -> td_flags.executing == 0 );
529 this_thr -> th.th_current_task -> td_flags.executing = 1;
530
531 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +0000532 // Copy the task team from the new child / old parent team to the thread.
533 this_thr->th.th_task_team = this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +0000534 KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n",
535 global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) );
536 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000537 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000538 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
539 KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d decreasing nesting depth of serial team %p to %d\n",
540 global_tid, serial_team, serial_team -> t.t_serialized ) );
541 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000542 }
543
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000544#if USE_ITT_BUILD
545 kmp_uint64 cur_time = 0;
546#if USE_ITT_NOTIFY
Andrey Churbanov51aecb82015-05-06 19:22:36 +0000547 if ( __itt_get_timestamp_ptr ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000548 cur_time = __itt_get_timestamp();
549 }
550#endif /* USE_ITT_NOTIFY */
Andrey Churbanov51aecb82015-05-06 19:22:36 +0000551 if ( this_thr->th.th_team->t.t_level == 0
552#if OMP_40_ENABLED
553 && this_thr->th.th_teams_microtask == NULL
554#endif
555 ) {
556 // Report the barrier
Jim Cownie181b4bb2013-12-23 17:28:57 +0000557 this_thr->th.th_ident = loc;
Andrey Churbanov51aecb82015-05-06 19:22:36 +0000558 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
559 ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
560 {
561 __kmp_itt_frame_submit( global_tid, this_thr->th.th_frame_time_serialized,
562 cur_time, 0, loc, this_thr->th.th_team_nproc, 0 );
563 if ( __kmp_forkjoin_frames_mode == 3 )
564 // Since barrier frame for serialized region is equal to the region we use the same begin timestamp as for the barrier.
565 __kmp_itt_frame_submit( global_tid, serial_team->t.t_region_time,
566 cur_time, 0, loc, this_thr->th.th_team_nproc, 2 );
567 } else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
568 ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
569 // Mark the end of the "parallel" region for VTune. Only one frame notification scheme is used at the moment.
570 __kmp_itt_region_joined( global_tid, 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000571 }
572#endif /* USE_ITT_BUILD */
573
574 if ( __kmp_env_consistency_check )
575 __kmp_pop_parallel( global_tid, NULL );
576}
577
578/*!
579@ingroup SYNCHRONIZATION
580@param loc source location information.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000581
Andrey Churbanov723a6b62015-02-20 18:09:27 +0000582Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
Jim Cownie5e8470a2013-09-27 10:38:44 +0000583depending on the memory ordering convention obeyed by the compiler
584even that may not be necessary).
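
For illustration only, a flush directive reduces to a single runtime call
(loc is a placeholder ident_t describing the source location):
@code
// #pragma omp flush
__kmpc_flush(&loc);
@endcode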
585*/
586void
Andrey Churbanov723a6b62015-02-20 18:09:27 +0000587__kmpc_flush(ident_t *loc)
Jim Cownie5e8470a2013-09-27 10:38:44 +0000588{
589 KC_TRACE( 10, ("__kmpc_flush: called\n" ) );
590
591 /* need explicit __mf() here since the library uses volatile instead */
592 KMP_MB(); /* Flush all pending memory write invalidates. */
593
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000594 #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
595 #if KMP_MIC
596 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
597 // We shouldn't need it, though, since the ABI rules require that
598 // * If the compiler generates NGO stores it also generates the fence
599 // * If users hand-code NGO stores they should insert the fence
600 // therefore no incomplete unordered stores should be visible.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000601 #else
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000602 // C74404
603 // This is to address non-temporal store instructions (sfence needed).
604 // The clflush instruction is also addressed (mfence needed).
605 // The non-temporal load instruction movntdqa should probably also be addressed.
606 // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
607 if ( ! __kmp_cpuinfo.initialized ) {
608 __kmp_query_cpuid( & __kmp_cpuinfo );
609 }; // if
610 if ( ! __kmp_cpuinfo.sse2 ) {
611 // CPU cannot execute SSE2 instructions.
612 } else {
613 #if KMP_COMPILER_ICC || KMP_COMPILER_MSVC
614 _mm_mfence();
615 #else
616 __sync_synchronize();
617 #endif // KMP_COMPILER_ICC
618 }; // if
619 #endif // KMP_MIC
Andrey Churbanovcbda8682015-01-13 14:43:35 +0000620 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64)
621 // Nothing to see here move along
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000622 #elif KMP_ARCH_PPC64
623 // Nothing needed here (we have a real MB above).
624 #if KMP_OS_CNK
625 // The flushing thread needs to yield here; this prevents a
626 // busy-waiting thread from saturating the pipeline. flush is
627 // often used in loops like this:
628 // while (!flag) {
629 // #pragma omp flush(flag)
630 // }
631 // and adding the yield here is good for at least a 10x speedup
632 // when running >2 threads per core (on the NAS LU benchmark).
633 __kmp_yield(TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000634 #endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000635 #else
636 #error Unknown or unsupported architecture
637 #endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000638
639}
640
641/* -------------------------------------------------------------------------- */
642
643/* -------------------------------------------------------------------------- */
644
645/*!
646@ingroup SYNCHRONIZATION
647@param loc source location information
648@param global_tid thread id.
649
650Execute a barrier.
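
Illustrative lowering (placeholder names; a real compiler typically caches the gtid):
@code
// #pragma omp barrier
__kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));
@endcode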
651*/
652void
653__kmpc_barrier(ident_t *loc, kmp_int32 global_tid)
654{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000655 KMP_COUNT_BLOCK(OMP_BARRIER);
656 KMP_TIME_BLOCK(OMP_barrier);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000657 int explicit_barrier_flag;
658 KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) );
659
660 if (! TCR_4(__kmp_init_parallel))
661 __kmp_parallel_initialize();
662
663 if ( __kmp_env_consistency_check ) {
664 if ( loc == 0 ) {
665 KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
666 }; // if
667
668 __kmp_check_barrier( global_tid, ct_barrier, loc );
669 }
670
671 __kmp_threads[ global_tid ]->th.th_ident = loc;
672 // TODO: explicit barrier_wait_id:
673 // this function is called when 'barrier' directive is present or
674 // implicit barrier at the end of a worksharing construct.
675 // 1) better to add a per-thread barrier counter to a thread data structure
676 // 2) set to 0 when a new team is created
677 // 4) no sync is required
678
679 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
680}
681
682/* The BARRIER for a MASTER section is always explicit */
683/*!
684@ingroup WORK_SHARING
685@param loc source location information.
686@param global_tid global thread number .
687@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
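
The expected usage pattern, sketched with placeholder names: only the thread for which
this returns 1 executes the block and then closes it with __kmpc_end_master().
@code
// #pragma omp master
if (__kmpc_master(&loc, gtid)) {
    // ... master block ...
    __kmpc_end_master(&loc, gtid);
}
@endcode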
688*/
689kmp_int32
690__kmpc_master(ident_t *loc, kmp_int32 global_tid)
691{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000692 KMP_COUNT_BLOCK(OMP_MASTER);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000693 int status = 0;
694
695 KC_TRACE( 10, ("__kmpc_master: called T#%d\n", global_tid ) );
696
697 if( ! TCR_4( __kmp_init_parallel ) )
698 __kmp_parallel_initialize();
699
700 if( KMP_MASTER_GTID( global_tid ))
701 status = 1;
702
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000703#if OMPT_SUPPORT && OMPT_TRACE
704 if (status) {
705 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
706 kmp_team_t *team = this_thr -> th.th_team;
707 if ((ompt_status == ompt_status_track_callback) &&
708 ompt_callbacks.ompt_callback(ompt_event_master_begin)) {
709 int tid = __kmp_tid_from_gtid( global_tid );
710 ompt_callbacks.ompt_callback(ompt_event_master_begin)(
711 team->t.ompt_team_info.parallel_id,
712 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
713 }
714 }
715#endif
716
Jim Cownie5e8470a2013-09-27 10:38:44 +0000717 if ( __kmp_env_consistency_check ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000718#if KMP_USE_DYNAMIC_LOCK
719 if (status)
720 __kmp_push_sync( global_tid, ct_master, loc, NULL, 0 );
721 else
722 __kmp_check_sync( global_tid, ct_master, loc, NULL, 0 );
723#else
Jim Cownie5e8470a2013-09-27 10:38:44 +0000724 if (status)
725 __kmp_push_sync( global_tid, ct_master, loc, NULL );
726 else
727 __kmp_check_sync( global_tid, ct_master, loc, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000728#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000729 }
730
731 return status;
732}
733
734/*!
735@ingroup WORK_SHARING
736@param loc source location information.
737@param global_tid global thread number .
738
739Mark the end of a <tt>master</tt> region. This should only be called by the thread
740that executes the <tt>master</tt> region.
741*/
742void
743__kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
744{
745 KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );
746
747 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
748
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000749#if OMPT_SUPPORT && OMPT_TRACE
750 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
751 kmp_team_t *team = this_thr -> th.th_team;
752 if ((ompt_status == ompt_status_track_callback) &&
753 ompt_callbacks.ompt_callback(ompt_event_master_end)) {
754 int tid = __kmp_tid_from_gtid( global_tid );
755 ompt_callbacks.ompt_callback(ompt_event_master_end)(
756 team->t.ompt_team_info.parallel_id,
757 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
758 }
759#endif
760
Jim Cownie5e8470a2013-09-27 10:38:44 +0000761 if ( __kmp_env_consistency_check ) {
762 if( global_tid < 0 )
763 KMP_WARNING( ThreadIdentInvalid );
764
765 if( KMP_MASTER_GTID( global_tid ))
766 __kmp_pop_sync( global_tid, ct_master, loc );
767 }
768}
769
770/*!
771@ingroup WORK_SHARING
772@param loc source location information.
773@param gtid global thread number.
774
775Start execution of an <tt>ordered</tt> construct.
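
A sketch of how an ordered block inside a loop body is bracketed by the runtime calls
(gtid and loc are placeholders):
@code
// #pragma omp ordered
__kmpc_ordered(&loc, gtid);
// ... statements that must execute in iteration order ...
__kmpc_end_ordered(&loc, gtid);
@endcode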
776*/
777void
778__kmpc_ordered( ident_t * loc, kmp_int32 gtid )
779{
780 int cid = 0;
781 kmp_info_t *th;
782 KMP_DEBUG_ASSERT( __kmp_init_serial );
783
784 KC_TRACE( 10, ("__kmpc_ordered: called T#%d\n", gtid ));
785
786 if (! TCR_4(__kmp_init_parallel))
787 __kmp_parallel_initialize();
788
789#if USE_ITT_BUILD
790 __kmp_itt_ordered_prep( gtid );
791 // TODO: ordered_wait_id
792#endif /* USE_ITT_BUILD */
793
794 th = __kmp_threads[ gtid ];
795
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000796#if OMPT_SUPPORT && OMPT_TRACE
797 if (ompt_status & ompt_status_track) {
798 /* OMPT state update */
799 th->th.ompt_thread_info.wait_id = (uint64_t) loc;
800 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
801
802 /* OMPT event callback */
803 if ((ompt_status == ompt_status_track_callback) &&
804 ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) {
805 ompt_callbacks.ompt_callback(ompt_event_wait_ordered)(
806 th->th.ompt_thread_info.wait_id);
807 }
808 }
809#endif
810
Jim Cownie5e8470a2013-09-27 10:38:44 +0000811 if ( th -> th.th_dispatch -> th_deo_fcn != 0 )
812 (*th->th.th_dispatch->th_deo_fcn)( & gtid, & cid, loc );
813 else
814 __kmp_parallel_deo( & gtid, & cid, loc );
815
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000816#if OMPT_SUPPORT && OMPT_TRACE
817 if (ompt_status & ompt_status_track) {
818 /* OMPT state update */
819 th->th.ompt_thread_info.state = ompt_state_work_parallel;
820 th->th.ompt_thread_info.wait_id = 0;
821
822 /* OMPT event callback */
823 if ((ompt_status == ompt_status_track_callback) &&
824 ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) {
825 ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)(
826 th->th.ompt_thread_info.wait_id);
827 }
828 }
829#endif
830
Jim Cownie5e8470a2013-09-27 10:38:44 +0000831#if USE_ITT_BUILD
832 __kmp_itt_ordered_start( gtid );
833#endif /* USE_ITT_BUILD */
834}
835
836/*!
837@ingroup WORK_SHARING
838@param loc source location information.
839@param gtid global thread number.
840
841End execution of an <tt>ordered</tt> construct.
842*/
843void
844__kmpc_end_ordered( ident_t * loc, kmp_int32 gtid )
845{
846 int cid = 0;
847 kmp_info_t *th;
848
849 KC_TRACE( 10, ("__kmpc_end_ordered: called T#%d\n", gtid ) );
850
851#if USE_ITT_BUILD
852 __kmp_itt_ordered_end( gtid );
853 // TODO: ordered_wait_id
854#endif /* USE_ITT_BUILD */
855
856 th = __kmp_threads[ gtid ];
857
858 if ( th -> th.th_dispatch -> th_dxo_fcn != 0 )
859 (*th->th.th_dispatch->th_dxo_fcn)( & gtid, & cid, loc );
860 else
861 __kmp_parallel_dxo( & gtid, & cid, loc );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000862
863#if OMPT_SUPPORT && OMPT_BLAME
864 if ((ompt_status == ompt_status_track_callback) &&
865 ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
866 ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
867 th->th.ompt_thread_info.wait_id);
868 }
869#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000870}
871
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000872#if KMP_USE_DYNAMIC_LOCK
873
874static __forceinline kmp_indirect_lock_t *
875__kmp_get_indirect_csptr(kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid, kmp_dyna_lockseq_t seq)
876{
877 // Code from __kmp_get_critical_section_ptr
878 // This function returns an indirect lock object instead of a user lock.
879 kmp_indirect_lock_t **lck, *ret;
880 lck = (kmp_indirect_lock_t **)crit;
881 ret = (kmp_indirect_lock_t *)TCR_PTR(*lck);
882 if (ret == NULL) {
883 void *idx;
884 kmp_indirect_locktag_t tag = DYNA_GET_I_TAG(seq);
885 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
886 ret = ilk;
887 DYNA_I_LOCK_FUNC(ilk, init)(ilk->lock);
888 DYNA_SET_I_LOCK_LOCATION(ilk, loc);
889 DYNA_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
890 KA_TRACE(20, ("__kmp_get_indirect_csptr: initialized indirect lock #%d\n", tag));
891#if USE_ITT_BUILD
892 __kmp_itt_critical_creating(ilk->lock, loc);
893#endif
894 int status = KMP_COMPARE_AND_STORE_PTR(lck, 0, ilk);
895 if (status == 0) {
896#if USE_ITT_BUILD
897 __kmp_itt_critical_destroyed(ilk->lock);
898#endif
899 // Postponing destroy, to avoid costly dispatch here.
900 //DYNA_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
901 ret = (kmp_indirect_lock_t *)TCR_PTR(*lck);
902 KMP_DEBUG_ASSERT(ret != NULL);
903 }
904 }
905 return ret;
906}
907
908// Fast-path acquire tas lock
909#define DYNA_ACQUIRE_TAS_LOCK(lock, gtid) { \
910 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
911 if (l->lk.poll != DYNA_LOCK_FREE(tas) || \
912 ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas))) { \
913 kmp_uint32 spins; \
914 KMP_FSYNC_PREPARE(l); \
915 KMP_INIT_YIELD(spins); \
916 if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
917 KMP_YIELD(TRUE); \
918 } else { \
919 KMP_YIELD_SPIN(spins); \
920 } \
921 while (l->lk.poll != DYNA_LOCK_FREE(tas) || \
922 ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas))) { \
923 if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
924 KMP_YIELD(TRUE); \
925 } else { \
926 KMP_YIELD_SPIN(spins); \
927 } \
928 } \
929 } \
930 KMP_FSYNC_ACQUIRED(l); \
931}
932
933// Fast-path test tas lock
934#define DYNA_TEST_TAS_LOCK(lock, gtid, rc) { \
935 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
936 rc = l->lk.poll == DYNA_LOCK_FREE(tas) && \
937 KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas)); \
938}
939
940// Fast-path release tas lock
941#define DYNA_RELEASE_TAS_LOCK(lock, gtid) { \
942 TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, DYNA_LOCK_FREE(tas)); \
943 KMP_MB(); \
944}
945
946#if DYNA_HAS_FUTEX
947
948# include <unistd.h>
949# include <sys/syscall.h>
950# ifndef FUTEX_WAIT
951# define FUTEX_WAIT 0
952# endif
953# ifndef FUTEX_WAKE
954# define FUTEX_WAKE 1
955# endif
956
957// Fast-path acquire futex lock
958#define DYNA_ACQUIRE_FUTEX_LOCK(lock, gtid) { \
959 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
960 kmp_int32 gtid_code = (gtid+1) << 1; \
961 KMP_MB(); \
962 KMP_FSYNC_PREPARE(ftx); \
963 kmp_int32 poll_val; \
964 while ((poll_val = KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), DYNA_LOCK_FREE(futex), \
965 DYNA_LOCK_BUSY(gtid_code, futex))) != DYNA_LOCK_FREE(futex)) { \
966 kmp_int32 cond = DYNA_LOCK_STRIP(poll_val) & 1; \
967 if (!cond) { \
968 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, poll_val | DYNA_LOCK_BUSY(1, futex))) { \
969 continue; \
970 } \
971 poll_val |= DYNA_LOCK_BUSY(1, futex); \
972 } \
973 kmp_int32 rc; \
974 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, NULL, NULL, 0)) != 0) { \
975 continue; \
976 } \
977 gtid_code |= 1; \
978 } \
979 KMP_FSYNC_ACQUIRED(ftx); \
980}
981
982// Fast-path test futex lock
983#define DYNA_TEST_FUTEX_LOCK(lock, gtid, rc) { \
984 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
985 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), DYNA_LOCK_FREE(futex), DYNA_LOCK_BUSY(gtid+1, futex) << 1)) { \
986 KMP_FSYNC_ACQUIRED(ftx); \
987 rc = TRUE; \
988 } else { \
989 rc = FALSE; \
990 } \
991}
992
993// Fast-path release futex lock
994#define DYNA_RELEASE_FUTEX_LOCK(lock, gtid) { \
995 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
996 KMP_MB(); \
997 KMP_FSYNC_RELEASING(ftx); \
998 kmp_int32 poll_val = KMP_XCHG_FIXED32(&(ftx->lk.poll), DYNA_LOCK_FREE(futex)); \
999 if (DYNA_LOCK_STRIP(poll_val) & 1) { \
1000 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, DYNA_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1001 } \
1002 KMP_MB(); \
1003 KMP_YIELD(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
1004}
1005
1006#endif // DYNA_HAS_FUTEX
1007
1008#else // KMP_USE_DYNAMIC_LOCK
1009
Jim Cownie5e8470a2013-09-27 10:38:44 +00001010static kmp_user_lock_p
1011__kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid )
1012{
1013 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1014
1015 //
1016 // Because of the double-check, the following load
1017 // doesn't need to be volatile.
1018 //
1019 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
1020
1021 if ( lck == NULL ) {
1022 void * idx;
1023
1024 // Allocate & initialize the lock.
1025 // Remember allocated locks in table in order to free them in __kmp_cleanup()
1026 lck = __kmp_user_lock_allocate( &idx, gtid, kmp_lf_critical_section );
1027 __kmp_init_user_lock_with_checks( lck );
1028 __kmp_set_user_lock_location( lck, loc );
1029#if USE_ITT_BUILD
1030 __kmp_itt_critical_creating( lck );
1031 // __kmp_itt_critical_creating() should be called *before* the first usage of underlying
1032 // lock. It is the only place where we can guarantee it. There are chances the lock will
1033 // be destroyed with no usage, but it is not a problem, because this is not a real event seen
1034 // by the user but rather the setting of a name for the object (lock). See more details in kmp_itt.h.
1035#endif /* USE_ITT_BUILD */
1036
1037 //
1038 // Use a cmpxchg instruction to slam the start of the critical
1039 // section with the lock pointer. If another thread beat us
1040 // to it, deallocate the lock, and use the lock that the other
1041 // thread allocated.
1042 //
1043 int status = KMP_COMPARE_AND_STORE_PTR( lck_pp, 0, lck );
1044
1045 if ( status == 0 ) {
1046 // Deallocate the lock and reload the value.
1047#if USE_ITT_BUILD
1048 __kmp_itt_critical_destroyed( lck );
1049 // Let ITT know the lock is destroyed and the same memory location may be reused for
1050 // another purpose.
1051#endif /* USE_ITT_BUILD */
1052 __kmp_destroy_user_lock_with_checks( lck );
1053 __kmp_user_lock_free( &idx, gtid, lck );
1054 lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
1055 KMP_DEBUG_ASSERT( lck != NULL );
1056 }
1057 }
1058 return lck;
1059}
1060
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001061#endif // KMP_USE_DYNAMIC_LOCK
1062
Jim Cownie5e8470a2013-09-27 10:38:44 +00001063/*!
1064@ingroup WORK_SHARING
1065@param loc source location information.
1066@param global_tid global thread number .
1067@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
1068some other suitably unique value.
1069
1070Enter code protected by a `critical` construct.
1071This function blocks until the executing thread can enter the critical section.
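
A sketch of the expected call pattern; crit_name is a placeholder for the
zero-initialized kmp_critical_name object that the compiler emits once per
named critical section and shares across all of its occurrences:
@code
static kmp_critical_name crit_name;      // zero-initialized lock word(s)

// #pragma omp critical (name)
__kmpc_critical(&loc, gtid, &crit_name);
// ... protected code ...
__kmpc_end_critical(&loc, gtid, &crit_name);
@endcode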
1072*/
1073void
1074__kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001075 KMP_COUNT_BLOCK(OMP_CRITICAL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001076
1077 kmp_user_lock_p lck;
1078
1079 KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );
1080
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001081#if KMP_USE_DYNAMIC_LOCK
1082 // Assumption: all direct locks fit in OMP_CRITICAL_SIZE.
1083 // The global sequence __kmp_user_lock_seq is used unless compiler pushes a value.
1084 if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
1085 lck = (kmp_user_lock_p)crit;
1086 // The thread that reaches here first needs to tag the lock word.
1087 if (*((kmp_dyna_lock_t *)lck) == 0) {
1088 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)lck, 0, DYNA_GET_D_TAG(__kmp_user_lock_seq));
1089 }
1090 if (__kmp_env_consistency_check) {
1091 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
1092 }
1093# if USE_ITT_BUILD
1094 __kmp_itt_critical_acquiring(lck);
1095# endif
1096# if DYNA_USE_FAST_TAS
1097 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1098 DYNA_ACQUIRE_TAS_LOCK(lck, global_tid);
1099 } else
1100# elif DYNA_USE_FAST_FUTEX
1101 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1102 DYNA_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1103 } else
1104# endif
1105 {
1106 DYNA_D_LOCK_FUNC(lck, set)((kmp_dyna_lock_t *)lck, global_tid);
1107 }
1108 } else {
1109 kmp_indirect_lock_t *ilk = __kmp_get_indirect_csptr(crit, loc, global_tid, __kmp_user_lock_seq);
1110 lck = ilk->lock;
1111 if (__kmp_env_consistency_check) {
1112 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
1113 }
1114# if USE_ITT_BUILD
1115 __kmp_itt_critical_acquiring(lck);
1116# endif
1117 DYNA_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1118 }
1119
1120#else // KMP_USE_DYNAMIC_LOCK
1121
Jim Cownie5e8470a2013-09-27 10:38:44 +00001122 //TODO: add THR_OVHD_STATE
1123
1124 KMP_CHECK_USER_LOCK_INIT();
1125
1126 if ( ( __kmp_user_lock_kind == lk_tas )
1127 && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1128 lck = (kmp_user_lock_p)crit;
1129 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001130#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001131 else if ( ( __kmp_user_lock_kind == lk_futex )
1132 && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1133 lck = (kmp_user_lock_p)crit;
1134 }
1135#endif
1136 else { // ticket, queuing or drdpa
1137 lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
1138 }
1139
1140 if ( __kmp_env_consistency_check )
1141 __kmp_push_sync( global_tid, ct_critical, loc, lck );
1142
1143 /* since the critical directive binds to all threads, not just
1144 * the current team we have to check this even if we are in a
1145 * serialized team */
1146 /* also, even if we are the uber thread, we still have to conduct the lock,
1147 * as we have to contend with sibling threads */
1148
1149#if USE_ITT_BUILD
1150 __kmp_itt_critical_acquiring( lck );
1151#endif /* USE_ITT_BUILD */
1152 // Value of 'crit' should be good for using as a critical_id of the critical section directive.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001153 __kmp_acquire_user_lock_with_checks( lck, global_tid );
1154
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001155#endif // KMP_USE_DYNAMIC_LOCK
1156
Jim Cownie5e8470a2013-09-27 10:38:44 +00001157#if USE_ITT_BUILD
1158 __kmp_itt_critical_acquired( lck );
1159#endif /* USE_ITT_BUILD */
1160
1161 KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
1162} // __kmpc_critical
1163
1164/*!
1165@ingroup WORK_SHARING
1166@param loc source location information.
1167@param global_tid global thread number .
1168@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
1169some other suitably unique value.
1170
1171Leave a critical section, releasing any lock that was held during its execution.
1172*/
1173void
1174__kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
1175{
1176 kmp_user_lock_p lck;
1177
1178 KC_TRACE( 10, ("__kmpc_end_critical: called T#%d\n", global_tid ));
1179
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001180#if KMP_USE_DYNAMIC_LOCK
1181 if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
1182 lck = (kmp_user_lock_p)crit;
1183 KMP_ASSERT(lck != NULL);
1184 if (__kmp_env_consistency_check) {
1185 __kmp_pop_sync(global_tid, ct_critical, loc);
1186 }
1187# if USE_ITT_BUILD
1188 __kmp_itt_critical_releasing( lck );
1189# endif
1190# if DYNA_USE_FAST_TAS
1191 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1192 DYNA_RELEASE_TAS_LOCK(lck, global_tid);
1193 } else
1194# elif DYNA_USE_FAST_FUTEX
1195 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1196 DYNA_RELEASE_FUTEX_LOCK(lck, global_tid);
1197 } else
1198# endif
1199 {
1200 DYNA_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1201 }
1202 } else {
1203 kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1204 KMP_ASSERT(ilk != NULL);
1205 lck = ilk->lock;
1206 if (__kmp_env_consistency_check) {
1207 __kmp_pop_sync(global_tid, ct_critical, loc);
1208 }
1209# if USE_ITT_BUILD
1210 __kmp_itt_critical_releasing( lck );
1211# endif
1212 DYNA_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1213 }
1214
1215#else // KMP_USE_DYNAMIC_LOCK
1216
Jim Cownie5e8470a2013-09-27 10:38:44 +00001217 if ( ( __kmp_user_lock_kind == lk_tas )
1218 && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1219 lck = (kmp_user_lock_p)crit;
1220 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001221#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001222 else if ( ( __kmp_user_lock_kind == lk_futex )
1223 && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1224 lck = (kmp_user_lock_p)crit;
1225 }
1226#endif
1227 else { // ticket, queuing or drdpa
1228 lck = (kmp_user_lock_p) TCR_PTR(*((kmp_user_lock_p *)crit));
1229 }
1230
1231 KMP_ASSERT(lck != NULL);
1232
1233 if ( __kmp_env_consistency_check )
1234 __kmp_pop_sync( global_tid, ct_critical, loc );
1235
1236#if USE_ITT_BUILD
1237 __kmp_itt_critical_releasing( lck );
1238#endif /* USE_ITT_BUILD */
1239 // Value of 'crit' should be good for using as a critical_id of the critical section directive.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001240 __kmp_release_user_lock_with_checks( lck, global_tid );
1241
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001242#if OMPT_SUPPORT && OMPT_BLAME
1243 if ((ompt_status == ompt_status_track_callback) &&
1244 ompt_callbacks.ompt_callback(ompt_event_release_critical)) {
1245 ompt_callbacks.ompt_callback(ompt_event_release_critical)(
1246 (uint64_t) lck);
1247 }
1248#endif
1249
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001250#endif // KMP_USE_DYNAMIC_LOCK
1251
Jim Cownie5e8470a2013-09-27 10:38:44 +00001252 KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
1253}
1254
1255/*!
1256@ingroup SYNCHRONIZATION
1257@param loc source location information
1258@param global_tid thread id.
1259@return one if the thread should execute the master block, zero otherwise
1260
1261Start execution of a combined barrier and master. The barrier is executed inside this function.
1262*/
1263kmp_int32
1264__kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid)
1265{
1266 int status;
1267
1268 KC_TRACE( 10, ("__kmpc_barrier_master: called T#%d\n", global_tid ) );
1269
1270 if (! TCR_4(__kmp_init_parallel))
1271 __kmp_parallel_initialize();
1272
1273 if ( __kmp_env_consistency_check )
1274 __kmp_check_barrier( global_tid, ct_barrier, loc );
1275
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001276#if USE_ITT_NOTIFY
1277 __kmp_threads[global_tid]->th.th_ident = loc;
1278#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001279 status = __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL );
1280
1281 return (status != 0) ? 0 : 1;
1282}
1283
1284/*!
1285@ingroup SYNCHRONIZATION
1286@param loc source location information
1287@param global_tid thread id.
1288
1289Complete the execution of a combined barrier and master. This function should
1290only be called at the completion of the <tt>master</tt> code. Other threads will
1291still be waiting at the barrier and this call releases them.
1292*/
1293void
1294__kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid)
1295{
1296 KC_TRACE( 10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid ));
1297
1298 __kmp_end_split_barrier ( bs_plain_barrier, global_tid );
1299}
1300
1301/*!
1302@ingroup SYNCHRONIZATION
1303@param loc source location information
1304@param global_tid thread id.
1305@return one if the thread should execute the master block, zero otherwise
1306
1307Start execution of a combined barrier and master(nowait) construct.
1308The barrier is executed inside this function.
1309There is no equivalent "end" function, since the nowait form needs no synchronization at the end of the master block; the consistency-check cleanup that __kmpc_end_master would otherwise perform is done inside this function.
1310*/
1311kmp_int32
1312__kmpc_barrier_master_nowait( ident_t * loc, kmp_int32 global_tid )
1313{
1314 kmp_int32 ret;
1315
1316 KC_TRACE( 10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid ));
1317
1318 if (! TCR_4(__kmp_init_parallel))
1319 __kmp_parallel_initialize();
1320
1321 if ( __kmp_env_consistency_check ) {
1322 if ( loc == 0 ) {
1323 KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
1324 }
1325 __kmp_check_barrier( global_tid, ct_barrier, loc );
1326 }
1327
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001328#if USE_ITT_NOTIFY
1329 __kmp_threads[global_tid]->th.th_ident = loc;
1330#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001331 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
1332
1333 ret = __kmpc_master (loc, global_tid);
1334
1335 if ( __kmp_env_consistency_check ) {
1336 /* there's no __kmpc_end_master called; so the (stats) */
1337 /* actions of __kmpc_end_master are done here */
1338
1339 if ( global_tid < 0 ) {
1340 KMP_WARNING( ThreadIdentInvalid );
1341 }
1342 if (ret) {
1343 /* only one thread should do the pop since only */
1344 /* one did the push (see __kmpc_master()) */
1345
1346 __kmp_pop_sync( global_tid, ct_master, loc );
1347 }
1348 }
1349
1350 return (ret);
1351}
1352
1353/* The BARRIER for a SINGLE process section is always explicit */
1354/*!
1355@ingroup WORK_SHARING
1356@param loc source location information
1357@param global_tid global thread number
1358@return One if this thread should execute the single construct, zero otherwise.
1359
1360Test whether to execute a <tt>single</tt> construct.
1361There are no implicit barriers in the two "single" calls; rather, the compiler should
1362introduce an explicit barrier if it is required.
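
Since the runtime adds no barrier of its own, a sketch of the lowering for a single
construct without a nowait clause looks roughly like this (placeholder names):
@code
// #pragma omp single
if (__kmpc_single(&loc, gtid)) {
    // ... single block ...
    __kmpc_end_single(&loc, gtid);
}
__kmpc_barrier(&loc, gtid);              // explicit barrier emitted by the compiler
@endcode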
1363*/
1364
1365kmp_int32
1366__kmpc_single(ident_t *loc, kmp_int32 global_tid)
1367{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001368 KMP_COUNT_BLOCK(OMP_SINGLE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001369 kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001370
1371#if OMPT_SUPPORT && OMPT_TRACE
1372 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
1373 kmp_team_t *team = this_thr -> th.th_team;
1374 int tid = __kmp_tid_from_gtid( global_tid );
1375
1376 if ((ompt_status == ompt_status_track_callback)) {
1377 if (rc) {
1378 if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) {
1379 ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)(
1380 team->t.ompt_team_info.parallel_id,
1381 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id,
1382 team->t.ompt_team_info.microtask);
1383 }
1384 } else {
1385 if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) {
1386 ompt_callbacks.ompt_callback(ompt_event_single_others_begin)(
1387 team->t.ompt_team_info.parallel_id,
1388 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1389 }
1390 this_thr->th.ompt_thread_info.state = ompt_state_wait_single;
1391 }
1392 }
1393#endif
1394
Jim Cownie5e8470a2013-09-27 10:38:44 +00001395 return rc;
1396}
1397
1398/*!
1399@ingroup WORK_SHARING
1400@param loc source location information
1401@param global_tid global thread number
1402
1403Mark the end of a <tt>single</tt> construct. This function should
1404only be called by the thread that executed the block of code protected
1405by the `single` construct.
1406*/
1407void
1408__kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
1409{
1410 __kmp_exit_single( global_tid );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001411
1412#if OMPT_SUPPORT && OMPT_TRACE
1413 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
1414 kmp_team_t *team = this_thr -> th.th_team;
1415 int tid = __kmp_tid_from_gtid( global_tid );
1416
1417 if ((ompt_status == ompt_status_track_callback) &&
1418 ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) {
1419 ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)(
1420 team->t.ompt_team_info.parallel_id,
1421 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1422 }
1423#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001424}
1425
1426/*!
1427@ingroup WORK_SHARING
1428@param loc Source location
1429@param global_tid Global thread id
1430
1431Mark the end of a statically scheduled loop.
1432*/
1433void
1434__kmpc_for_static_fini( ident_t *loc, kmp_int32 global_tid )
1435{
1436 KE_TRACE( 10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1437
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001438#if OMPT_SUPPORT && OMPT_TRACE
1439 kmp_info_t *this_thr = __kmp_threads[ global_tid ];
1440 kmp_team_t *team = this_thr -> th.th_team;
1441 int tid = __kmp_tid_from_gtid( global_tid );
1442
1443 if ((ompt_status == ompt_status_track_callback) &&
1444 ompt_callbacks.ompt_callback(ompt_event_loop_end)) {
1445 ompt_callbacks.ompt_callback(ompt_event_loop_end)(
1446 team->t.ompt_team_info.parallel_id,
1447 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1448 }
1449#endif
1450
Jim Cownie5e8470a2013-09-27 10:38:44 +00001451 if ( __kmp_env_consistency_check )
1452 __kmp_pop_workshare( global_tid, ct_pdo, loc );
1453}
1454
1455/*
1456 * User routines which take C-style arguments (call by value)
1457 * different from the Fortran equivalent routines
1458 */
1459
1460void
1461ompc_set_num_threads( int arg )
1462{
1463// !!!!! TODO: check the per-task binding
1464 __kmp_set_num_threads( arg, __kmp_entry_gtid() );
1465}
1466
1467void
1468ompc_set_dynamic( int flag )
1469{
1470 kmp_info_t *thread;
1471
1472 /* For the thread-private implementation of the internal controls */
1473 thread = __kmp_entry_thread();
1474
1475 __kmp_save_internal_controls( thread );
1476
1477 set__dynamic( thread, flag ? TRUE : FALSE );
1478}
1479
1480void
1481ompc_set_nested( int flag )
1482{
1483 kmp_info_t *thread;
1484
1485 /* For the thread-private internal controls implementation */
1486 thread = __kmp_entry_thread();
1487
1488 __kmp_save_internal_controls( thread );
1489
1490 set__nested( thread, flag ? TRUE : FALSE );
1491}
1492
Jim Cownie5e8470a2013-09-27 10:38:44 +00001493void
1494ompc_set_max_active_levels( int max_active_levels )
1495{
1496 /* TO DO */
1497 /* we want per-task implementation of this internal control */
1498
1499 /* For the per-thread internal controls implementation */
1500 __kmp_set_max_active_levels( __kmp_entry_gtid(), max_active_levels );
1501}
1502
1503void
1504ompc_set_schedule( omp_sched_t kind, int modifier )
1505{
1506// !!!!! TODO: check the per-task binding
1507 __kmp_set_schedule( __kmp_entry_gtid(), ( kmp_sched_t ) kind, modifier );
1508}
1509
1510int
1511ompc_get_ancestor_thread_num( int level )
1512{
1513 return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), level );
1514}
1515
1516int
1517ompc_get_team_size( int level )
1518{
1519 return __kmp_get_team_size( __kmp_entry_gtid(), level );
1520}
1521
Jim Cownie5e8470a2013-09-27 10:38:44 +00001522void
1523kmpc_set_stacksize( int arg )
1524{
1525 // __kmp_aux_set_stacksize initializes the library if needed
1526 __kmp_aux_set_stacksize( arg );
1527}
1528
1529void
1530kmpc_set_stacksize_s( size_t arg )
1531{
1532 // __kmp_aux_set_stacksize initializes the library if needed
1533 __kmp_aux_set_stacksize( arg );
1534}
1535
1536void
1537kmpc_set_blocktime( int arg )
1538{
1539 int gtid, tid;
1540 kmp_info_t *thread;
1541
1542 gtid = __kmp_entry_gtid();
1543 tid = __kmp_tid_from_gtid(gtid);
1544 thread = __kmp_thread_from_gtid(gtid);
1545
1546 __kmp_aux_set_blocktime( arg, thread, tid );
1547}
1548
1549void
1550kmpc_set_library( int arg )
1551{
1552 // __kmp_user_set_library initializes the library if needed
1553 __kmp_user_set_library( (enum library_type)arg );
1554}
1555
1556void
1557kmpc_set_defaults( char const * str )
1558{
1559 // __kmp_aux_set_defaults initializes the library if needed
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001560 __kmp_aux_set_defaults( str, KMP_STRLEN( str ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001561}
1562
Jim Cownie5e8470a2013-09-27 10:38:44 +00001563int
1564kmpc_set_affinity_mask_proc( int proc, void **mask )
1565{
Alp Toker98758b02014-03-02 04:12:06 +00001566#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001567 return -1;
1568#else
1569 if ( ! TCR_4(__kmp_init_middle) ) {
1570 __kmp_middle_initialize();
1571 }
1572 return __kmp_aux_set_affinity_mask_proc( proc, mask );
1573#endif
1574}
1575
1576int
1577kmpc_unset_affinity_mask_proc( int proc, void **mask )
1578{
Alp Toker98758b02014-03-02 04:12:06 +00001579#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001580 return -1;
1581#else
1582 if ( ! TCR_4(__kmp_init_middle) ) {
1583 __kmp_middle_initialize();
1584 }
1585 return __kmp_aux_unset_affinity_mask_proc( proc, mask );
1586#endif
1587}
1588
1589int
1590kmpc_get_affinity_mask_proc( int proc, void **mask )
1591{
Alp Toker98758b02014-03-02 04:12:06 +00001592#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001593 return -1;
1594#else
1595 if ( ! TCR_4(__kmp_init_middle) ) {
1596 __kmp_middle_initialize();
1597 }
1598 return __kmp_aux_get_affinity_mask_proc( proc, mask );
1599#endif
1600}
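
/*
 * Illustrative sketch, not part of the runtime: the three affinity-mask entry
 * points above all return -1 when the build has no affinity support (KMP_STUB
 * or !KMP_AFFINITY_SUPPORTED), so callers should test for that case. The mask
 * is assumed to have been created beforehand through the runtime's
 * affinity-mask creation interface; only the calls shown above are taken from
 * this file, and the helper function name is hypothetical.
 * @code
 * // 'mask' must already point at a runtime-created affinity mask object.
 * int example_toggle_proc( int proc, void **mask )
 * {
 *     if ( kmpc_set_affinity_mask_proc( proc, mask ) == -1 )
 *         return -1;                                    // no affinity support in this build
 *     int present = kmpc_get_affinity_mask_proc( proc, mask );  // nonzero if proc is now in the mask
 *     kmpc_unset_affinity_mask_proc( proc, mask );      // remove it again
 *     return present;
 * }
 * @endcode
 */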
1601
Jim Cownie5e8470a2013-09-27 10:38:44 +00001602
1603/* -------------------------------------------------------------------------- */
1604/*!
1605@ingroup THREADPRIVATE
1606@param loc source location information
1607@param gtid global thread number
1608@param cpy_size size of the cpy_data buffer
1609@param cpy_data pointer to data to be copied
1610@param cpy_func helper function to call for copying data
1611@param didit flag variable: 1=single thread; 0=not single thread
1612
1613__kmpc_copyprivate implements the interface for the private data broadcast needed for
1614the copyprivate clause associated with a single region in an OpenMP<sup>*</sup> program (both C and Fortran).
1615All threads participating in the parallel region call this routine.
1616One of the threads (called the single thread) should have the <tt>didit</tt> variable set to 1
1617and all other threads should have that variable set to 0.
1618All threads pass a pointer to a data buffer (cpy_data) that they have built.
1619
1620The OpenMP specification forbids the use of nowait on the single region when a copyprivate
1621clause is present. However, @ref __kmpc_copyprivate implements a barrier internally to avoid
1622race conditions, so the code generation for the single region should avoid generating a barrier
1623after the call to @ref __kmpc_copyprivate.
1624
1625The <tt>gtid</tt> parameter is the global thread id for the current thread.
1626The <tt>loc</tt> parameter is a pointer to source location information.
1627
1628Internal implementation: The single thread will first copy its descriptor address (cpy_data)
1629to a team-private location, then the other threads will each call the function pointed to by
1630the parameter cpy_func, which carries out the copy by copying the data using the cpy_data buffer.
1631
1632The cpy_func routine used for the copy and the contents of the data area defined by cpy_data
1633and cpy_size may be built in any fashion that will allow the copy to be done. For instance,
1634the cpy_data buffer can hold the actual data to be copied or it may hold a list of pointers
1635to the data. The cpy_func routine must interpret the cpy_data buffer appropriately.
1636
1637The interface to cpy_func is as follows:
1638@code
1639void cpy_func( void *destination, void *source )
1640@endcode
1641where void *destination is the cpy_data pointer for the thread being copied to
1642and void *source is the cpy_data pointer for the thread being copied from.
1643*/
1644void
1645__kmpc_copyprivate( ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit )
1646{
1647 void **data_ptr;
1648
1649 KC_TRACE( 10, ("__kmpc_copyprivate: called T#%d\n", gtid ));
1650
1651 KMP_MB();
1652
1653 data_ptr = & __kmp_team_from_gtid( gtid )->t.t_copypriv_data;
1654
1655 if ( __kmp_env_consistency_check ) {
1656 if ( loc == 0 ) {
1657 KMP_WARNING( ConstructIdentInvalid );
1658 }
1659 }
1660
1661 /* ToDo: Optimize the following two barriers into some kind of split barrier */
1662
1663 if (didit) *data_ptr = cpy_data;
1664
1665 /* This barrier is not a barrier region boundary */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001666#if USE_ITT_NOTIFY
1667 __kmp_threads[gtid]->th.th_ident = loc;
1668#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001669 __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1670
1671 if (! didit) (*cpy_func)( cpy_data, *data_ptr );
1672
1673 /* Consider next barrier the user-visible barrier for barrier region boundaries */
1674 /* Nesting checks are already handled by the single construct checks */
1675
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001676#if USE_ITT_NOTIFY
1677 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. tasks can overwrite the location)
1678#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001679 __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1680}
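
/*
 * Illustrative sketch, not part of the runtime: roughly the call sequence a
 * compiler is expected to emit for "#pragma omp single copyprivate(x)" using
 * the entry point above. The outlined names are hypothetical, and the
 * single-construct entry points are assumed from elsewhere in this interface;
 * only the __kmpc_copyprivate contract (cpy_data buffer, cpy_func, didit)
 * follows the documentation above.
 * @code
 * static void example_copy_x( void *dst, void *src )
 * {
 *     // Both arguments are cpy_data buffers; here each points directly at 'x'.
 *     *(int *)dst = *(int *)src;
 * }
 *
 * void example_single_copyprivate( ident_t *loc, kmp_int32 gtid, int *x )
 * {
 *     kmp_int32 didit = 0;
 *     if ( __kmpc_single( loc, gtid ) ) {    // one thread executes the single region
 *         *x = 42;                           // produce the value to broadcast
 *         didit = 1;
 *         __kmpc_end_single( loc, gtid );
 *     }
 *     // Every thread passes the address of its own 'x' as the cpy_data buffer.
 *     __kmpc_copyprivate( loc, gtid, sizeof(int), x, example_copy_x, didit );
 *     // No extra barrier is generated here: __kmpc_copyprivate barriers internally.
 * }
 * @endcode
 */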
1681
1682/* -------------------------------------------------------------------------- */
1683
1684#define INIT_LOCK __kmp_init_user_lock_with_checks
1685#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
1686#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
1687#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
1688#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
1689#define ACQUIRE_NESTED_LOCK_TIMED __kmp_acquire_nested_user_lock_with_checks_timed
1690#define RELEASE_LOCK __kmp_release_user_lock_with_checks
1691#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
1692#define TEST_LOCK __kmp_test_user_lock_with_checks
1693#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
1694#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
1695#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
1696
1697
1698/*
1699 * TODO: Make check abort messages use location info & pass it
1700 * into with_checks routines
1701 */
1702
1703/* initialize the lock */
1704void
1705__kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001706#if KMP_USE_DYNAMIC_LOCK
1707 KMP_DEBUG_ASSERT(__kmp_init_serial);
1708 if (__kmp_env_consistency_check && user_lock == NULL) {
1709 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
1710 }
1711 if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
1712 DYNA_INIT_D_LOCK(user_lock, __kmp_user_lock_seq);
1713# if USE_ITT_BUILD
1714 __kmp_itt_lock_creating((kmp_user_lock_p)user_lock, NULL);
1715# endif
1716 } else {
1717 DYNA_INIT_I_LOCK(user_lock, __kmp_user_lock_seq);
1718 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(user_lock);
1719 DYNA_SET_I_LOCK_LOCATION(ilk, loc);
1720# if USE_ITT_BUILD
1721 __kmp_itt_lock_creating(ilk->lock, loc);
1722# endif
1723 }
1724
1725#else // KMP_USE_DYNAMIC_LOCK
1726
Jim Cownie5e8470a2013-09-27 10:38:44 +00001727 static char const * const func = "omp_init_lock";
1728 kmp_user_lock_p lck;
1729 KMP_DEBUG_ASSERT( __kmp_init_serial );
1730
1731 if ( __kmp_env_consistency_check ) {
1732 if ( user_lock == NULL ) {
1733 KMP_FATAL( LockIsUninitialized, func );
1734 }
1735 }
1736
1737 KMP_CHECK_USER_LOCK_INIT();
1738
1739 if ( ( __kmp_user_lock_kind == lk_tas )
1740 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1741 lck = (kmp_user_lock_p)user_lock;
1742 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001743#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001744 else if ( ( __kmp_user_lock_kind == lk_futex )
1745 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1746 lck = (kmp_user_lock_p)user_lock;
1747 }
1748#endif
1749 else {
Jim Cownie181b4bb2013-12-23 17:28:57 +00001750 lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001751 }
1752 INIT_LOCK( lck );
1753 __kmp_set_user_lock_location( lck, loc );
1754
1755#if USE_ITT_BUILD
1756 __kmp_itt_lock_creating( lck );
1757#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001758
1759#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001760} // __kmpc_init_lock
1761
1762/* initialize the lock */
1763void
1764__kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001765#if KMP_USE_DYNAMIC_LOCK
1766
1767 KMP_DEBUG_ASSERT(__kmp_init_serial);
1768 if (__kmp_env_consistency_check && user_lock == NULL) {
1769 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
1770 }
1771 // Invoke init function after converting to nested version.
1772 kmp_dyna_lockseq_t nested_seq;
1773 switch (__kmp_user_lock_seq) {
1774 case lockseq_tas: nested_seq = lockseq_nested_tas; break;
1775#if DYNA_HAS_FUTEX
1776 case lockseq_futex: nested_seq = lockseq_nested_futex; break;
1777#endif
1778 case lockseq_ticket: nested_seq = lockseq_nested_ticket; break;
1779 case lockseq_queuing: nested_seq = lockseq_nested_queuing; break;
1780 case lockseq_drdpa: nested_seq = lockseq_nested_drdpa; break;
1781 default: nested_seq = lockseq_nested_queuing; break;
1782 // Use nested queuing lock for lock kinds without "nested" implementation.
1783 }
1784 DYNA_INIT_I_LOCK(user_lock, nested_seq);
1785 // All nested locks are indirect locks.
1786 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(user_lock);
1787 DYNA_SET_I_LOCK_LOCATION(ilk, loc);
1788# if USE_ITT_BUILD
1789 __kmp_itt_lock_creating(ilk->lock, loc);
1790# endif
1791
1792#else // KMP_USE_DYNAMIC_LOCK
1793
Jim Cownie5e8470a2013-09-27 10:38:44 +00001794 static char const * const func = "omp_init_nest_lock";
1795 kmp_user_lock_p lck;
1796 KMP_DEBUG_ASSERT( __kmp_init_serial );
1797
1798 if ( __kmp_env_consistency_check ) {
1799 if ( user_lock == NULL ) {
1800 KMP_FATAL( LockIsUninitialized, func );
1801 }
1802 }
1803
1804 KMP_CHECK_USER_LOCK_INIT();
1805
1806 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1807 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1808 lck = (kmp_user_lock_p)user_lock;
1809 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001810#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001811 else if ( ( __kmp_user_lock_kind == lk_futex )
1812 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1813 <= OMP_NEST_LOCK_T_SIZE ) ) {
1814 lck = (kmp_user_lock_p)user_lock;
1815 }
1816#endif
1817 else {
Jim Cownie181b4bb2013-12-23 17:28:57 +00001818 lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001819 }
1820
1821 INIT_NESTED_LOCK( lck );
1822 __kmp_set_user_lock_location( lck, loc );
1823
1824#if USE_ITT_BUILD
1825 __kmp_itt_lock_creating( lck );
1826#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001827
1828#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001829} // __kmpc_init_nest_lock
1830
1831void
1832__kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001833#if KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001834
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001835# if USE_ITT_BUILD
1836 kmp_user_lock_p lck;
1837 if (DYNA_EXTRACT_D_TAG(user_lock) == 0) {
1838 lck = ((kmp_indirect_lock_t *)DYNA_LOOKUP_I_LOCK(user_lock))->lock;
1839 } else {
1840 lck = (kmp_user_lock_p)user_lock;
1841 }
1842 __kmp_itt_lock_destroyed(lck);
1843# endif
1844 DYNA_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
1845#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001846 kmp_user_lock_p lck;
1847
1848 if ( ( __kmp_user_lock_kind == lk_tas )
1849 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1850 lck = (kmp_user_lock_p)user_lock;
1851 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001852#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001853 else if ( ( __kmp_user_lock_kind == lk_futex )
1854 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1855 lck = (kmp_user_lock_p)user_lock;
1856 }
1857#endif
1858 else {
1859 lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_lock" );
1860 }
1861
1862#if USE_ITT_BUILD
1863 __kmp_itt_lock_destroyed( lck );
1864#endif /* USE_ITT_BUILD */
1865 DESTROY_LOCK( lck );
1866
1867 if ( ( __kmp_user_lock_kind == lk_tas )
1868 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1869 ;
1870 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001871#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001872 else if ( ( __kmp_user_lock_kind == lk_futex )
1873 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1874 ;
1875 }
1876#endif
1877 else {
1878 __kmp_user_lock_free( user_lock, gtid, lck );
1879 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001880#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001881} // __kmpc_destroy_lock
1882
1883/* destroy the lock */
1884void
1885__kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001886#if KMP_USE_DYNAMIC_LOCK
1887
1888# if USE_ITT_BUILD
1889 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(user_lock);
1890 __kmp_itt_lock_destroyed(ilk->lock);
1891# endif
1892 DYNA_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
1893
1894#else // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001895
1896 kmp_user_lock_p lck;
1897
1898 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1899 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1900 lck = (kmp_user_lock_p)user_lock;
1901 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001902#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001903 else if ( ( __kmp_user_lock_kind == lk_futex )
1904 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1905 <= OMP_NEST_LOCK_T_SIZE ) ) {
1906 lck = (kmp_user_lock_p)user_lock;
1907 }
1908#endif
1909 else {
1910 lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_nest_lock" );
1911 }
1912
1913#if USE_ITT_BUILD
1914 __kmp_itt_lock_destroyed( lck );
1915#endif /* USE_ITT_BUILD */
1916
1917 DESTROY_NESTED_LOCK( lck );
1918
1919 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1920 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1921 ;
1922 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001923#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001924 else if ( ( __kmp_user_lock_kind == lk_futex )
1925 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1926 <= OMP_NEST_LOCK_T_SIZE ) ) {
1927 ;
1928 }
1929#endif
1930 else {
1931 __kmp_user_lock_free( user_lock, gtid, lck );
1932 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001933#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001934} // __kmpc_destroy_nest_lock
1935
1936void
1937__kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001938 KMP_COUNT_BLOCK(OMP_set_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001939#if KMP_USE_DYNAMIC_LOCK
1940 int tag = DYNA_EXTRACT_D_TAG(user_lock);
1941# if USE_ITT_BUILD
1942 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); // itt function will get to the right lock object.
1943# endif
1944# if DYNA_USE_FAST_TAS
1945 if (tag == locktag_tas && !__kmp_env_consistency_check) {
1946 DYNA_ACQUIRE_TAS_LOCK(user_lock, gtid);
1947 } else
1948# elif DYNA_USE_FAST_FUTEX
1949 if (tag == locktag_futex && !__kmp_env_consistency_check) {
1950 DYNA_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
1951 } else
1952# endif
1953 {
1954 __kmp_direct_set_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid);
1955 }
1956# if USE_ITT_BUILD
1957 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
1958# endif
1959
1960#else // KMP_USE_DYNAMIC_LOCK
1961
Jim Cownie5e8470a2013-09-27 10:38:44 +00001962 kmp_user_lock_p lck;
1963
1964 if ( ( __kmp_user_lock_kind == lk_tas )
1965 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1966 lck = (kmp_user_lock_p)user_lock;
1967 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001968#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001969 else if ( ( __kmp_user_lock_kind == lk_futex )
1970 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1971 lck = (kmp_user_lock_p)user_lock;
1972 }
1973#endif
1974 else {
1975 lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" );
1976 }
1977
1978#if USE_ITT_BUILD
1979 __kmp_itt_lock_acquiring( lck );
1980#endif /* USE_ITT_BUILD */
1981
1982 ACQUIRE_LOCK( lck, gtid );
1983
1984#if USE_ITT_BUILD
1985 __kmp_itt_lock_acquired( lck );
1986#endif /* USE_ITT_BUILD */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001987
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001988#endif // KMP_USE_DYNAMIC_LOCK
1989}
Jim Cownie5e8470a2013-09-27 10:38:44 +00001990
1991void
1992__kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001993#if KMP_USE_DYNAMIC_LOCK
1994
1995# if USE_ITT_BUILD
1996 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
1997# endif
1998 DYNA_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
1999# if USE_ITT_BUILD
2000 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2001#endif
2002
2003#else // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002004 kmp_user_lock_p lck;
2005
2006 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2007 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2008 lck = (kmp_user_lock_p)user_lock;
2009 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002010#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002011 else if ( ( __kmp_user_lock_kind == lk_futex )
2012 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2013 <= OMP_NEST_LOCK_T_SIZE ) ) {
2014 lck = (kmp_user_lock_p)user_lock;
2015 }
2016#endif
2017 else {
2018 lck = __kmp_lookup_user_lock( user_lock, "omp_set_nest_lock" );
2019 }
2020
2021#if USE_ITT_BUILD
2022 __kmp_itt_lock_acquiring( lck );
2023#endif /* USE_ITT_BUILD */
2024
2025 ACQUIRE_NESTED_LOCK( lck, gtid );
2026
2027#if USE_ITT_BUILD
2028 __kmp_itt_lock_acquired( lck );
2029#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002030#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002031}
2032
2033void
2034__kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2035{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002036#if KMP_USE_DYNAMIC_LOCK
2037
2038 int tag = DYNA_EXTRACT_D_TAG(user_lock);
2039# if USE_ITT_BUILD
2040 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2041# endif
2042# if DYNA_USE_FAST_TAS
2043 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2044 DYNA_RELEASE_TAS_LOCK(user_lock, gtid);
2045 } else
2046# elif DYNA_USE_FAST_FUTEX
2047 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2048 DYNA_RELEASE_FUTEX_LOCK(user_lock, gtid);
2049 } else
2050# endif
2051 {
2052 __kmp_direct_unset_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2053 }
2054
2055#else // KMP_USE_DYNAMIC_LOCK
2056
Jim Cownie5e8470a2013-09-27 10:38:44 +00002057 kmp_user_lock_p lck;
2058
2059 /* Can't use serial interval since not block structured */
2060 /* release the lock */
2061
2062 if ( ( __kmp_user_lock_kind == lk_tas )
2063 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002064#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002065 // "fast" path implemented to fix customer performance issue
2066#if USE_ITT_BUILD
2067 __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
2068#endif /* USE_ITT_BUILD */
2069 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2070 KMP_MB();
2071 return;
2072#else
2073 lck = (kmp_user_lock_p)user_lock;
2074#endif
2075 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002076#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002077 else if ( ( __kmp_user_lock_kind == lk_futex )
2078 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2079 lck = (kmp_user_lock_p)user_lock;
2080 }
2081#endif
2082 else {
2083 lck = __kmp_lookup_user_lock( user_lock, "omp_unset_lock" );
2084 }
2085
2086#if USE_ITT_BUILD
2087 __kmp_itt_lock_releasing( lck );
2088#endif /* USE_ITT_BUILD */
2089
2090 RELEASE_LOCK( lck, gtid );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002091
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002092#if OMPT_SUPPORT && OMPT_BLAME
2093 if ((ompt_status == ompt_status_track_callback) &&
2094 ompt_callbacks.ompt_callback(ompt_event_release_lock)) {
2095 ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t) lck);
2096 }
2097#endif
2098
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002099#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002100}
2101
2102/* release the lock */
2103void
2104__kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2105{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002106#if KMP_USE_DYNAMIC_LOCK
2107
2108# if USE_ITT_BUILD
2109 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2110# endif
2111 DYNA_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2112
2113#else // KMP_USE_DYNAMIC_LOCK
2114
Jim Cownie5e8470a2013-09-27 10:38:44 +00002115 kmp_user_lock_p lck;
2116
2117 /* Can't use serial interval since not block structured */
2118
2119 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2120 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002121#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002122 // "fast" path implemented to fix customer performance issue
2123 kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock;
2124#if USE_ITT_BUILD
2125 __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
2126#endif /* USE_ITT_BUILD */
2127 if ( --(tl->lk.depth_locked) == 0 ) {
2128 TCW_4(tl->lk.poll, 0);
2129 }
2130 KMP_MB();
2131 return;
2132#else
2133 lck = (kmp_user_lock_p)user_lock;
2134#endif
2135 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002136#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002137 else if ( ( __kmp_user_lock_kind == lk_futex )
2138 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2139 <= OMP_NEST_LOCK_T_SIZE ) ) {
2140 lck = (kmp_user_lock_p)user_lock;
2141 }
2142#endif
2143 else {
2144 lck = __kmp_lookup_user_lock( user_lock, "omp_unset_nest_lock" );
2145 }
2146
2147#if USE_ITT_BUILD
2148 __kmp_itt_lock_releasing( lck );
2149#endif /* USE_ITT_BUILD */
2150
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002151 int release_status = RELEASE_NESTED_LOCK( lck, gtid );
2152#if OMPT_SUPPORT && OMPT_BLAME
2153 if (ompt_status == ompt_status_track_callback) {
2154 if (release_status == KMP_LOCK_RELEASED) {
2155 if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)) {
2156 ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)(
2157 (uint64_t) lck);
2158 }
2159 } else if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)) {
2160 ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)(
2161 (uint64_t) lck);
2162 }
2163 }
2164#endif
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002165
2166#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002167}
2168
2169/* try to acquire the lock */
2170int
2171__kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2172{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002173 KMP_COUNT_BLOCK(OMP_test_lock);
2174 KMP_TIME_BLOCK(OMP_test_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002175
2176#if KMP_USE_DYNAMIC_LOCK
2177 int rc;
2178 int tag = DYNA_EXTRACT_D_TAG(user_lock);
2179# if USE_ITT_BUILD
Jonathan Peyton81f9cd12015-05-22 22:37:22 +00002180 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002181# endif
2182# if DYNA_USE_FAST_TAS
2183 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2184 DYNA_TEST_TAS_LOCK(user_lock, gtid, rc);
2185 } else
2186# elif DYNA_USE_FAST_FUTEX
2187 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2188 DYNA_TEST_FUTEX_LOCK(user_lock, gtid, rc);
2189 } else
2190# endif
2191 {
2192 rc = __kmp_direct_test_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2193 }
2194 if (rc) {
2195# if USE_ITT_BUILD
2196 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2197# endif
2198 return FTN_TRUE;
2199 } else {
2200# if USE_ITT_BUILD
2201 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2202# endif
2203 return FTN_FALSE;
2204 }
2205
2206#else // KMP_USE_DYNAMIC_LOCK
2207
Jim Cownie5e8470a2013-09-27 10:38:44 +00002208 kmp_user_lock_p lck;
2209 int rc;
2210
2211 if ( ( __kmp_user_lock_kind == lk_tas )
2212 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2213 lck = (kmp_user_lock_p)user_lock;
2214 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002215#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002216 else if ( ( __kmp_user_lock_kind == lk_futex )
2217 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2218 lck = (kmp_user_lock_p)user_lock;
2219 }
2220#endif
2221 else {
2222 lck = __kmp_lookup_user_lock( user_lock, "omp_test_lock" );
2223 }
2224
2225#if USE_ITT_BUILD
2226 __kmp_itt_lock_acquiring( lck );
2227#endif /* USE_ITT_BUILD */
2228
2229 rc = TEST_LOCK( lck, gtid );
2230#if USE_ITT_BUILD
2231 if ( rc ) {
2232 __kmp_itt_lock_acquired( lck );
2233 } else {
2234 __kmp_itt_lock_cancelled( lck );
2235 }
2236#endif /* USE_ITT_BUILD */
2237 return ( rc ? FTN_TRUE : FTN_FALSE );
2238
2239 /* Can't use serial interval since not block structured */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002240
2241#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002242}
2243
2244/* try to acquire the lock */
2245int
2246__kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2247{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002248#if KMP_USE_DYNAMIC_LOCK
2249 int rc;
2250# if USE_ITT_BUILD
2251 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2252# endif
2253 rc = DYNA_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
2254# if USE_ITT_BUILD
2255 if (rc) {
2256 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2257 } else {
2258 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2259 }
2260# endif
2261 return rc;
2262
2263#else // KMP_USE_DYNAMIC_LOCK
2264
Jim Cownie5e8470a2013-09-27 10:38:44 +00002265 kmp_user_lock_p lck;
2266 int rc;
2267
2268 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2269 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2270 lck = (kmp_user_lock_p)user_lock;
2271 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002272#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002273 else if ( ( __kmp_user_lock_kind == lk_futex )
2274 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2275 <= OMP_NEST_LOCK_T_SIZE ) ) {
2276 lck = (kmp_user_lock_p)user_lock;
2277 }
2278#endif
2279 else {
2280 lck = __kmp_lookup_user_lock( user_lock, "omp_test_nest_lock" );
2281 }
2282
2283#if USE_ITT_BUILD
2284 __kmp_itt_lock_acquiring( lck );
2285#endif /* USE_ITT_BUILD */
2286
2287 rc = TEST_NESTED_LOCK( lck, gtid );
2288#if USE_ITT_BUILD
2289 if ( rc ) {
2290 __kmp_itt_lock_acquired( lck );
2291 } else {
2292 __kmp_itt_lock_cancelled( lck );
2293 }
2294#endif /* USE_ITT_BUILD */
2295 return rc;
2296
2297 /* Can't use serial interval since not block structured */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002298
2299#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002300}
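
/*
 * Illustrative sketch, not part of the runtime: the lifecycle of a simple
 * OpenMP lock expressed through the entry points above, i.e. roughly what the
 * user-level omp_*_lock() calls reduce to after the Fortran/C linkage layer. A
 * pointer-sized slot stands in for the omp_lock_t storage, matching the
 * void ** user_lock convention used here; the function name is hypothetical.
 * @code
 * void example_lock_lifecycle( ident_t *loc, kmp_int32 gtid )
 * {
 *     void *lock_storage = NULL;                           // plays the role of an omp_lock_t
 *     __kmpc_init_lock( loc, gtid, &lock_storage );
 *     if ( ! __kmpc_test_lock( loc, gtid, &lock_storage ) ) {  // non-blocking attempt first
 *         __kmpc_set_lock( loc, gtid, &lock_storage );         // fall back to a blocking acquire
 *     }
 *     // ... critical work ...
 *     __kmpc_unset_lock( loc, gtid, &lock_storage );
 *     __kmpc_destroy_lock( loc, gtid, &lock_storage );
 * }
 * @endcode
 */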
2301
2302
2303/*--------------------------------------------------------------------------------------------------------------------*/
2304
2305/*
2306 * Interface to fast scalable reduce methods routines
2307 */
2308
2309// keep the selected method in a thread local structure for cross-function usage: will be used in __kmpc_end_reduce* functions;
2310// another solution: to re-determine the method one more time in __kmpc_end_reduce* functions (new prototype required then)
2311// AT: which solution is better?
2312#define __KMP_SET_REDUCTION_METHOD(gtid,rmethod) \
2313 ( ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) = ( rmethod ) )
2314
2315#define __KMP_GET_REDUCTION_METHOD(gtid) \
2316 ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method )
2317
2318// description of the packed_reduction_method variable: look at the macros in kmp.h
2319
2320
2321// used in a critical section reduce block
2322static __forceinline void
2323__kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
2324
2325 // this lock was visible to a customer and to the thread profiler as a serial overhead span
2326 // (although it's used for an internal purpose only)
2327 // why was it visible in previous implementation?
2328 // should we keep it visible in new reduce block?
2329 kmp_user_lock_p lck;
2330
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002331#if KMP_USE_DYNAMIC_LOCK
2332
2333 if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
2334 lck = (kmp_user_lock_p)crit;
2335 if (*((kmp_dyna_lock_t *)lck) == 0) {
2336 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)lck, 0, DYNA_GET_D_TAG(__kmp_user_lock_seq));
2337 }
2338 KMP_DEBUG_ASSERT(lck != NULL);
2339 if (__kmp_env_consistency_check) {
2340 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
2341 }
2342 DYNA_D_LOCK_FUNC(lck, set)((kmp_dyna_lock_t *)lck, global_tid);
2343 } else {
2344 kmp_indirect_lock_t *ilk = __kmp_get_indirect_csptr(crit, loc, global_tid, __kmp_user_lock_seq);
2345 KMP_DEBUG_ASSERT(ilk != NULL);
2346 if (__kmp_env_consistency_check) {
2347 __kmp_push_sync(global_tid, ct_critical, loc, ilk->lock, __kmp_user_lock_seq);
2348 }
2349 DYNA_I_LOCK_FUNC(ilk, set)(ilk->lock, global_tid);
2350 }
2351
2352#else // KMP_USE_DYNAMIC_LOCK
2353
Jim Cownie5e8470a2013-09-27 10:38:44 +00002354 // We know that the fast reduction code is only emitted by Intel compilers
2355 // with 32 byte critical sections. If there isn't enough space, then we
2356 // have to use a pointer.
2357 if ( __kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE ) {
2358 lck = (kmp_user_lock_p)crit;
2359 }
2360 else {
2361 lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
2362 }
2363 KMP_DEBUG_ASSERT( lck != NULL );
2364
2365 if ( __kmp_env_consistency_check )
2366 __kmp_push_sync( global_tid, ct_critical, loc, lck );
2367
2368 __kmp_acquire_user_lock_with_checks( lck, global_tid );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002369
2370#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002371}
2372
2373// used in a critical section reduce block
2374static __forceinline void
2375__kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
2376
2377 kmp_user_lock_p lck;
2378
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002379#if KMP_USE_DYNAMIC_LOCK
2380
2381 if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
2382 lck = (kmp_user_lock_p)crit;
2383 if (__kmp_env_consistency_check)
2384 __kmp_pop_sync(global_tid, ct_critical, loc);
2385 DYNA_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
2386 } else {
2387 kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
2388 if (__kmp_env_consistency_check)
2389 __kmp_pop_sync(global_tid, ct_critical, loc);
2390 DYNA_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
2391 }
2392
2393#else // KMP_USE_DYNAMIC_LOCK
2394
Jim Cownie5e8470a2013-09-27 10:38:44 +00002395 // We know that the fast reduction code is only emitted by Intel compilers with 32 byte critical
2396 // sections. If there isn't enough space, then we have to use a pointer.
2397 if ( __kmp_base_user_lock_size > 32 ) {
2398 lck = *( (kmp_user_lock_p *) crit );
2399 KMP_ASSERT( lck != NULL );
2400 } else {
2401 lck = (kmp_user_lock_p) crit;
2402 }
2403
2404 if ( __kmp_env_consistency_check )
2405 __kmp_pop_sync( global_tid, ct_critical, loc );
2406
2407 __kmp_release_user_lock_with_checks( lck, global_tid );
2408
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002409#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002410} // __kmp_end_critical_section_reduce_block
2411
2412
2413/* 2.a.i. Reduce Block without a terminating barrier */
2414/*!
2415@ingroup SYNCHRONIZATION
2416@param loc source location information
2417@param global_tid global thread number
2418@param num_vars number of items (variables) to be reduced
2419@param reduce_size size of data in bytes to be reduced
2420@param reduce_data pointer to data to be reduced
2421@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data
2422@param lck pointer to the unique lock data structure
2423@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed
2424
2425The nowait version is used for a reduce clause with the nowait argument.
2426*/
2427kmp_int32
2428__kmpc_reduce_nowait(
2429 ident_t *loc, kmp_int32 global_tid,
2430 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
2431 kmp_critical_name *lck ) {
2432
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002433 KMP_COUNT_BLOCK(REDUCE_nowait);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002434 int retval;
2435 PACKED_REDUCTION_METHOD_T packed_reduction_method;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002436#if OMP_40_ENABLED
2437 kmp_team_t *team;
2438 kmp_info_t *th;
2439 int teams_swapped = 0, task_state;
2440#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002441 KA_TRACE( 10, ( "__kmpc_reduce_nowait() enter: called T#%d\n", global_tid ) );
2442
2443 // why do we need this initialization here at all?
2444 // The reduction clause cannot be used as a stand-alone directive.
2445
2446 // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed
2447 // possible detection of false-positive race by the threadchecker ???
2448 if( ! TCR_4( __kmp_init_parallel ) )
2449 __kmp_parallel_initialize();
2450
2451 // check correctness of reduce block nesting
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002452#if KMP_USE_DYNAMIC_LOCK
2453 if ( __kmp_env_consistency_check )
2454 __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
2455#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002456 if ( __kmp_env_consistency_check )
2457 __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002458#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002459
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002460#if OMP_40_ENABLED
2461 th = __kmp_thread_from_gtid(global_tid);
2462 if( th->th.th_teams_microtask ) { // AC: check if we are inside the teams construct?
2463 team = th->th.th_team;
2464 if( team->t.t_level == th->th.th_teams_level ) {
2465 // this is reduction at teams construct
2466 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
2467 // Let's swap teams temporarily for the reduction barrier
2468 teams_swapped = 1;
2469 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2470 th->th.th_team = team->t.t_parent;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002471 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002472 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002473 task_state = th->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002474 th->th.th_task_state = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002475 }
2476 }
2477#endif // OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002478
2479 // packed_reduction_method value will be reused by __kmp_end_reduce* function, the value should be kept in a variable
2480 // the variable should be either a construct-specific or thread-specific property, not a team specific property
2481 // (a thread can reach the next reduce block on the next construct, reduce method may differ on the next construct)
2482 // an ident_t "loc" parameter could be used as a construct-specific property (what if loc == 0?)
2483 // (if both construct-specific and team-specific variables were shared, then unnecessary extra syncs would be needed)
2484 // a thread-specific variable is better regarding two issues above (next construct and extra syncs)
2485 // a thread-specific "th_local.reduction_method" variable is used currently
2486 // each thread executes the 'determine' and 'set' lines (no need to have just one thread do this, which avoids unnecessary extra syncs)
2487
2488 packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2489 __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2490
2491 if( packed_reduction_method == critical_reduce_block ) {
2492
2493 __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2494 retval = 1;
2495
2496 } else if( packed_reduction_method == empty_reduce_block ) {
2497
2498 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2499 retval = 1;
2500
2501 } else if( packed_reduction_method == atomic_reduce_block ) {
2502
2503 retval = 2;
2504
2505 // all threads should do this pop here (because __kmpc_end_reduce_nowait() won't be called by the code gen)
2506 // (this is not ideal, because the checking block has already been closed by this 'pop',
2507 // but the atomic operation has not been executed yet; it executes slightly later, literally on the next instruction)
2508 if ( __kmp_env_consistency_check )
2509 __kmp_pop_sync( global_tid, ct_reduce, loc );
2510
2511 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2512
2513 //AT: performance issue: a real barrier here
2514 //AT: (if the master is slow, other threads are blocked here waiting for it to arrive and release them)
2515 //AT: (this is not what a customer might expect when specifying the NOWAIT clause)
2516 //AT: (specifying NOWAIT won't improve performance here, and that may be confusing to a customer)
2517 //AT: another implementation of *barrier_gather*nowait() (or some other design) might go faster
2518 // and be more in line with the sense of NOWAIT
2519 //AT: TO DO: do epcc test and compare times
2520
2521 // this barrier should be invisible to a customer and to the thread profiler
2522 // (it's neither a terminating barrier nor customer's code, it's used for an internal purpose)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002523#if USE_ITT_NOTIFY
2524 __kmp_threads[global_tid]->th.th_ident = loc;
2525#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002526 retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, FALSE, reduce_size, reduce_data, reduce_func );
2527 retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2528
2529 // all other workers except master should do this pop here
2530 // ( none of other workers will get to __kmpc_end_reduce_nowait() )
2531 if ( __kmp_env_consistency_check ) {
2532 if( retval == 0 ) {
2533 __kmp_pop_sync( global_tid, ct_reduce, loc );
2534 }
2535 }
2536
2537 } else {
2538
2539 // should never reach this block
2540 KMP_ASSERT( 0 ); // "unexpected method"
2541
2542 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002543#if OMP_40_ENABLED
2544 if( teams_swapped ) {
2545 // Restore thread structure
2546 th->th.th_info.ds.ds_tid = 0;
2547 th->th.th_team = team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002548 th->th.th_team_nproc = team->t.t_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002549 th->th.th_task_team = team->t.t_task_team[task_state];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002550 th->th.th_task_state = task_state;
2551 }
2552#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002553 KA_TRACE( 10, ( "__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2554
2555 return retval;
2556}
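
/*
 * Illustrative sketch, not part of the runtime: the calling convention a
 * compiler is expected to follow for a "reduction(+:sum) nowait" epilogue,
 * based on the return values documented above (1 = combine and call the end
 * routine, 2 = use an atomic update, 0 = nothing to do on this thread). The
 * outlined names are hypothetical and the atomic builtin is only a stand-in
 * for the compiler-generated atomic update.
 * @code
 * static void example_reduce_add( void *lhs, void *rhs )
 * {
 *     *(int *)lhs += *(int *)rhs;              // combine two private partial sums
 * }
 *
 * void example_reduction_epilogue( ident_t *loc, kmp_int32 gtid,
 *                                  int *shared_sum, int private_sum,
 *                                  kmp_critical_name *crit )
 * {
 *     switch ( __kmpc_reduce_nowait( loc, gtid, 1, sizeof(int), &private_sum,
 *                                    example_reduce_add, crit ) ) {
 *         case 1:                              // combine into the shared copy, then end
 *             *shared_sum += private_sum;
 *             __kmpc_end_reduce_nowait( loc, gtid, crit );
 *             break;
 *         case 2:                              // atomic path: every thread updates atomically
 *             __sync_fetch_and_add( shared_sum, private_sum );
 *             break;
 *         default:                             // 0: this thread's value was already folded in
 *             break;
 *     }
 * }
 * @endcode
 */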
2557
2558/*!
2559@ingroup SYNCHRONIZATION
2560@param loc source location information
2561@param global_tid global thread id.
2562@param lck pointer to the unique lock data structure
2563
2564Finish the execution of a reduce nowait.
2565*/
2566void
2567__kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2568
2569 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2570
2571 KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid ) );
2572
2573 packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2574
2575 if( packed_reduction_method == critical_reduce_block ) {
2576
2577 __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2578
2579 } else if( packed_reduction_method == empty_reduce_block ) {
2580
2581 // usage: if team size == 1, no synchronization is required ( on Intel platforms only )
2582
2583 } else if( packed_reduction_method == atomic_reduce_block ) {
2584
2585 // neither master nor other workers should get here
2586 // (code gen does not generate this call in case 2: atomic reduce block)
2587 // actually it's better to remove this elseif at all;
2588 // after removal this value will checked by the 'else' and will assert
2589
2590 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2591
2592 // only master gets here
2593
2594 } else {
2595
2596 // should never reach this block
2597 KMP_ASSERT( 0 ); // "unexpected method"
2598
2599 }
2600
2601 if ( __kmp_env_consistency_check )
2602 __kmp_pop_sync( global_tid, ct_reduce, loc );
2603
2604 KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2605
2606 return;
2607}
2608
2609/* 2.a.ii. Reduce Block with a terminating barrier */
2610
2611/*!
2612@ingroup SYNCHRONIZATION
2613@param loc source location information
2614@param global_tid global thread number
2615@param num_vars number of items (variables) to be reduced
2616@param reduce_size size of data in bytes to be reduced
2617@param reduce_data pointer to data to be reduced
2618@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data
2619@param lck pointer to the unique lock data structure
2620@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed
2621
2622A blocking reduce that includes an implicit barrier.
2623*/
2624kmp_int32
2625__kmpc_reduce(
2626 ident_t *loc, kmp_int32 global_tid,
2627 kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2628 void (*reduce_func)(void *lhs_data, void *rhs_data),
2629 kmp_critical_name *lck )
2630{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002631 KMP_COUNT_BLOCK(REDUCE_wait);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002632 int retval;
2633 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2634
2635 KA_TRACE( 10, ( "__kmpc_reduce() enter: called T#%d\n", global_tid ) );
2636
2637 // why do we need this initialization here at all?
2638 // The reduction clause cannot be a stand-alone directive.
2639
2640 // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed
2641 // possible detection of false-positive race by the threadchecker ???
2642 if( ! TCR_4( __kmp_init_parallel ) )
2643 __kmp_parallel_initialize();
2644
2645 // check correctness of reduce block nesting
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002646#if KMP_USE_DYNAMIC_LOCK
2647 if ( __kmp_env_consistency_check )
2648 __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
2649#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002650 if ( __kmp_env_consistency_check )
2651 __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002652#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002653
Jim Cownie5e8470a2013-09-27 10:38:44 +00002654 packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2655 __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2656
2657 if( packed_reduction_method == critical_reduce_block ) {
2658
2659 __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2660 retval = 1;
2661
2662 } else if( packed_reduction_method == empty_reduce_block ) {
2663
2664 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2665 retval = 1;
2666
2667 } else if( packed_reduction_method == atomic_reduce_block ) {
2668
2669 retval = 2;
2670
2671 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2672
2673 //case tree_reduce_block:
2674 // this barrier should be visible to a customer and to the thread profiler
2675 // (it's a terminating barrier on constructs if NOWAIT not specified)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002676#if USE_ITT_NOTIFY
2677 __kmp_threads[global_tid]->th.th_ident = loc; // needed for correct notification of frames
2678#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002679 retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, TRUE, reduce_size, reduce_data, reduce_func );
2680 retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2681
2682 // all other workers except master should do this pop here
2683 // ( none of other workers except master will enter __kmpc_end_reduce() )
2684 if ( __kmp_env_consistency_check ) {
2685 if( retval == 0 ) { // 0: all other workers; 1: master
2686 __kmp_pop_sync( global_tid, ct_reduce, loc );
2687 }
2688 }
2689
2690 } else {
2691
2692 // should never reach this block
2693 KMP_ASSERT( 0 ); // "unexpected method"
2694
2695 }
2696
2697 KA_TRACE( 10, ( "__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2698
2699 return retval;
2700}
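
/*
 * Illustrative sketch, not part of the runtime: the blocking form follows the
 * same 1/2/0 return-value protocol as the nowait sketch shown earlier, except
 * that threads returning 1 or 2 are expected to call __kmpc_end_reduce(),
 * which supplies the construct's terminating barrier; tree-path workers that
 * get 0 are held by the barrier inside this routine instead. Names are carried
 * over from the earlier hypothetical sketch.
 * @code
 * void example_blocking_reduction_epilogue( ident_t *loc, kmp_int32 gtid,
 *                                           int *shared_sum, int private_sum,
 *                                           kmp_critical_name *crit )
 * {
 *     int rc = __kmpc_reduce( loc, gtid, 1, sizeof(int), &private_sum,
 *                             example_reduce_add, crit );
 *     if ( rc == 1 ) {                         // combine, then end (the end routine barriers)
 *         *shared_sum += private_sum;
 *         __kmpc_end_reduce( loc, gtid, crit );
 *     } else if ( rc == 2 ) {                  // atomic combine, then end (barrier again)
 *         __sync_fetch_and_add( shared_sum, private_sum );
 *         __kmpc_end_reduce( loc, gtid, crit );
 *     }                                        // rc == 0: tree-path workers do not call the end routine
 * }
 * @endcode
 */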
2701
2702/*!
2703@ingroup SYNCHRONIZATION
2704@param loc source location information
2705@param global_tid global thread id.
2706@param lck pointer to the unique lock data structure
2707
2708Finish the execution of a blocking reduce.
2709The <tt>lck</tt> pointer must be the same as that used in the corresponding start function.
2710*/
2711void
2712__kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2713
2714 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2715
2716 KA_TRACE( 10, ( "__kmpc_end_reduce() enter: called T#%d\n", global_tid ) );
2717
2718 packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2719
2720 // this barrier should be visible to a customer and to the thread profiler
2721 // (it's a terminating barrier on constructs if NOWAIT not specified)
2722
2723 if( packed_reduction_method == critical_reduce_block ) {
2724
2725 __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2726
2727 // TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002728#if USE_ITT_NOTIFY
2729 __kmp_threads[global_tid]->th.th_ident = loc;
2730#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002731 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2732
2733 } else if( packed_reduction_method == empty_reduce_block ) {
2734
2735 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2736
2737 // TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002738#if USE_ITT_NOTIFY
2739 __kmp_threads[global_tid]->th.th_ident = loc;
2740#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002741 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2742
2743 } else if( packed_reduction_method == atomic_reduce_block ) {
2744
2745 // TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002746#if USE_ITT_NOTIFY
2747 __kmp_threads[global_tid]->th.th_ident = loc;
2748#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002749 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2750
2751 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2752
2753 // only master executes here (master releases all other workers)
2754 __kmp_end_split_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid );
2755
2756 } else {
2757
2758 // should never reach this block
2759 KMP_ASSERT( 0 ); // "unexpected method"
2760
2761 }
2762
2763 if ( __kmp_env_consistency_check )
2764 __kmp_pop_sync( global_tid, ct_reduce, loc );
2765
2766 KA_TRACE( 10, ( "__kmpc_end_reduce() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2767
2768 return;
2769}
2770
2771#undef __KMP_GET_REDUCTION_METHOD
2772#undef __KMP_SET_REDUCTION_METHOD
2773
2774/*-- end of interface to fast scalable reduce routines ---------------------------------------------------------------*/
2775
2776kmp_uint64
2777__kmpc_get_taskid() {
2778
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002779 kmp_int32 gtid;
2780 kmp_info_t * thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002781
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002782 gtid = __kmp_get_gtid();
2783 if ( gtid < 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002784 return 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002785 }; // if
2786 thread = __kmp_thread_from_gtid( gtid );
2787 return thread->th.th_current_task->td_task_id;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002788
2789} // __kmpc_get_taskid
2790
2791
2792kmp_uint64
2793__kmpc_get_parent_taskid() {
2794
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002795 kmp_int32 gtid;
2796 kmp_info_t * thread;
2797 kmp_taskdata_t * parent_task;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002798
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002799 gtid = __kmp_get_gtid();
2800 if ( gtid < 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002801 return 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002802 }; // if
2803 thread = __kmp_thread_from_gtid( gtid );
2804 parent_task = thread->th.th_current_task->td_parent;
2805 return ( parent_task == NULL ? 0 : parent_task->td_task_id );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002806
2807} // __kmpc_get_parent_taskid
2808
2809void __kmpc_place_threads(int nC, int nT, int nO)
2810{
Jim Cownie5e8470a2013-09-27 10:38:44 +00002811 if ( ! __kmp_init_serial ) {
2812 __kmp_serial_initialize();
2813 }
2814 __kmp_place_num_cores = nC;
2815 __kmp_place_num_threads_per_core = nT;
2816 __kmp_place_core_offset = nO;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002817}
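
/*
 * Illustrative sketch, not part of the runtime: __kmpc_place_threads() only
 * records the three core-placement knobs (number of cores, threads per core,
 * core offset), so it presumably has to run before the runtime builds its
 * threads for the values to take effect. The numbers below are arbitrary.
 * @code
 * void example_place_threads( void )
 * {
 *     // Use 4 cores with 2 threads per core, starting at core offset 1.
 *     __kmpc_place_threads( 4, 2, 1 );
 * }
 * @endcode
 */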
2818
2819// end of file //
2820