/*
 * kmp_csupport.c -- kfront linkage support for OpenMP.
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "omp.h"        /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_error.h"
#include "kmp_stats.h"

23#define MAX_MESSAGE 512
24
25/* ------------------------------------------------------------------------ */
26/* ------------------------------------------------------------------------ */
27
28/* flags will be used in future, e.g., to implement */
29/* openmp_strict library restrictions */
30
31/*!
32 * @ingroup STARTUP_SHUTDOWN
33 * @param loc in source location information
34 * @param flags in for future use (currently ignored)
35 *
36 * Initialize the runtime library. This call is optional; if it is not made then
Jim Cownie4cc4bb42014-10-07 16:25:50 +000037 * it will be implicitly called by attempts to use other library functions.
Jim Cownie5e8470a2013-09-27 10:38:44 +000038 *
39 */
40void
41__kmpc_begin(ident_t *loc, kmp_int32 flags)
42{
43 // By default __kmp_ignore_mppbeg() returns TRUE.
44 if (__kmp_ignore_mppbeg() == FALSE) {
45 __kmp_internal_begin();
46
47 KC_TRACE( 10, ("__kmpc_begin: called\n" ) );
48 }
49}
50
51/*!
52 * @ingroup STARTUP_SHUTDOWN
53 * @param loc source location information
54 *
 * Shut down the runtime library. This call is also optional, and even if it is made it will
 * do nothing unless the `KMP_IGNORE_MPPEND` environment variable is set to zero.
57 */
58void
59__kmpc_end(ident_t *loc)
60{
61 // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end() call no-op.
62 // However, this can be overridden with KMP_IGNORE_MPPEND environment variable.
63 // If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() returns FALSE and __kmpc_end()
64 // will unregister this root (it can cause library shut down).
65 if (__kmp_ignore_mppend() == FALSE) {
66 KC_TRACE( 10, ("__kmpc_end: called\n" ) );
67 KA_TRACE( 30, ("__kmpc_end\n" ));
68
69 __kmp_internal_end_thread( -1 );
70 }
71}
72
73/*!
74@ingroup THREAD_STATES
75@param loc Source location information.
76@return The global thread index of the active thread.
77
78This function can be called in any context.
79
If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is that
which would be returned by @ref omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
90
91*/
92kmp_int32
93__kmpc_global_thread_num(ident_t *loc)
94{
95 kmp_int32 gtid = __kmp_entry_gtid();
96
97 KC_TRACE( 10, ("__kmpc_global_thread_num: T#%d\n", gtid ) );
98
99 return gtid;
100}
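
/*
Usage sketch (illustration only, not a prescribed pattern): generated code typically
fetches the global thread number once and then passes it to later __kmpc_* entry points.
@code
kmp_int32 gtid = __kmpc_global_thread_num( &loc );
__kmpc_barrier( &loc, gtid );
@endcode
*/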
101
102/*!
103@ingroup THREAD_STATES
104@param loc Source location information.
105@return The number of threads under control of the OpenMP<sup>*</sup> runtime
106
107This function can be called in any context.
108It returns the total number of threads under the control of the OpenMP runtime. That is
109not a number that can be determined by any OpenMP standard calls, since the library may be
110called from more than one non-OpenMP thread, and this reflects the total over all such calls.
Similarly, because the runtime maintains underlying threads even when they are not active (since the cost
of creating and destroying OS threads is high), this call counts all such threads even if they are not
currently waiting for work.
114*/
115kmp_int32
116__kmpc_global_num_threads(ident_t *loc)
117{
118 KC_TRACE( 10, ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_nth ) );
119
120 return TCR_4(__kmp_nth);
121}
122
123/*!
124@ingroup THREAD_STATES
125@param loc Source location information.
126@return The thread number of the calling thread in the innermost active parallel construct.
127
128*/
129kmp_int32
130__kmpc_bound_thread_num(ident_t *loc)
131{
132 KC_TRACE( 10, ("__kmpc_bound_thread_num: called\n" ) );
133 return __kmp_tid_from_gtid( __kmp_entry_gtid() );
134}
135
136/*!
137@ingroup THREAD_STATES
138@param loc Source location information.
139@return The number of threads in the innermost active parallel construct.
140*/
141kmp_int32
142__kmpc_bound_num_threads(ident_t *loc)
143{
144 KC_TRACE( 10, ("__kmpc_bound_num_threads: called\n" ) );
145
146 return __kmp_entry_thread() -> th.th_team -> t.t_nproc;
147}
148
149/*!
150 * @ingroup DEPRECATED
151 * @param loc location description
152 *
153 * This function need not be called. It always returns TRUE.
154 */
155kmp_int32
156__kmpc_ok_to_fork(ident_t *loc)
157{
158#ifndef KMP_DEBUG
159
160 return TRUE;
161
162#else
163
164 const char *semi2;
165 const char *semi3;
166 int line_no;
167
168 if (__kmp_par_range == 0) {
169 return TRUE;
170 }
171 semi2 = loc->psource;
172 if (semi2 == NULL) {
173 return TRUE;
174 }
175 semi2 = strchr(semi2, ';');
176 if (semi2 == NULL) {
177 return TRUE;
178 }
179 semi2 = strchr(semi2 + 1, ';');
180 if (semi2 == NULL) {
181 return TRUE;
182 }
183 if (__kmp_par_range_filename[0]) {
184 const char *name = semi2 - 1;
185 while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
186 name--;
187 }
188 if ((*name == '/') || (*name == ';')) {
189 name++;
190 }
191 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
192 return __kmp_par_range < 0;
193 }
194 }
195 semi3 = strchr(semi2 + 1, ';');
196 if (__kmp_par_range_routine[0]) {
197 if ((semi3 != NULL) && (semi3 > semi2)
198 && (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
199 return __kmp_par_range < 0;
200 }
201 }
202 if (sscanf(semi3 + 1, "%d", &line_no) == 1) {
203 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
204 return __kmp_par_range > 0;
205 }
206 return __kmp_par_range < 0;
207 }
208 return TRUE;
209
210#endif /* KMP_DEBUG */
211
212}
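
/*
Illustration of the parsing above (the location string is hypothetical): for a
loc->psource such as
@code
";foo.c;bar;42;45;;"
@endcode
the field between the first and second semicolons ("foo.c") is matched against
__kmp_par_range_filename, the next field ("bar") against __kmp_par_range_routine,
and the following number (42) against the [__kmp_par_range_lb, __kmp_par_range_ub]
range to decide whether this parallel region may fork.
*/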
213
214/*!
215@ingroup THREAD_STATES
216@param loc Source location information.
217@return 1 if this thread is executing inside an active parallel region, zero if not.
218*/
219kmp_int32
220__kmpc_in_parallel( ident_t *loc )
221{
222 return __kmp_entry_thread() -> th.th_root -> r.r_active;
223}
224
225/*!
226@ingroup PARALLEL
227@param loc source location information
228@param global_tid global thread number
229@param num_threads number of threads requested for this parallel construct
230
231Set the number of threads to be used by the next fork spawned by this thread.
232This call is only required if the parallel construct has a `num_threads` clause.
233*/
234void
235__kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads )
236{
237 KA_TRACE( 20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
238 global_tid, num_threads ) );
239
240 __kmp_push_num_threads( loc, global_tid, num_threads );
241}
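
/*
A minimal sketch (not part of the runtime) of how a compiler might lower a
num_threads clause onto this entry point; the outlined routine name is hypothetical:
@code
// #pragma omp parallel num_threads(4)
__kmpc_push_num_threads( &loc, gtid, 4 );
__kmpc_fork_call( &loc, 0, (kmpc_micro) parallel_body_outlined );
@endcode
The pushed value applies only to the next fork performed by this thread.
*/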
242
243void
244__kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid )
245{
246 KA_TRACE( 20, ("__kmpc_pop_num_threads: enter\n" ) );
247
248 /* the num_threads are automatically popped */
249}
250
251
252#if OMP_40_ENABLED
253
254void
255__kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind )
256{
257 KA_TRACE( 20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n",
258 global_tid, proc_bind ) );
259
260 __kmp_push_proc_bind( loc, global_tid, (kmp_proc_bind_t)proc_bind );
261}
262
263#endif /* OMP_40_ENABLED */
264
265
266/*!
267@ingroup PARALLEL
268@param loc source location information
269@param argc total number of arguments in the ellipsis
@param microtask pointer to the callback routine containing the outlined parallel construct
271@param ... pointers to shared variables that aren't global
272
273Do the actual fork and call the microtask in the relevant number of threads.
274*/
275void
276__kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
277{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000278 KMP_STOP_EXPLICIT_TIMER(OMP_serial);
279 KMP_COUNT_BLOCK(OMP_PARALLEL);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000280 int gtid = __kmp_entry_gtid();
281 // maybe to save thr_state is enough here
282 {
283 va_list ap;
284 va_start( ap, microtask );
285
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000286#if INCLUDE_SSC_MARKS
287 SSC_MARK_FORKING();
288#endif
289 __kmp_fork_call( loc, gtid, fork_context_intel,
Jim Cownie5e8470a2013-09-27 10:38:44 +0000290 argc,
291 VOLATILE_CAST(microtask_t) microtask,
292 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
293/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +0000294#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +0000295 &ap
296#else
297 ap
298#endif
299 );
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000300#if INCLUDE_SSC_MARKS
301 SSC_MARK_JOINING();
302#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000303 __kmp_join_call( loc, gtid );
304
305 va_end( ap );
306 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000307 KMP_START_EXPLICIT_TIMER(OMP_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000308}
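
/*
A rough sketch (illustration only, all names hypothetical) of how a parallel region
might be lowered onto __kmpc_fork_call. The outlined routine receives the global and
bound thread ids followed by pointers to the shared variables:
@code
// For:
//     #pragma omp parallel
//         ...body using x...
// a compiler could generate:
void parallel_body_outlined( kmp_int32 *gtid, kmp_int32 *btid, int *x )
{
    // ... body of the parallel region, using *x ...
}
// and, at the directive itself:
//     __kmpc_fork_call( &loc, 1, (kmpc_micro) parallel_body_outlined, &x );
@endcode
*/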
309
310#if OMP_40_ENABLED
311/*!
312@ingroup PARALLEL
313@param loc source location information
314@param global_tid global thread number
315@param num_teams number of teams requested for the teams construct
316
317Set the number of teams to be used by the teams construct.
318This call is only required if the teams construct has a `num_teams` clause
319or a `thread_limit` clause (or both).
320*/
321void
322__kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads )
323{
324 KA_TRACE( 20, ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
325 global_tid, num_teams, num_threads ) );
326
327 __kmp_push_num_teams( loc, global_tid, num_teams, num_threads );
328}
329
330/*!
331@ingroup PARALLEL
332@param loc source location information
333@param argc total number of arguments in the ellipsis
@param microtask pointer to the callback routine containing the outlined teams construct
335@param ... pointers to shared variables that aren't global
336
337Do the actual fork and call the microtask in the relevant number of threads.
338*/
339void
340__kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
341{
342 int gtid = __kmp_entry_gtid();
343 kmp_info_t *this_thr = __kmp_threads[ gtid ];
344 va_list ap;
345 va_start( ap, microtask );
346
347 // remember teams entry point and nesting level
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000348 this_thr->th.th_teams_microtask = microtask;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000349 this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level; // AC: can be >0 on host
350
351 // check if __kmpc_push_num_teams called, set default number of teams otherwise
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000352 if ( this_thr->th.th_teams_size.nteams == 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000353 __kmp_push_num_teams( loc, gtid, 0, 0 );
354 }
355 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000356 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
357 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000358
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000359 __kmp_fork_call( loc, gtid, fork_context_intel,
Jim Cownie5e8470a2013-09-27 10:38:44 +0000360 argc,
361 VOLATILE_CAST(microtask_t) __kmp_teams_master,
362 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
Andrey Churbanovcbda8682015-01-13 14:43:35 +0000363#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +0000364 &ap
365#else
366 ap
367#endif
368 );
369 __kmp_join_call( loc, gtid );
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000370 this_thr->th.th_teams_microtask = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000371 this_thr->th.th_teams_level = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000372 *(kmp_int64*)(&this_thr->th.th_teams_size) = 0L;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000373 va_end( ap );
374}
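
/*
A rough sketch (illustration only, names hypothetical) of a possible lowering of a
teams construct with clauses onto the two entry points above:
@code
// #pragma omp teams num_teams(8) thread_limit(4)
__kmpc_push_num_teams( &loc, gtid, 8, 4 );
__kmpc_fork_teams( &loc, 0, (kmpc_micro) teams_body_outlined );
@endcode
*/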
375#endif /* OMP_40_ENABLED */
376
377
378//
379// I don't think this function should ever have been exported.
380// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
381// openmp code ever called it, but it's been exported from the RTL for so
382// long that I'm afraid to remove the definition.
383//
384int
385__kmpc_invoke_task_func( int gtid )
386{
387 return __kmp_invoke_task_func( gtid );
388}
389
390/*!
391@ingroup PARALLEL
392@param loc source location information
393@param global_tid global thread number
394
395Enter a serialized parallel construct. This interface is used to handle a
396conditional parallel region, like this,
397@code
398#pragma omp parallel if (condition)
399@endcode
400when the condition is false.
401*/
402void
403__kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
404{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000405 __kmp_serialized_parallel(loc, global_tid); /* The implementation is now in kmp_runtime.c so that it can share static functions with
406 * kmp_fork_call since the tasks to be done are similar in each case.
407 */
Jim Cownie5e8470a2013-09-27 10:38:44 +0000408}
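
/*
A minimal sketch (not the compiler's literal output, names hypothetical) of how a
conditional parallel region can be lowered using this pair of entry points:
@code
// #pragma omp parallel if (cond)
if ( cond ) {
    __kmpc_fork_call( &loc, 1, (kmpc_micro) parallel_body_outlined, &x );
} else {
    kmp_int32 gtid = __kmpc_global_thread_num( &loc );
    kmp_int32 btid = 0;
    __kmpc_serialized_parallel( &loc, gtid );
    parallel_body_outlined( &gtid, &btid, &x );   // run the body on this thread only
    __kmpc_end_serialized_parallel( &loc, gtid );
}
@endcode
*/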
409
410/*!
411@ingroup PARALLEL
412@param loc source location information
413@param global_tid global thread number
414
415Leave a serialized parallel construct.
416*/
417void
418__kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
419{
420 kmp_internal_control_t *top;
421 kmp_info_t *this_thr;
422 kmp_team_t *serial_team;
423
424 KC_TRACE( 10, ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid ) );
425
426 /* skip all this code for autopar serialized loops since it results in
427 unacceptable overhead */
428 if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
429 return;
430
431 // Not autopar code
432 if( ! TCR_4( __kmp_init_parallel ) )
433 __kmp_parallel_initialize();
434
435 this_thr = __kmp_threads[ global_tid ];
436 serial_team = this_thr->th.th_serial_team;
437
438 KMP_MB();
439 KMP_DEBUG_ASSERT( serial_team );
440 KMP_ASSERT( serial_team -> t.t_serialized );
441 KMP_DEBUG_ASSERT( this_thr -> th.th_team == serial_team );
442 KMP_DEBUG_ASSERT( serial_team != this_thr->th.th_root->r.r_root_team );
443 KMP_DEBUG_ASSERT( serial_team -> t.t_threads );
444 KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr );
445
446 /* If necessary, pop the internal control stack values and replace the team values */
447 top = serial_team -> t.t_control_stack_top;
448 if ( top && top -> serial_nesting_level == serial_team -> t.t_serialized ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000449 copy_icvs( &serial_team -> t.t_threads[0] -> th.th_current_task -> td_icvs, top );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000450 serial_team -> t.t_control_stack_top = top -> next;
451 __kmp_free(top);
452 }
453
Jim Cownie5e8470a2013-09-27 10:38:44 +0000454 //if( serial_team -> t.t_serialized > 1 )
455 serial_team -> t.t_level--;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000456
457 /* pop dispatch buffers stack */
458 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
459 {
460 dispatch_private_info_t * disp_buffer = serial_team->t.t_dispatch->th_disp_buffer;
461 serial_team->t.t_dispatch->th_disp_buffer =
462 serial_team->t.t_dispatch->th_disp_buffer->next;
463 __kmp_free( disp_buffer );
464 }
465
466 -- serial_team -> t.t_serialized;
467 if ( serial_team -> t.t_serialized == 0 ) {
468
469 /* return to the parallel section */
470
471#if KMP_ARCH_X86 || KMP_ARCH_X86_64
472 if ( __kmp_inherit_fp_control && serial_team->t.t_fp_control_saved ) {
473 __kmp_clear_x87_fpu_status_word();
474 __kmp_load_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word );
475 __kmp_load_mxcsr( &serial_team->t.t_mxcsr );
476 }
477#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
478
479 this_thr -> th.th_team = serial_team -> t.t_parent;
480 this_thr -> th.th_info.ds.ds_tid = serial_team -> t.t_master_tid;
481
482 /* restore values cached in the thread */
483 this_thr -> th.th_team_nproc = serial_team -> t.t_parent -> t.t_nproc; /* JPH */
484 this_thr -> th.th_team_master = serial_team -> t.t_parent -> t.t_threads[0]; /* JPH */
485 this_thr -> th.th_team_serialized = this_thr -> th.th_team -> t.t_serialized;
486
487 /* TODO the below shouldn't need to be adjusted for serialized teams */
488 this_thr -> th.th_dispatch = & this_thr -> th.th_team ->
489 t.t_dispatch[ serial_team -> t.t_master_tid ];
490
Jim Cownie5e8470a2013-09-27 10:38:44 +0000491 __kmp_pop_current_task_from_thread( this_thr );
492
493 KMP_ASSERT( this_thr -> th.th_current_task -> td_flags.executing == 0 );
494 this_thr -> th.th_current_task -> td_flags.executing = 1;
495
496 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +0000497 // Copy the task team from the new child / old parent team to the thread.
498 this_thr->th.th_task_team = this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +0000499 KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n",
500 global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) );
501 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000502 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000503 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
504 KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d decreasing nesting depth of serial team %p to %d\n",
505 global_tid, serial_team, serial_team -> t.t_serialized ) );
506 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000507 }
508
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000509#if USE_ITT_BUILD
510 kmp_uint64 cur_time = 0;
511#if USE_ITT_NOTIFY
512 if( __itt_get_timestamp_ptr ) {
513 cur_time = __itt_get_timestamp();
514 }
515#endif /* USE_ITT_NOTIFY */
516 // Report the barrier
517 if( ( __kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3 ) && __itt_frame_submit_v3_ptr ) {
518 if( this_thr->th.th_team->t.t_level == 0 ) {
519 __kmp_itt_frame_submit( global_tid, this_thr->th.th_frame_time_serialized, cur_time, 0, loc, this_thr->th.th_team_nproc, 0 );
520 }
521 }
    // Mark the end of the "parallel" region for VTune. Only one of the frame notification schemes is used at the moment.
523 if ( ( __itt_frame_end_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG )
524 {
Jim Cownie181b4bb2013-12-23 17:28:57 +0000525 this_thr->th.th_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000526 __kmp_itt_region_joined( global_tid, 1 );
527 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000528 if ( ( __itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode == 3 ) || KMP_ITT_DEBUG )
529 {
530 this_thr->th.th_ident = loc;
531 // Since barrier frame for serialized region is equal to the region we use the same begin timestamp as for the barrier.
532 __kmp_itt_frame_submit( global_tid, serial_team->t.t_region_time, cur_time, 0, loc, this_thr->th.th_team_nproc, 2 );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000533 }
534#endif /* USE_ITT_BUILD */
535
536 if ( __kmp_env_consistency_check )
537 __kmp_pop_parallel( global_tid, NULL );
538}
539
540/*!
541@ingroup SYNCHRONIZATION
542@param loc source location information.
543@param ... pointers to the variables to be synchronized.
544
Execute <tt>flush</tt>. The pointers to the variables to be flushed
need not actually be passed (and indeed cannot be used, since no count
is passed, so the callee has no way of knowing how many there are unless
the list were zero terminated). This is implemented as a full memory fence.
(Though depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
551*/
552void
553__kmpc_flush(ident_t *loc, ...)
554{
555 KC_TRACE( 10, ("__kmpc_flush: called\n" ) );
556
557 /* need explicit __mf() here since use volatile instead in library */
558 KMP_MB(); /* Flush all pending memory write invalidates. */
559
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000560 #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
561 #if KMP_MIC
562 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
563 // We shouldn't need it, though, since the ABI rules require that
564 // * If the compiler generates NGO stores it also generates the fence
565 // * If users hand-code NGO stores they should insert the fence
566 // therefore no incomplete unordered stores should be visible.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000567 #else
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000568 // C74404
        // This is to address non-temporal store instructions (sfence needed).
        // The clflush instruction is also addressed (mfence needed).
        // Probably the non-temporal load movntdqa instruction should also be addressed.
        // mfence is an SSE2 instruction. Do not execute it if the CPU is not SSE2 capable.
573 if ( ! __kmp_cpuinfo.initialized ) {
574 __kmp_query_cpuid( & __kmp_cpuinfo );
575 }; // if
576 if ( ! __kmp_cpuinfo.sse2 ) {
577 // CPU cannot execute SSE2 instructions.
578 } else {
579 #if KMP_COMPILER_ICC || KMP_COMPILER_MSVC
580 _mm_mfence();
581 #else
582 __sync_synchronize();
583 #endif // KMP_COMPILER_ICC
584 }; // if
585 #endif // KMP_MIC
Andrey Churbanovcbda8682015-01-13 14:43:35 +0000586 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64)
587 // Nothing to see here move along
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000588 #elif KMP_ARCH_PPC64
589 // Nothing needed here (we have a real MB above).
590 #if KMP_OS_CNK
591 // The flushing thread needs to yield here; this prevents a
592 // busy-waiting thread from saturating the pipeline. flush is
593 // often used in loops like this:
594 // while (!flag) {
595 // #pragma omp flush(flag)
596 // }
597 // and adding the yield here is good for at least a 10x speedup
598 // when running >2 threads per core (on the NAS LU benchmark).
599 __kmp_yield(TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000600 #endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000601 #else
602 #error Unknown or unsupported architecture
603 #endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000604
605}
606
607/* -------------------------------------------------------------------------- */
608
609/* -------------------------------------------------------------------------- */
610
611/*!
612@ingroup SYNCHRONIZATION
613@param loc source location information
614@param global_tid thread id.
615
616Execute a barrier.
617*/
618void
619__kmpc_barrier(ident_t *loc, kmp_int32 global_tid)
620{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000621 KMP_COUNT_BLOCK(OMP_BARRIER);
622 KMP_TIME_BLOCK(OMP_barrier);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000623 int explicit_barrier_flag;
624 KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) );
625
626 if (! TCR_4(__kmp_init_parallel))
627 __kmp_parallel_initialize();
628
629 if ( __kmp_env_consistency_check ) {
630 if ( loc == 0 ) {
631 KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
632 }; // if
633
634 __kmp_check_barrier( global_tid, ct_barrier, loc );
635 }
636
637 __kmp_threads[ global_tid ]->th.th_ident = loc;
638 // TODO: explicit barrier_wait_id:
639 // this function is called when 'barrier' directive is present or
640 // implicit barrier at the end of a worksharing construct.
641 // 1) better to add a per-thread barrier counter to a thread data structure
642 // 2) set to 0 when a new team is created
643 // 4) no sync is required
644
645 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
646}
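
/*
Usage sketch (illustration only): a stand-alone barrier directive lowers to a
single call,
@code
// #pragma omp barrier
__kmpc_barrier( &loc, gtid );
@endcode
and, as noted above, the same call is used for the implicit barrier at the end of a
worksharing construct that has no nowait clause.
*/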
647
648/* The BARRIER for a MASTER section is always explicit */
649/*!
650@ingroup WORK_SHARING
651@param loc source location information.
652@param global_tid global thread number .
653@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
654*/
655kmp_int32
656__kmpc_master(ident_t *loc, kmp_int32 global_tid)
657{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000658 KMP_COUNT_BLOCK(OMP_MASTER);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000659 int status = 0;
660
661 KC_TRACE( 10, ("__kmpc_master: called T#%d\n", global_tid ) );
662
663 if( ! TCR_4( __kmp_init_parallel ) )
664 __kmp_parallel_initialize();
665
666 if( KMP_MASTER_GTID( global_tid ))
667 status = 1;
668
669 if ( __kmp_env_consistency_check ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000670#if KMP_USE_DYNAMIC_LOCK
671 if (status)
672 __kmp_push_sync( global_tid, ct_master, loc, NULL, 0 );
673 else
674 __kmp_check_sync( global_tid, ct_master, loc, NULL, 0 );
675#else
Jim Cownie5e8470a2013-09-27 10:38:44 +0000676 if (status)
677 __kmp_push_sync( global_tid, ct_master, loc, NULL );
678 else
679 __kmp_check_sync( global_tid, ct_master, loc, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000680#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000681 }
682
683 return status;
684}
685
686/*!
687@ingroup WORK_SHARING
688@param loc source location information.
689@param global_tid global thread number .
690
691Mark the end of a <tt>master</tt> region. This should only be called by the thread
692that executes the <tt>master</tt> region.
693*/
694void
695__kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
696{
697 KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );
698
699 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
700
701 if ( __kmp_env_consistency_check ) {
702 if( global_tid < 0 )
703 KMP_WARNING( ThreadIdentInvalid );
704
705 if( KMP_MASTER_GTID( global_tid ))
706 __kmp_pop_sync( global_tid, ct_master, loc );
707 }
708}
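
/*
A minimal sketch (illustration only) of the expected lowering of a master construct
onto the pair above: only the thread for which __kmpc_master returns 1 executes the
block and then calls __kmpc_end_master.
@code
// #pragma omp master
if ( __kmpc_master( &loc, gtid ) ) {
    // ... master-only code ...
    __kmpc_end_master( &loc, gtid );
}
@endcode
*/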
709
710/*!
711@ingroup WORK_SHARING
712@param loc source location information.
713@param gtid global thread number.
714
715Start execution of an <tt>ordered</tt> construct.
716*/
717void
718__kmpc_ordered( ident_t * loc, kmp_int32 gtid )
719{
720 int cid = 0;
721 kmp_info_t *th;
722 KMP_DEBUG_ASSERT( __kmp_init_serial );
723
724 KC_TRACE( 10, ("__kmpc_ordered: called T#%d\n", gtid ));
725
726 if (! TCR_4(__kmp_init_parallel))
727 __kmp_parallel_initialize();
728
729#if USE_ITT_BUILD
730 __kmp_itt_ordered_prep( gtid );
731 // TODO: ordered_wait_id
732#endif /* USE_ITT_BUILD */
733
734 th = __kmp_threads[ gtid ];
735
736 if ( th -> th.th_dispatch -> th_deo_fcn != 0 )
737 (*th->th.th_dispatch->th_deo_fcn)( & gtid, & cid, loc );
738 else
739 __kmp_parallel_deo( & gtid, & cid, loc );
740
741#if USE_ITT_BUILD
742 __kmp_itt_ordered_start( gtid );
743#endif /* USE_ITT_BUILD */
744}
745
746/*!
747@ingroup WORK_SHARING
748@param loc source location information.
749@param gtid global thread number.
750
751End execution of an <tt>ordered</tt> construct.
752*/
753void
754__kmpc_end_ordered( ident_t * loc, kmp_int32 gtid )
755{
756 int cid = 0;
757 kmp_info_t *th;
758
759 KC_TRACE( 10, ("__kmpc_end_ordered: called T#%d\n", gtid ) );
760
761#if USE_ITT_BUILD
762 __kmp_itt_ordered_end( gtid );
763 // TODO: ordered_wait_id
764#endif /* USE_ITT_BUILD */
765
766 th = __kmp_threads[ gtid ];
767
768 if ( th -> th.th_dispatch -> th_dxo_fcn != 0 )
769 (*th->th.th_dispatch->th_dxo_fcn)( & gtid, & cid, loc );
770 else
771 __kmp_parallel_dxo( & gtid, & cid, loc );
772}
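
/*
Usage sketch (illustration only): inside a loop with an ordered clause, the body of
the ordered region is bracketed by the pair above.
@code
// #pragma omp ordered
__kmpc_ordered( &loc, gtid );
// ... code that must run in iteration order ...
__kmpc_end_ordered( &loc, gtid );
@endcode
*/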
773
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000774#if KMP_USE_DYNAMIC_LOCK
775
776static __forceinline kmp_indirect_lock_t *
777__kmp_get_indirect_csptr(kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid, kmp_dyna_lockseq_t seq)
778{
779 // Code from __kmp_get_critical_section_ptr
780 // This function returns an indirect lock object instead of a user lock.
781 kmp_indirect_lock_t **lck, *ret;
782 lck = (kmp_indirect_lock_t **)crit;
783 ret = (kmp_indirect_lock_t *)TCR_PTR(*lck);
784 if (ret == NULL) {
785 void *idx;
786 kmp_indirect_locktag_t tag = DYNA_GET_I_TAG(seq);
787 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
788 ret = ilk;
789 DYNA_I_LOCK_FUNC(ilk, init)(ilk->lock);
790 DYNA_SET_I_LOCK_LOCATION(ilk, loc);
791 DYNA_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
792 KA_TRACE(20, ("__kmp_get_indirect_csptr: initialized indirect lock #%d\n", tag));
793#if USE_ITT_BUILD
794 __kmp_itt_critical_creating(ilk->lock, loc);
795#endif
796 int status = KMP_COMPARE_AND_STORE_PTR(lck, 0, ilk);
797 if (status == 0) {
798#if USE_ITT_BUILD
799 __kmp_itt_critical_destroyed(ilk->lock);
800#endif
801 // Postponing destroy, to avoid costly dispatch here.
802 //DYNA_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
803 ret = (kmp_indirect_lock_t *)TCR_PTR(*lck);
804 KMP_DEBUG_ASSERT(ret != NULL);
805 }
806 }
807 return ret;
808}
809
810// Fast-path acquire tas lock
811#define DYNA_ACQUIRE_TAS_LOCK(lock, gtid) { \
812 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
813 if (l->lk.poll != DYNA_LOCK_FREE(tas) || \
814 ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas))) { \
815 kmp_uint32 spins; \
816 KMP_FSYNC_PREPARE(l); \
817 KMP_INIT_YIELD(spins); \
818 if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
819 KMP_YIELD(TRUE); \
820 } else { \
821 KMP_YIELD_SPIN(spins); \
822 } \
823 while (l->lk.poll != DYNA_LOCK_FREE(tas) || \
824 ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas))) { \
825 if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
826 KMP_YIELD(TRUE); \
827 } else { \
828 KMP_YIELD_SPIN(spins); \
829 } \
830 } \
831 } \
832 KMP_FSYNC_ACQUIRED(l); \
833}
834
835// Fast-path test tas lock
836#define DYNA_TEST_TAS_LOCK(lock, gtid, rc) { \
837 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
838 rc = l->lk.poll == DYNA_LOCK_FREE(tas) && \
839 KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas)); \
840}
841
842// Fast-path release tas lock
843#define DYNA_RELEASE_TAS_LOCK(lock, gtid) { \
844 TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, DYNA_LOCK_FREE(tas)); \
845 KMP_MB(); \
846}
847
848#if DYNA_HAS_FUTEX
849
850# include <unistd.h>
851# include <sys/syscall.h>
852# ifndef FUTEX_WAIT
853# define FUTEX_WAIT 0
854# endif
855# ifndef FUTEX_WAKE
856# define FUTEX_WAKE 1
857# endif
858
859// Fast-path acquire futex lock
860#define DYNA_ACQUIRE_FUTEX_LOCK(lock, gtid) { \
861 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
862 kmp_int32 gtid_code = (gtid+1) << 1; \
863 KMP_MB(); \
864 KMP_FSYNC_PREPARE(ftx); \
865 kmp_int32 poll_val; \
866 while ((poll_val = KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), DYNA_LOCK_FREE(futex), \
867 DYNA_LOCK_BUSY(gtid_code, futex))) != DYNA_LOCK_FREE(futex)) { \
868 kmp_int32 cond = DYNA_LOCK_STRIP(poll_val) & 1; \
869 if (!cond) { \
870 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, poll_val | DYNA_LOCK_BUSY(1, futex))) { \
871 continue; \
872 } \
873 poll_val |= DYNA_LOCK_BUSY(1, futex); \
874 } \
875 kmp_int32 rc; \
876 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, NULL, NULL, 0)) != 0) { \
877 continue; \
878 } \
879 gtid_code |= 1; \
880 } \
881 KMP_FSYNC_ACQUIRED(ftx); \
882}
883
884// Fast-path test futex lock
885#define DYNA_TEST_FUTEX_LOCK(lock, gtid, rc) { \
886 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
887 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), DYNA_LOCK_FREE(futex), DYNA_LOCK_BUSY(gtid+1, futex) << 1)) { \
888 KMP_FSYNC_ACQUIRED(ftx); \
889 rc = TRUE; \
890 } else { \
891 rc = FALSE; \
892 } \
893}
894
895// Fast-path release futex lock
896#define DYNA_RELEASE_FUTEX_LOCK(lock, gtid) { \
897 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
898 KMP_MB(); \
899 KMP_FSYNC_RELEASING(ftx); \
900 kmp_int32 poll_val = KMP_XCHG_FIXED32(&(ftx->lk.poll), DYNA_LOCK_FREE(futex)); \
901 if (DYNA_LOCK_STRIP(poll_val) & 1) { \
902 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, DYNA_LOCK_BUSY(1, futex), NULL, NULL, 0); \
903 } \
904 KMP_MB(); \
905 KMP_YIELD(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
906}
907
908#endif // DYNA_HAS_FUTEX
909
910#else // KMP_USE_DYNAMIC_LOCK
911
Jim Cownie5e8470a2013-09-27 10:38:44 +0000912static kmp_user_lock_p
913__kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid )
914{
915 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
916
917 //
918 // Because of the double-check, the following load
919 // doesn't need to be volatile.
920 //
921 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
922
923 if ( lck == NULL ) {
924 void * idx;
925
926 // Allocate & initialize the lock.
927 // Remember allocated locks in table in order to free them in __kmp_cleanup()
928 lck = __kmp_user_lock_allocate( &idx, gtid, kmp_lf_critical_section );
929 __kmp_init_user_lock_with_checks( lck );
930 __kmp_set_user_lock_location( lck, loc );
931#if USE_ITT_BUILD
932 __kmp_itt_critical_creating( lck );
        // __kmp_itt_critical_creating() should be called *before* the first usage of the underlying
        // lock. It is the only place where we can guarantee it. There is a chance the lock will be
        // destroyed with no usage, but it is not a problem, because this is not a real event seen
        // by the user but rather a way of setting a name for the object (lock). See more details in kmp_itt.h.
937#endif /* USE_ITT_BUILD */
938
939 //
940 // Use a cmpxchg instruction to slam the start of the critical
941 // section with the lock pointer. If another thread beat us
942 // to it, deallocate the lock, and use the lock that the other
943 // thread allocated.
944 //
945 int status = KMP_COMPARE_AND_STORE_PTR( lck_pp, 0, lck );
946
947 if ( status == 0 ) {
948 // Deallocate the lock and reload the value.
949#if USE_ITT_BUILD
950 __kmp_itt_critical_destroyed( lck );
951 // Let ITT know the lock is destroyed and the same memory location may be reused for
952 // another purpose.
953#endif /* USE_ITT_BUILD */
954 __kmp_destroy_user_lock_with_checks( lck );
955 __kmp_user_lock_free( &idx, gtid, lck );
956 lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
957 KMP_DEBUG_ASSERT( lck != NULL );
958 }
959 }
960 return lck;
961}
962
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000963#endif // KMP_USE_DYNAMIC_LOCK
964
Jim Cownie5e8470a2013-09-27 10:38:44 +0000965/*!
966@ingroup WORK_SHARING
967@param loc source location information.
968@param global_tid global thread number .
969@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
970some other suitably unique value.
971
972Enter code protected by a `critical` construct.
973This function blocks until the executing thread can enter the critical section.
974*/
975void
976__kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000977 KMP_COUNT_BLOCK(OMP_CRITICAL);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000978
979 kmp_user_lock_p lck;
980
981 KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );
982
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000983#if KMP_USE_DYNAMIC_LOCK
984 // Assumption: all direct locks fit in OMP_CRITICAL_SIZE.
985 // The global sequence __kmp_user_lock_seq is used unless compiler pushes a value.
986 if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
987 lck = (kmp_user_lock_p)crit;
988 // The thread that reaches here first needs to tag the lock word.
989 if (*((kmp_dyna_lock_t *)lck) == 0) {
990 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)lck, 0, DYNA_GET_D_TAG(__kmp_user_lock_seq));
991 }
992 if (__kmp_env_consistency_check) {
993 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
994 }
995# if USE_ITT_BUILD
996 __kmp_itt_critical_acquiring(lck);
997# endif
998# if DYNA_USE_FAST_TAS
999 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1000 DYNA_ACQUIRE_TAS_LOCK(lck, global_tid);
1001 } else
1002# elif DYNA_USE_FAST_FUTEX
1003 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1004 DYNA_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1005 } else
1006# endif
1007 {
1008 DYNA_D_LOCK_FUNC(lck, set)((kmp_dyna_lock_t *)lck, global_tid);
1009 }
1010 } else {
1011 kmp_indirect_lock_t *ilk = __kmp_get_indirect_csptr(crit, loc, global_tid, __kmp_user_lock_seq);
1012 lck = ilk->lock;
1013 if (__kmp_env_consistency_check) {
1014 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
1015 }
1016# if USE_ITT_BUILD
1017 __kmp_itt_critical_acquiring(lck);
1018# endif
1019 DYNA_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1020 }
1021
1022#else // KMP_USE_DYNAMIC_LOCK
1023
Jim Cownie5e8470a2013-09-27 10:38:44 +00001024 //TODO: add THR_OVHD_STATE
1025
1026 KMP_CHECK_USER_LOCK_INIT();
1027
1028 if ( ( __kmp_user_lock_kind == lk_tas )
1029 && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1030 lck = (kmp_user_lock_p)crit;
1031 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001032#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001033 else if ( ( __kmp_user_lock_kind == lk_futex )
1034 && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1035 lck = (kmp_user_lock_p)crit;
1036 }
1037#endif
1038 else { // ticket, queuing or drdpa
1039 lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
1040 }
1041
1042 if ( __kmp_env_consistency_check )
1043 __kmp_push_sync( global_tid, ct_critical, loc, lck );
1044
1045 /* since the critical directive binds to all threads, not just
1046 * the current team we have to check this even if we are in a
1047 * serialized team */
1048 /* also, even if we are the uber thread, we still have to conduct the lock,
1049 * as we have to contend with sibling threads */
1050
1051#if USE_ITT_BUILD
1052 __kmp_itt_critical_acquiring( lck );
1053#endif /* USE_ITT_BUILD */
1054 // Value of 'crit' should be good for using as a critical_id of the critical section directive.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001055 __kmp_acquire_user_lock_with_checks( lck, global_tid );
1056
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001057#endif // KMP_USE_DYNAMIC_LOCK
1058
Jim Cownie5e8470a2013-09-27 10:38:44 +00001059#if USE_ITT_BUILD
1060 __kmp_itt_critical_acquired( lck );
1061#endif /* USE_ITT_BUILD */
1062
1063 KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
1064} // __kmpc_critical
1065
1066/*!
1067@ingroup WORK_SHARING
1068@param loc source location information.
1069@param global_tid global thread number .
1070@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or
1071some other suitably unique value.
1072
1073Leave a critical section, releasing any lock that was held during its execution.
1074*/
1075void
1076__kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
1077{
1078 kmp_user_lock_p lck;
1079
1080 KC_TRACE( 10, ("__kmpc_end_critical: called T#%d\n", global_tid ));
1081
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001082#if KMP_USE_DYNAMIC_LOCK
1083 if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
1084 lck = (kmp_user_lock_p)crit;
1085 KMP_ASSERT(lck != NULL);
1086 if (__kmp_env_consistency_check) {
1087 __kmp_pop_sync(global_tid, ct_critical, loc);
1088 }
1089# if USE_ITT_BUILD
1090 __kmp_itt_critical_releasing( lck );
1091# endif
1092# if DYNA_USE_FAST_TAS
1093 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1094 DYNA_RELEASE_TAS_LOCK(lck, global_tid);
1095 } else
1096# elif DYNA_USE_FAST_FUTEX
1097 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1098 DYNA_RELEASE_FUTEX_LOCK(lck, global_tid);
1099 } else
1100# endif
1101 {
1102 DYNA_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1103 }
1104 } else {
1105 kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1106 KMP_ASSERT(ilk != NULL);
1107 lck = ilk->lock;
1108 if (__kmp_env_consistency_check) {
1109 __kmp_pop_sync(global_tid, ct_critical, loc);
1110 }
1111# if USE_ITT_BUILD
1112 __kmp_itt_critical_releasing( lck );
1113# endif
1114 DYNA_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1115 }
1116
1117#else // KMP_USE_DYNAMIC_LOCK
1118
Jim Cownie5e8470a2013-09-27 10:38:44 +00001119 if ( ( __kmp_user_lock_kind == lk_tas )
1120 && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1121 lck = (kmp_user_lock_p)crit;
1122 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001123#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001124 else if ( ( __kmp_user_lock_kind == lk_futex )
1125 && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1126 lck = (kmp_user_lock_p)crit;
1127 }
1128#endif
1129 else { // ticket, queuing or drdpa
1130 lck = (kmp_user_lock_p) TCR_PTR(*((kmp_user_lock_p *)crit));
1131 }
1132
1133 KMP_ASSERT(lck != NULL);
1134
1135 if ( __kmp_env_consistency_check )
1136 __kmp_pop_sync( global_tid, ct_critical, loc );
1137
1138#if USE_ITT_BUILD
1139 __kmp_itt_critical_releasing( lck );
1140#endif /* USE_ITT_BUILD */
1141 // Value of 'crit' should be good for using as a critical_id of the critical section directive.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001142 __kmp_release_user_lock_with_checks( lck, global_tid );
1143
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001144#endif // KMP_USE_DYNAMIC_LOCK
1145
Jim Cownie5e8470a2013-09-27 10:38:44 +00001146 KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
1147}
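
/*
A minimal sketch (illustration only) of how a named critical construct can be
lowered; the statically allocated, zero-initialized kmp_critical_name below is
hypothetical and would be shared by all uses of the same critical name:
@code
static kmp_critical_name crit_for_name;   // zero-initialized; the runtime installs a lock lazily
// #pragma omp critical (name)
__kmpc_critical( &loc, gtid, &crit_for_name );
// ... critical section ...
__kmpc_end_critical( &loc, gtid, &crit_for_name );
@endcode
*/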
1148
1149/*!
1150@ingroup SYNCHRONIZATION
1151@param loc source location information
1152@param global_tid thread id.
1153@return one if the thread should execute the master block, zero otherwise
1154
1155Start execution of a combined barrier and master. The barrier is executed inside this function.
1156*/
1157kmp_int32
1158__kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid)
1159{
1160 int status;
1161
1162 KC_TRACE( 10, ("__kmpc_barrier_master: called T#%d\n", global_tid ) );
1163
1164 if (! TCR_4(__kmp_init_parallel))
1165 __kmp_parallel_initialize();
1166
1167 if ( __kmp_env_consistency_check )
1168 __kmp_check_barrier( global_tid, ct_barrier, loc );
1169
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001170#if USE_ITT_NOTIFY
1171 __kmp_threads[global_tid]->th.th_ident = loc;
1172#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001173 status = __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL );
1174
1175 return (status != 0) ? 0 : 1;
1176}
1177
1178/*!
1179@ingroup SYNCHRONIZATION
1180@param loc source location information
1181@param global_tid thread id.
1182
1183Complete the execution of a combined barrier and master. This function should
1184only be called at the completion of the <tt>master</tt> code. Other threads will
1185still be waiting at the barrier and this call releases them.
1186*/
1187void
1188__kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid)
1189{
1190 KC_TRACE( 10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid ));
1191
1192 __kmp_end_split_barrier ( bs_plain_barrier, global_tid );
1193}
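
/*
Usage sketch (illustration only) for the combined form: the barrier happens inside
__kmpc_barrier_master, then only the chosen thread runs the block and releases the
others via __kmpc_end_barrier_master.
@code
if ( __kmpc_barrier_master( &loc, gtid ) ) {
    // ... master-only code ...
    __kmpc_end_barrier_master( &loc, gtid );
}
@endcode
*/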
1194
1195/*!
1196@ingroup SYNCHRONIZATION
1197@param loc source location information
1198@param global_tid thread id.
1199@return one if the thread should execute the master block, zero otherwise
1200
1201Start execution of a combined barrier and master(nowait) construct.
1202The barrier is executed inside this function.
There is no equivalent "end" function, since the compiler does not emit a matching end call for this form.
1204*/
1205kmp_int32
1206__kmpc_barrier_master_nowait( ident_t * loc, kmp_int32 global_tid )
1207{
1208 kmp_int32 ret;
1209
1210 KC_TRACE( 10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid ));
1211
1212 if (! TCR_4(__kmp_init_parallel))
1213 __kmp_parallel_initialize();
1214
1215 if ( __kmp_env_consistency_check ) {
1216 if ( loc == 0 ) {
1217 KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
1218 }
1219 __kmp_check_barrier( global_tid, ct_barrier, loc );
1220 }
1221
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001222#if USE_ITT_NOTIFY
1223 __kmp_threads[global_tid]->th.th_ident = loc;
1224#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001225 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
1226
1227 ret = __kmpc_master (loc, global_tid);
1228
1229 if ( __kmp_env_consistency_check ) {
1230 /* there's no __kmpc_end_master called; so the (stats) */
1231 /* actions of __kmpc_end_master are done here */
1232
1233 if ( global_tid < 0 ) {
1234 KMP_WARNING( ThreadIdentInvalid );
1235 }
1236 if (ret) {
1237 /* only one thread should do the pop since only */
1238 /* one did the push (see __kmpc_master()) */
1239
1240 __kmp_pop_sync( global_tid, ct_master, loc );
1241 }
1242 }
1243
1244 return (ret);
1245}
1246
1247/* The BARRIER for a SINGLE process section is always explicit */
1248/*!
1249@ingroup WORK_SHARING
1250@param loc source location information
1251@param global_tid global thread number
1252@return One if this thread should execute the single construct, zero otherwise.
1253
1254Test whether to execute a <tt>single</tt> construct.
There are no implicit barriers in the two "single" calls; rather, the compiler should
introduce an explicit barrier if it is required.
1257*/
1258
1259kmp_int32
1260__kmpc_single(ident_t *loc, kmp_int32 global_tid)
1261{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001262 KMP_COUNT_BLOCK(OMP_SINGLE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001263 kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );
1264 return rc;
1265}
1266
1267/*!
1268@ingroup WORK_SHARING
1269@param loc source location information
1270@param global_tid global thread number
1271
1272Mark the end of a <tt>single</tt> construct. This function should
1273only be called by the thread that executed the block of code protected
1274by the `single` construct.
1275*/
1276void
1277__kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
1278{
1279 __kmp_exit_single( global_tid );
1280}
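
/*
A minimal sketch (illustration only) of a single construct lowered onto the pair
above; as noted, the closing barrier is the compiler's responsibility unless a
nowait clause was given.
@code
// #pragma omp single
if ( __kmpc_single( &loc, gtid ) ) {
    // ... code executed by one thread only ...
    __kmpc_end_single( &loc, gtid );
}
__kmpc_barrier( &loc, gtid );    // omitted for "single nowait"
@endcode
*/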
1281
1282/*!
1283@ingroup WORK_SHARING
1284@param loc Source location
1285@param global_tid Global thread id
1286
1287Mark the end of a statically scheduled loop.
1288*/
1289void
1290__kmpc_for_static_fini( ident_t *loc, kmp_int32 global_tid )
1291{
1292 KE_TRACE( 10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1293
1294 if ( __kmp_env_consistency_check )
1295 __kmp_pop_workshare( global_tid, ct_pdo, loc );
1296}
1297
1298/*
1299 * User routines which take C-style arguments (call by value)
1300 * different from the Fortran equivalent routines
1301 */
1302
1303void
1304ompc_set_num_threads( int arg )
1305{
1306// !!!!! TODO: check the per-task binding
1307 __kmp_set_num_threads( arg, __kmp_entry_gtid() );
1308}
1309
1310void
1311ompc_set_dynamic( int flag )
1312{
1313 kmp_info_t *thread;
1314
1315 /* For the thread-private implementation of the internal controls */
1316 thread = __kmp_entry_thread();
1317
1318 __kmp_save_internal_controls( thread );
1319
1320 set__dynamic( thread, flag ? TRUE : FALSE );
1321}
1322
1323void
1324ompc_set_nested( int flag )
1325{
1326 kmp_info_t *thread;
1327
1328 /* For the thread-private internal controls implementation */
1329 thread = __kmp_entry_thread();
1330
1331 __kmp_save_internal_controls( thread );
1332
1333 set__nested( thread, flag ? TRUE : FALSE );
1334}
1335
Jim Cownie5e8470a2013-09-27 10:38:44 +00001336void
1337ompc_set_max_active_levels( int max_active_levels )
1338{
1339 /* TO DO */
1340 /* we want per-task implementation of this internal control */
1341
1342 /* For the per-thread internal controls implementation */
1343 __kmp_set_max_active_levels( __kmp_entry_gtid(), max_active_levels );
1344}
1345
1346void
1347ompc_set_schedule( omp_sched_t kind, int modifier )
1348{
1349// !!!!! TODO: check the per-task binding
1350 __kmp_set_schedule( __kmp_entry_gtid(), ( kmp_sched_t ) kind, modifier );
1351}
1352
1353int
1354ompc_get_ancestor_thread_num( int level )
1355{
1356 return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), level );
1357}
1358
1359int
1360ompc_get_team_size( int level )
1361{
1362 return __kmp_get_team_size( __kmp_entry_gtid(), level );
1363}
1364
Jim Cownie5e8470a2013-09-27 10:38:44 +00001365void
1366kmpc_set_stacksize( int arg )
1367{
1368 // __kmp_aux_set_stacksize initializes the library if needed
1369 __kmp_aux_set_stacksize( arg );
1370}
1371
1372void
1373kmpc_set_stacksize_s( size_t arg )
1374{
1375 // __kmp_aux_set_stacksize initializes the library if needed
1376 __kmp_aux_set_stacksize( arg );
1377}
1378
1379void
1380kmpc_set_blocktime( int arg )
1381{
1382 int gtid, tid;
1383 kmp_info_t *thread;
1384
1385 gtid = __kmp_entry_gtid();
1386 tid = __kmp_tid_from_gtid(gtid);
1387 thread = __kmp_thread_from_gtid(gtid);
1388
1389 __kmp_aux_set_blocktime( arg, thread, tid );
1390}
1391
1392void
1393kmpc_set_library( int arg )
1394{
1395 // __kmp_user_set_library initializes the library if needed
1396 __kmp_user_set_library( (enum library_type)arg );
1397}
1398
1399void
1400kmpc_set_defaults( char const * str )
1401{
1402 // __kmp_aux_set_defaults initializes the library if needed
1403 __kmp_aux_set_defaults( str, strlen( str ) );
1404}
1405
Jim Cownie5e8470a2013-09-27 10:38:44 +00001406int
1407kmpc_set_affinity_mask_proc( int proc, void **mask )
1408{
Alp Toker98758b02014-03-02 04:12:06 +00001409#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001410 return -1;
1411#else
1412 if ( ! TCR_4(__kmp_init_middle) ) {
1413 __kmp_middle_initialize();
1414 }
1415 return __kmp_aux_set_affinity_mask_proc( proc, mask );
1416#endif
1417}
1418
1419int
1420kmpc_unset_affinity_mask_proc( int proc, void **mask )
1421{
Alp Toker98758b02014-03-02 04:12:06 +00001422#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001423 return -1;
1424#else
1425 if ( ! TCR_4(__kmp_init_middle) ) {
1426 __kmp_middle_initialize();
1427 }
1428 return __kmp_aux_unset_affinity_mask_proc( proc, mask );
1429#endif
1430}
1431
1432int
1433kmpc_get_affinity_mask_proc( int proc, void **mask )
1434{
Alp Toker98758b02014-03-02 04:12:06 +00001435#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001436 return -1;
1437#else
1438 if ( ! TCR_4(__kmp_init_middle) ) {
1439 __kmp_middle_initialize();
1440 }
1441 return __kmp_aux_get_affinity_mask_proc( proc, mask );
1442#endif
1443}
1444
Jim Cownie5e8470a2013-09-27 10:38:44 +00001445
1446/* -------------------------------------------------------------------------- */
1447/*!
1448@ingroup THREADPRIVATE
1449@param loc source location information
1450@param gtid global thread number
1451@param cpy_size size of the cpy_data buffer
1452@param cpy_data pointer to data to be copied
1453@param cpy_func helper function to call for copying data
1454@param didit flag variable: 1=single thread; 0=not single thread
1455
1456__kmpc_copyprivate implements the interface for the private data broadcast needed for
1457the copyprivate clause associated with a single region in an OpenMP<sup>*</sup> program (both C and Fortran).
1458All threads participating in the parallel region call this routine.
1459One of the threads (called the single thread) should have the <tt>didit</tt> variable set to 1
1460and all other threads should have that variable set to 0.
1461All threads pass a pointer to a data buffer (cpy_data) that they have built.
1462
1463The OpenMP specification forbids the use of nowait on the single region when a copyprivate
1464clause is present. However, @ref __kmpc_copyprivate implements a barrier internally to avoid
1465race conditions, so the code generation for the single region should avoid generating a barrier
1466after the call to @ref __kmpc_copyprivate.
1467
1468The <tt>gtid</tt> parameter is the global thread id for the current thread.
1469The <tt>loc</tt> parameter is a pointer to source location information.
1470
1471Internal implementation: The single thread will first copy its descriptor address (cpy_data)
1472to a team-private location, then the other threads will each call the function pointed to by
1473the parameter cpy_func, which carries out the copy by copying the data using the cpy_data buffer.
1474
1475The cpy_func routine used for the copy and the contents of the data area defined by cpy_data
1476and cpy_size may be built in any fashion that will allow the copy to be done. For instance,
1477the cpy_data buffer can hold the actual data to be copied or it may hold a list of pointers
1478to the data. The cpy_func routine must interpret the cpy_data buffer appropriately.
1479
1480The interface to cpy_func is as follows:
1481@code
1482void cpy_func( void *destination, void *source )
1483@endcode
1484where void *destination is the cpy_data pointer for the thread being copied to
1485and void *source is the cpy_data pointer for the thread being copied from.
1486*/
1487void
1488__kmpc_copyprivate( ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit )
1489{
1490 void **data_ptr;
1491
1492 KC_TRACE( 10, ("__kmpc_copyprivate: called T#%d\n", gtid ));
1493
1494 KMP_MB();
1495
1496 data_ptr = & __kmp_team_from_gtid( gtid )->t.t_copypriv_data;
1497
1498 if ( __kmp_env_consistency_check ) {
1499 if ( loc == 0 ) {
1500 KMP_WARNING( ConstructIdentInvalid );
1501 }
1502 }
1503
1504 /* ToDo: Optimize the following two barriers into some kind of split barrier */
1505
1506 if (didit) *data_ptr = cpy_data;
1507
1508 /* This barrier is not a barrier region boundary */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001509#if USE_ITT_NOTIFY
1510 __kmp_threads[gtid]->th.th_ident = loc;
1511#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001512 __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1513
1514 if (! didit) (*cpy_func)( cpy_data, *data_ptr );
1515
1516 /* Consider next barrier the user-visible barrier for barrier region boundaries */
1517 /* Nesting checks are already handled by the single construct checks */
1518
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001519#if USE_ITT_NOTIFY
1520 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. tasks can overwrite the location)
1521#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001522 __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1523}
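
/*
A minimal sketch (illustration only, helper name hypothetical) of the copyprivate
protocol described above: every thread passes the address of its own copy, the
thread that executed the single block passes didit == 1, and the helper copies from
the single thread's buffer into each other thread's buffer.
@code
// Hypothetical helper for:  #pragma omp single copyprivate(a)   where a is an int
static void copy_int( void *dst, void *src )
{
    *(int *)dst = *(int *)src;
}
// every thread then makes the same call, with didit set as described above:
//     __kmpc_copyprivate( &loc, gtid, sizeof(int), &a, copy_int, didit );
@endcode
*/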
1524
1525/* -------------------------------------------------------------------------- */
1526
1527#define INIT_LOCK __kmp_init_user_lock_with_checks
1528#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
1529#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
1530#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
1531#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
1532#define ACQUIRE_NESTED_LOCK_TIMED __kmp_acquire_nested_user_lock_with_checks_timed
1533#define RELEASE_LOCK __kmp_release_user_lock_with_checks
1534#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
1535#define TEST_LOCK __kmp_test_user_lock_with_checks
1536#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
1537#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
1538#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
1539
1540
1541/*
1542 * TODO: Make check abort messages use location info & pass it
1543 * into with_checks routines
1544 */
1545
1546/* initialize the lock */
1547void
1548__kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001549#if KMP_USE_DYNAMIC_LOCK
1550 KMP_DEBUG_ASSERT(__kmp_init_serial);
1551 if (__kmp_env_consistency_check && user_lock == NULL) {
1552 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
1553 }
1554 if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
1555 DYNA_INIT_D_LOCK(user_lock, __kmp_user_lock_seq);
1556# if USE_ITT_BUILD
1557 __kmp_itt_lock_creating((kmp_user_lock_p)user_lock, NULL);
1558# endif
1559 } else {
1560 DYNA_INIT_I_LOCK(user_lock, __kmp_user_lock_seq);
1561 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(user_lock);
1562 DYNA_SET_I_LOCK_LOCATION(ilk, loc);
1563# if USE_ITT_BUILD
1564 __kmp_itt_lock_creating(ilk->lock, loc);
1565# endif
1566 }
1567
1568#else // KMP_USE_DYNAMIC_LOCK
1569
Jim Cownie5e8470a2013-09-27 10:38:44 +00001570 static char const * const func = "omp_init_lock";
1571 kmp_user_lock_p lck;
1572 KMP_DEBUG_ASSERT( __kmp_init_serial );
1573
1574 if ( __kmp_env_consistency_check ) {
1575 if ( user_lock == NULL ) {
1576 KMP_FATAL( LockIsUninitialized, func );
1577 }
1578 }
1579
1580 KMP_CHECK_USER_LOCK_INIT();
1581
1582 if ( ( __kmp_user_lock_kind == lk_tas )
1583 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1584 lck = (kmp_user_lock_p)user_lock;
1585 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001586#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001587 else if ( ( __kmp_user_lock_kind == lk_futex )
1588 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1589 lck = (kmp_user_lock_p)user_lock;
1590 }
1591#endif
1592 else {
Jim Cownie181b4bb2013-12-23 17:28:57 +00001593 lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001594 }
1595 INIT_LOCK( lck );
1596 __kmp_set_user_lock_location( lck, loc );
1597
1598#if USE_ITT_BUILD
1599 __kmp_itt_lock_creating( lck );
1600#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001601
1602#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001603} // __kmpc_init_lock
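/*
 * For illustration only: a standalone user-level example (application code,
 * not part of this file) of the omp_lock_t life cycle whose semantics the
 * __kmpc_*_lock entry points in this file implement.  Whether a given
 * compiler routes the omp_*_lock() calls through these __kmpc_* entries or
 * through the library's own wrappers is compiler-specific (an assumption
 * here, not a guarantee).  Kept under "#if 0" so it is never compiled.
 */
#if 0
#include <omp.h>

static int counter = 0;

static void
locked_increment( void )
{
    omp_lock_t l;
    omp_init_lock( &l );                 // cf. __kmpc_init_lock above
    #pragma omp parallel
    {
        omp_set_lock( &l );              // blocks until acquired; cf. __kmpc_set_lock
        counter++;                       // protected update
        omp_unset_lock( &l );            // cf. __kmpc_unset_lock
    }
    omp_destroy_lock( &l );              // cf. __kmpc_destroy_lock
}
#endif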
1604
1605/* initialize the lock */
1606void
1607__kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001608#if KMP_USE_DYNAMIC_LOCK
1609
1610 KMP_DEBUG_ASSERT(__kmp_init_serial);
1611 if (__kmp_env_consistency_check && user_lock == NULL) {
1612 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
1613 }
1614 // Invoke init function after converting to nested version.
1615 kmp_dyna_lockseq_t nested_seq;
1616 switch (__kmp_user_lock_seq) {
1617 case lockseq_tas: nested_seq = lockseq_nested_tas; break;
1618#if DYNA_HAS_FUTEX
1619 case lockseq_futex: nested_seq = lockseq_nested_futex; break;
1620#endif
1621 case lockseq_ticket: nested_seq = lockseq_nested_ticket; break;
1622 case lockseq_queuing: nested_seq = lockseq_nested_queuing; break;
1623 case lockseq_drdpa: nested_seq = lockseq_nested_drdpa; break;
1624 default: nested_seq = lockseq_nested_queuing; break;
1625 // Use nested queuing lock for lock kinds without "nested" implementation.
1626 }
1627 DYNA_INIT_I_LOCK(user_lock, nested_seq);
1628 // All nested locks are indirect locks.
1629 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(user_lock);
1630 DYNA_SET_I_LOCK_LOCATION(ilk, loc);
1631# if USE_ITT_BUILD
1632 __kmp_itt_lock_creating(ilk->lock, loc);
1633# endif
1634
1635#else // KMP_USE_DYNAMIC_LOCK
1636
Jim Cownie5e8470a2013-09-27 10:38:44 +00001637 static char const * const func = "omp_init_nest_lock";
1638 kmp_user_lock_p lck;
1639 KMP_DEBUG_ASSERT( __kmp_init_serial );
1640
1641 if ( __kmp_env_consistency_check ) {
1642 if ( user_lock == NULL ) {
1643 KMP_FATAL( LockIsUninitialized, func );
1644 }
1645 }
1646
1647 KMP_CHECK_USER_LOCK_INIT();
1648
1649 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1650 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1651 lck = (kmp_user_lock_p)user_lock;
1652 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001653#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001654 else if ( ( __kmp_user_lock_kind == lk_futex )
1655 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1656 <= OMP_NEST_LOCK_T_SIZE ) ) {
1657 lck = (kmp_user_lock_p)user_lock;
1658 }
1659#endif
1660 else {
Jim Cownie181b4bb2013-12-23 17:28:57 +00001661 lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001662 }
1663
1664 INIT_NESTED_LOCK( lck );
1665 __kmp_set_user_lock_location( lck, loc );
1666
1667#if USE_ITT_BUILD
1668 __kmp_itt_lock_creating( lck );
1669#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001670
1671#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001672} // __kmpc_init_nest_lock
1673
1674void
1675__kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001676#if KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001677
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001678# if USE_ITT_BUILD
1679 kmp_user_lock_p lck;
1680 if (DYNA_EXTRACT_D_TAG(user_lock) == 0) {
1681 lck = ((kmp_indirect_lock_t *)DYNA_LOOKUP_I_LOCK(user_lock))->lock;
1682 } else {
1683 lck = (kmp_user_lock_p)user_lock;
1684 }
1685 __kmp_itt_lock_destroyed(lck);
1686# endif
1687 DYNA_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
1688#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001689 kmp_user_lock_p lck;
1690
1691 if ( ( __kmp_user_lock_kind == lk_tas )
1692 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1693 lck = (kmp_user_lock_p)user_lock;
1694 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001695#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001696 else if ( ( __kmp_user_lock_kind == lk_futex )
1697 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1698 lck = (kmp_user_lock_p)user_lock;
1699 }
1700#endif
1701 else {
1702 lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_lock" );
1703 }
1704
1705#if USE_ITT_BUILD
1706 __kmp_itt_lock_destroyed( lck );
1707#endif /* USE_ITT_BUILD */
1708 DESTROY_LOCK( lck );
1709
1710 if ( ( __kmp_user_lock_kind == lk_tas )
1711 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1712 ;
1713 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001714#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001715 else if ( ( __kmp_user_lock_kind == lk_futex )
1716 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1717 ;
1718 }
1719#endif
1720 else {
1721 __kmp_user_lock_free( user_lock, gtid, lck );
1722 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001723#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001724} // __kmpc_destroy_lock
1725
1726/* destroy the lock */
1727void
1728__kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001729#if KMP_USE_DYNAMIC_LOCK
1730
1731# if USE_ITT_BUILD
1732 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(user_lock);
1733 __kmp_itt_lock_destroyed(ilk->lock);
1734# endif
1735 DYNA_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
1736
1737#else // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001738
1739 kmp_user_lock_p lck;
1740
1741 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1742 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1743 lck = (kmp_user_lock_p)user_lock;
1744 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001745#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001746 else if ( ( __kmp_user_lock_kind == lk_futex )
1747 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1748 <= OMP_NEST_LOCK_T_SIZE ) ) {
1749 lck = (kmp_user_lock_p)user_lock;
1750 }
1751#endif
1752 else {
1753 lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_nest_lock" );
1754 }
1755
1756#if USE_ITT_BUILD
1757 __kmp_itt_lock_destroyed( lck );
1758#endif /* USE_ITT_BUILD */
1759
1760 DESTROY_NESTED_LOCK( lck );
1761
1762 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1763 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1764 ;
1765 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001766#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001767 else if ( ( __kmp_user_lock_kind == lk_futex )
1768 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1769 <= OMP_NEST_LOCK_T_SIZE ) ) {
1770 ;
1771 }
1772#endif
1773 else {
1774 __kmp_user_lock_free( user_lock, gtid, lck );
1775 }
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001776#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001777} // __kmpc_destroy_nest_lock
1778
1779void
1780__kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001781 KMP_COUNT_BLOCK(OMP_set_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001782#if KMP_USE_DYNAMIC_LOCK
1783 int tag = DYNA_EXTRACT_D_TAG(user_lock);
1784# if USE_ITT_BUILD
1785 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); // itt function will get to the right lock object.
1786# endif
1787# if DYNA_USE_FAST_TAS
1788 if (tag == locktag_tas && !__kmp_env_consistency_check) {
1789 DYNA_ACQUIRE_TAS_LOCK(user_lock, gtid);
1790 } else
1791# elif DYNA_USE_FAST_FUTEX
1792 if (tag == locktag_futex && !__kmp_env_consistency_check) {
1793 DYNA_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
1794 } else
1795# endif
1796 {
1797 __kmp_direct_set_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid);
1798 }
1799# if USE_ITT_BUILD
1800 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
1801# endif
1802
1803#else // KMP_USE_DYNAMIC_LOCK
1804
Jim Cownie5e8470a2013-09-27 10:38:44 +00001805 kmp_user_lock_p lck;
1806
1807 if ( ( __kmp_user_lock_kind == lk_tas )
1808 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1809 lck = (kmp_user_lock_p)user_lock;
1810 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001811#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001812 else if ( ( __kmp_user_lock_kind == lk_futex )
1813 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1814 lck = (kmp_user_lock_p)user_lock;
1815 }
1816#endif
1817 else {
1818 lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" );
1819 }
1820
1821#if USE_ITT_BUILD
1822 __kmp_itt_lock_acquiring( lck );
1823#endif /* USE_ITT_BUILD */
1824
1825 ACQUIRE_LOCK( lck, gtid );
1826
1827#if USE_ITT_BUILD
1828 __kmp_itt_lock_acquired( lck );
1829#endif /* USE_ITT_BUILD */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001830
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001831#endif // KMP_USE_DYNAMIC_LOCK
1832}
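/*
 * For illustration only: what the fast test-and-set acquire path above does
 * conceptually.  This is NOT the runtime's actual TAS implementation (the
 * real code adds backoff, yielding and monitoring support); the "toy_" names
 * are hypothetical.  Kept under "#if 0" so it is never compiled.
 */
#if 0
typedef struct { volatile int poll; } toy_tas_lock_t;

static void
toy_tas_acquire( toy_tas_lock_t * lck, int gtid )
{
    // 0 means unlocked; gtid + 1 marks the owner so that 0 stays distinct.
    while ( ! __sync_bool_compare_and_swap( &lck->poll, 0, gtid + 1 ) ) {
        /* spin; a real implementation backs off and may yield the CPU */
    }
}

static void
toy_tas_release( toy_tas_lock_t * lck )
{
    __sync_lock_release( &lck->poll );   // store 0 with release semantics
}
#endif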
Jim Cownie5e8470a2013-09-27 10:38:44 +00001833
1834void
1835__kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001836#if KMP_USE_DYNAMIC_LOCK
1837
1838# if USE_ITT_BUILD
1839 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
1840# endif
1841 DYNA_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
1842# if USE_ITT_BUILD
1843 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
1844#endif
1845
1846#else // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001847 kmp_user_lock_p lck;
1848
1849 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1850 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1851 lck = (kmp_user_lock_p)user_lock;
1852 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001853#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001854 else if ( ( __kmp_user_lock_kind == lk_futex )
1855 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1856 <= OMP_NEST_LOCK_T_SIZE ) ) {
1857 lck = (kmp_user_lock_p)user_lock;
1858 }
1859#endif
1860 else {
1861 lck = __kmp_lookup_user_lock( user_lock, "omp_set_nest_lock" );
1862 }
1863
1864#if USE_ITT_BUILD
1865 __kmp_itt_lock_acquiring( lck );
1866#endif /* USE_ITT_BUILD */
1867
1868 ACQUIRE_NESTED_LOCK( lck, gtid );
1869
1870#if USE_ITT_BUILD
1871 __kmp_itt_lock_acquired( lck );
1872#endif /* USE_ITT_BUILD */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001873#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001874}
1875
1876void
1877__kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
1878{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001879#if KMP_USE_DYNAMIC_LOCK
1880
1881 int tag = DYNA_EXTRACT_D_TAG(user_lock);
1882# if USE_ITT_BUILD
1883 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
1884# endif
1885# if DYNA_USE_FAST_TAS
1886 if (tag == locktag_tas && !__kmp_env_consistency_check) {
1887 DYNA_RELEASE_TAS_LOCK(user_lock, gtid);
1888 } else
1889# elif DYNA_USE_FAST_FUTEX
1890 if (tag == locktag_futex && !__kmp_env_consistency_check) {
1891 DYNA_RELEASE_FUTEX_LOCK(user_lock, gtid);
1892 } else
1893# endif
1894 {
1895 __kmp_direct_unset_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid);
1896 }
1897
1898#else // KMP_USE_DYNAMIC_LOCK
1899
Jim Cownie5e8470a2013-09-27 10:38:44 +00001900 kmp_user_lock_p lck;
1901
1902 /* Can't use serial interval since not block structured */
1903 /* release the lock */
1904
1905 if ( ( __kmp_user_lock_kind == lk_tas )
1906 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001907#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001908 // "fast" path implemented to fix customer performance issue
1909#if USE_ITT_BUILD
1910 __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
1911#endif /* USE_ITT_BUILD */
1912 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
1913 KMP_MB();
1914 return;
1915#else
1916 lck = (kmp_user_lock_p)user_lock;
1917#endif
1918 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001919#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001920 else if ( ( __kmp_user_lock_kind == lk_futex )
1921 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1922 lck = (kmp_user_lock_p)user_lock;
1923 }
1924#endif
1925 else {
1926 lck = __kmp_lookup_user_lock( user_lock, "omp_unset_lock" );
1927 }
1928
1929#if USE_ITT_BUILD
1930 __kmp_itt_lock_releasing( lck );
1931#endif /* USE_ITT_BUILD */
1932
1933 RELEASE_LOCK( lck, gtid );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001934
1935#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001936}
1937
1938/* release the lock */
1939void
1940__kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
1941{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001942#if KMP_USE_DYNAMIC_LOCK
1943
1944# if USE_ITT_BUILD
1945 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
1946# endif
1947 DYNA_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
1948
1949#else // KMP_USE_DYNAMIC_LOCK
1950
Jim Cownie5e8470a2013-09-27 10:38:44 +00001951 kmp_user_lock_p lck;
1952
1953 /* Can't use serial interval since not block structured */
1954
1955 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1956 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001957#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001958 // "fast" path implemented to fix customer performance issue
1959 kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock;
1960#if USE_ITT_BUILD
1961 __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
1962#endif /* USE_ITT_BUILD */
1963 if ( --(tl->lk.depth_locked) == 0 ) {
1964 TCW_4(tl->lk.poll, 0);
1965 }
1966 KMP_MB();
1967 return;
1968#else
1969 lck = (kmp_user_lock_p)user_lock;
1970#endif
1971 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001972#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001973 else if ( ( __kmp_user_lock_kind == lk_futex )
1974 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1975 <= OMP_NEST_LOCK_T_SIZE ) ) {
1976 lck = (kmp_user_lock_p)user_lock;
1977 }
1978#endif
1979 else {
1980 lck = __kmp_lookup_user_lock( user_lock, "omp_unset_nest_lock" );
1981 }
1982
1983#if USE_ITT_BUILD
1984 __kmp_itt_lock_releasing( lck );
1985#endif /* USE_ITT_BUILD */
1986
1987 RELEASE_NESTED_LOCK( lck, gtid );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001988
1989#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00001990}
1991
1992/* try to acquire the lock */
1993int
1994__kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
1995{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001996 KMP_COUNT_BLOCK(OMP_test_lock);
1997 KMP_TIME_BLOCK(OMP_test_lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00001998
1999#if KMP_USE_DYNAMIC_LOCK
2000 int rc;
2001 int tag = DYNA_EXTRACT_D_TAG(user_lock);
2002# if USE_ITT_BUILD
2003 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2004# endif
2005# if DYNA_USE_FAST_TAS
2006 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2007 DYNA_TEST_TAS_LOCK(user_lock, gtid, rc);
2008 } else
2009# elif DYNA_USE_FAST_FUTEX
2010 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2011 DYNA_TEST_FUTEX_LOCK(user_lock, gtid, rc);
2012 } else
2013# endif
2014 {
2015 rc = __kmp_direct_test_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2016 }
2017 if (rc) {
2018# if USE_ITT_BUILD
2019 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2020# endif
2021 return FTN_TRUE;
2022 } else {
2023# if USE_ITT_BUILD
2024 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2025# endif
2026 return FTN_FALSE;
2027 }
2028
2029#else // KMP_USE_DYNAMIC_LOCK
2030
Jim Cownie5e8470a2013-09-27 10:38:44 +00002031 kmp_user_lock_p lck;
2032 int rc;
2033
2034 if ( ( __kmp_user_lock_kind == lk_tas )
2035 && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2036 lck = (kmp_user_lock_p)user_lock;
2037 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002038#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002039 else if ( ( __kmp_user_lock_kind == lk_futex )
2040 && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2041 lck = (kmp_user_lock_p)user_lock;
2042 }
2043#endif
2044 else {
2045 lck = __kmp_lookup_user_lock( user_lock, "omp_test_lock" );
2046 }
2047
2048#if USE_ITT_BUILD
2049 __kmp_itt_lock_acquiring( lck );
2050#endif /* USE_ITT_BUILD */
2051
2052 rc = TEST_LOCK( lck, gtid );
2053#if USE_ITT_BUILD
2054 if ( rc ) {
2055 __kmp_itt_lock_acquired( lck );
2056 } else {
2057 __kmp_itt_lock_cancelled( lck );
2058 }
2059#endif /* USE_ITT_BUILD */
2060 return ( rc ? FTN_TRUE : FTN_FALSE );
2061
2062 /* Can't use serial interval since not block structured */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002063
2064#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002065}
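/*
 * For illustration only: the non-blocking semantics that __kmpc_test_lock()
 * implements, written as standalone user-level OpenMP code (omp_test_lock()
 * returns non-zero when the lock was acquired, zero otherwise).  Kept under
 * "#if 0" so it is never compiled.
 */
#if 0
#include <omp.h>

static int
try_do_work( omp_lock_t * l )
{
    if ( omp_test_lock( l ) ) {          // acquired without blocking (FTN_TRUE path above)
        /* ... protected work ... */
        omp_unset_lock( l );
        return 1;
    }
    return 0;                            // lock busy (FTN_FALSE path above); do something else
}
#endif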
2066
2067/* try to acquire the lock */
2068int
2069__kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2070{
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002071#if KMP_USE_DYNAMIC_LOCK
2072 int rc;
2073# if USE_ITT_BUILD
2074 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2075# endif
2076 rc = DYNA_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
2077# if USE_ITT_BUILD
2078 if (rc) {
2079 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2080 } else {
2081 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2082 }
2083# endif
2084 return rc;
2085
2086#else // KMP_USE_DYNAMIC_LOCK
2087
Jim Cownie5e8470a2013-09-27 10:38:44 +00002088 kmp_user_lock_p lck;
2089 int rc;
2090
2091 if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2092 + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2093 lck = (kmp_user_lock_p)user_lock;
2094 }
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002095#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002096 else if ( ( __kmp_user_lock_kind == lk_futex )
2097 && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2098 <= OMP_NEST_LOCK_T_SIZE ) ) {
2099 lck = (kmp_user_lock_p)user_lock;
2100 }
2101#endif
2102 else {
2103 lck = __kmp_lookup_user_lock( user_lock, "omp_test_nest_lock" );
2104 }
2105
2106#if USE_ITT_BUILD
2107 __kmp_itt_lock_acquiring( lck );
2108#endif /* USE_ITT_BUILD */
2109
2110 rc = TEST_NESTED_LOCK( lck, gtid );
2111#if USE_ITT_BUILD
2112 if ( rc ) {
2113 __kmp_itt_lock_acquired( lck );
2114 } else {
2115 __kmp_itt_lock_cancelled( lck );
2116 }
2117#endif /* USE_ITT_BUILD */
2118 return rc;
2119
2120 /* Can't use serial interval since not block structured */
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002121
2122#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002123}
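/*
 * For illustration only: the nesting semantics behind the *_nest_lock entry
 * points above, written as standalone user-level OpenMP code.  The owning
 * thread may re-acquire the lock; a depth count (cf. depth_locked) is
 * maintained and the lock is only really released when the count returns to
 * zero.  omp_test_nest_lock() returns the new nesting depth on success and
 * 0 on failure.  Kept under "#if 0" so it is never compiled.
 */
#if 0
#include <omp.h>

static omp_nest_lock_t nl;               // assume omp_init_nest_lock( &nl ) was called

static void
inner( void )
{
    omp_set_nest_lock( &nl );            // same owner: depth 1 -> 2, no deadlock
    /* ... */
    omp_unset_nest_lock( &nl );          // depth 2 -> 1, lock still held
}

static void
outer( void )
{
    omp_set_nest_lock( &nl );            // depth 0 -> 1
    inner();
    omp_unset_nest_lock( &nl );          // depth 1 -> 0, lock released
}
#endif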
2124
2125
2126/*--------------------------------------------------------------------------------------------------------------------*/
2127
2128/*
2129 * Interface to fast scalable reduce methods routines
2130 */
2131
2132// keep the selected method in a thread-local structure for cross-function use: it will be read back in the __kmpc_end_reduce* functions;
2133// an alternative would be to re-determine the method once more in the __kmpc_end_reduce* functions (a new prototype would be required then)
2134// AT: which solution is better?
2135#define __KMP_SET_REDUCTION_METHOD(gtid,rmethod) \
2136 ( ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) = ( rmethod ) )
2137
2138#define __KMP_GET_REDUCTION_METHOD(gtid) \
2139 ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method )
2140
2141// description of the packed_reduction_method variable: look at the macros in kmp.h
2142
2143
2144// used in a critical section reduce block
2145static __forceinline void
2146__kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
2147
2148 // this lock was visible to a customer and to the thread profiler as a serial overhead span
2149 // (although it's used for an internal purpose only)
2150 // why was it visible in the previous implementation?
2151 // should we keep it visible in the new reduce block?
2152 kmp_user_lock_p lck;
2153
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002154#if KMP_USE_DYNAMIC_LOCK
2155
2156 if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
2157 lck = (kmp_user_lock_p)crit;
2158 if (*((kmp_dyna_lock_t *)lck) == 0) {
2159 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)lck, 0, DYNA_GET_D_TAG(__kmp_user_lock_seq));
2160 }
2161 KMP_DEBUG_ASSERT(lck != NULL);
2162 if (__kmp_env_consistency_check) {
2163 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
2164 }
2165 DYNA_D_LOCK_FUNC(lck, set)((kmp_dyna_lock_t *)lck, global_tid);
2166 } else {
2167 kmp_indirect_lock_t *ilk = __kmp_get_indirect_csptr(crit, loc, global_tid, __kmp_user_lock_seq);
2168 KMP_DEBUG_ASSERT(ilk != NULL);
2169 if (__kmp_env_consistency_check) {
2170 __kmp_push_sync(global_tid, ct_critical, loc, ilk->lock, __kmp_user_lock_seq);
2171 }
2172 DYNA_I_LOCK_FUNC(ilk, set)(ilk->lock, global_tid);
2173 }
2174
2175#else // KMP_USE_DYNAMIC_LOCK
2176
Jim Cownie5e8470a2013-09-27 10:38:44 +00002177 // We know that the fast reduction code is only emitted by Intel compilers
2178 // with 32 byte critical sections. If there isn't enough space, then we
2179 // have to use a pointer.
2180 if ( __kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE ) {
2181 lck = (kmp_user_lock_p)crit;
2182 }
2183 else {
2184 lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
2185 }
2186 KMP_DEBUG_ASSERT( lck != NULL );
2187
2188 if ( __kmp_env_consistency_check )
2189 __kmp_push_sync( global_tid, ct_critical, loc, lck );
2190
2191 __kmp_acquire_user_lock_with_checks( lck, global_tid );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002192
2193#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002194}
2195
2196// used in a critical section reduce block
2197static __forceinline void
2198__kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
2199
2200 kmp_user_lock_p lck;
2201
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002202#if KMP_USE_DYNAMIC_LOCK
2203
2204 if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
2205 lck = (kmp_user_lock_p)crit;
2206 if (__kmp_env_consistency_check)
2207 __kmp_pop_sync(global_tid, ct_critical, loc);
2208 DYNA_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
2209 } else {
2210 kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
2211 if (__kmp_env_consistency_check)
2212 __kmp_pop_sync(global_tid, ct_critical, loc);
2213 DYNA_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
2214 }
2215
2216#else // KMP_USE_DYNAMIC_LOCK
2217
Jim Cownie5e8470a2013-09-27 10:38:44 +00002218 // We know that the fast reduction code is only emitted by Intel compilers with 32 byte critical
2219 // sections. If there isn't enough space, then we have to use a pointer.
2220 if ( __kmp_base_user_lock_size > 32 ) {
2221 lck = *( (kmp_user_lock_p *) crit );
2222 KMP_ASSERT( lck != NULL );
2223 } else {
2224 lck = (kmp_user_lock_p) crit;
2225 }
2226
2227 if ( __kmp_env_consistency_check )
2228 __kmp_pop_sync( global_tid, ct_critical, loc );
2229
2230 __kmp_release_user_lock_with_checks( lck, global_tid );
2231
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002232#endif // KMP_USE_DYNAMIC_LOCK
Jim Cownie5e8470a2013-09-27 10:38:44 +00002233} // __kmp_end_critical_section_reduce_block
2234
2235
2236/* 2.a.i. Reduce Block without a terminating barrier */
2237/*!
2238@ingroup SYNCHRONIZATION
2239@param loc source location information
2240@param global_tid global thread number
2241@param num_vars number of items (variables) to be reduced
2242@param reduce_size size of data in bytes to be reduced
2243@param reduce_data pointer to data to be reduced
2244@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data
2245@param lck pointer to the unique lock data structure
2246@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed
2247
2248The nowait version is used for a reduce clause with the nowait argument.
2249*/
2250kmp_int32
2251__kmpc_reduce_nowait(
2252 ident_t *loc, kmp_int32 global_tid,
2253 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
2254 kmp_critical_name *lck ) {
2255
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002256 KMP_COUNT_BLOCK(REDUCE_nowait);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002257 int retval;
2258 PACKED_REDUCTION_METHOD_T packed_reduction_method;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002259#if OMP_40_ENABLED
2260 kmp_team_t *team;
2261 kmp_info_t *th;
2262 int teams_swapped = 0, task_state;
2263#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002264 KA_TRACE( 10, ( "__kmpc_reduce_nowait() enter: called T#%d\n", global_tid ) );
2265
2266 // why do we need this initialization here at all?
2267 // A reduction clause cannot be used as a stand-alone directive.
2268
2269 // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed
2270 // possible false-positive race detection by the thread checker?
2271 if( ! TCR_4( __kmp_init_parallel ) )
2272 __kmp_parallel_initialize();
2273
2274 // check correctness of reduce block nesting
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002275#if KMP_USE_DYNAMIC_LOCK
2276 if ( __kmp_env_consistency_check )
2277 __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
2278#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002279 if ( __kmp_env_consistency_check )
2280 __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002281#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002282
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002283#if OMP_40_ENABLED
2284 th = __kmp_thread_from_gtid(global_tid);
2285 if( th->th.th_teams_microtask ) { // AC: check if we are inside the teams construct?
2286 team = th->th.th_team;
2287 if( team->t.t_level == th->th.th_teams_level ) {
2288 // this is reduction at teams construct
2289 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
2290 // Let's swap teams temporarily for the reduction barrier
2291 teams_swapped = 1;
2292 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2293 th->th.th_team = team->t.t_parent;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002294 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002295 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002296 task_state = th->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002297 th->th.th_task_state = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002298 }
2299 }
2300#endif // OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002301
2302 // The packed_reduction_method value will be reused by the __kmp_end_reduce* functions, so it should be kept in a variable.
2303 // The variable should be either a construct-specific or a thread-specific property, not a team-specific one
2304 // (a thread can reach the next reduce block on the next construct, and the reduce method may differ on that construct).
2305 // An ident_t "loc" parameter could be used as a construct-specific property (but what if loc == 0?).
2306 // (If a construct-specific or team-specific variable were shared, unnecessary extra syncs would be needed.)
2307 // A thread-specific variable is better with respect to the two issues above (next construct and extra syncs);
2308 // a thread-specific "th_local.packed_reduction_method" variable is used currently.
2309 // Each thread executes the 'determine' and 'set' lines (no need to have only one thread execute them; this avoids unnecessary extra syncs).
2310
2311 packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2312 __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2313
2314 if( packed_reduction_method == critical_reduce_block ) {
2315
2316 __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2317 retval = 1;
2318
2319 } else if( packed_reduction_method == empty_reduce_block ) {
2320
2321 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2322 retval = 1;
2323
2324 } else if( packed_reduction_method == atomic_reduce_block ) {
2325
2326 retval = 2;
2327
2328 // all threads should do this pop here (because __kmpc_end_reduce_nowait() won't be called by the code gen)
2329 // (this is not ideal: the checking block has already been closed by this 'pop',
2330 // but the atomic operation has not been executed yet; it will execute slightly later, literally on the next instruction)
2331 if ( __kmp_env_consistency_check )
2332 __kmp_pop_sync( global_tid, ct_reduce, loc );
2333
2334 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2335
2336 //AT: performance issue: a real barrier here
2337 //AT: (if the master is slow, the other threads are blocked here waiting for the master to come and release them)
2338 //AT: (this is not what a customer might expect when specifying the NOWAIT clause)
2339 //AT: (specifying NOWAIT will not improve performance here, which can be confusing to a customer)
2340 //AT: another implementation of *barrier_gather*nowait() (or some other design) might be faster
2341 // and more in line with the sense of NOWAIT
2342 //AT: TODO: run the EPCC benchmark and compare times
2343
2344 // this barrier should be invisible to a customer and to the thread profiler
2345 // (it's neither a terminating barrier nor customer's code, it's used for an internal purpose)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002346#if USE_ITT_NOTIFY
2347 __kmp_threads[global_tid]->th.th_ident = loc;
2348#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002349 retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, FALSE, reduce_size, reduce_data, reduce_func );
2350 retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2351
2352 // all other workers except master should do this pop here
2353 // ( none of the other workers will get to __kmpc_end_reduce_nowait() )
2354 if ( __kmp_env_consistency_check ) {
2355 if( retval == 0 ) {
2356 __kmp_pop_sync( global_tid, ct_reduce, loc );
2357 }
2358 }
2359
2360 } else {
2361
2362 // should never reach this block
2363 KMP_ASSERT( 0 ); // "unexpected method"
2364
2365 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002366#if OMP_40_ENABLED
2367 if( teams_swapped ) {
2368 // Restore thread structure
2369 th->th.th_info.ds.ds_tid = 0;
2370 th->th.th_team = team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002371 th->th.th_team_nproc = team->t.t_nproc;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002372 th->th.th_task_team = team->t.t_task_team[task_state];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002373 th->th.th_task_state = task_state;
2374 }
2375#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002376 KA_TRACE( 10, ( "__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2377
2378 return retval;
2379}
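/*
 * For illustration only: a minimal sketch of how a compiler might lower the
 * tail of
 *     #pragma omp for reduction(+:sum) nowait
 * onto __kmpc_reduce_nowait() and its return-value protocol (1: combine
 * directly and call __kmpc_end_reduce_nowait(); 2: use an atomic update, no
 * "end" call; 0: nothing to do).  The names "reduce_sum_func",
 * "lowered_reduction_tail" and "local_sum" are hypothetical, and real code
 * generation would emit a __kmpc_atomic_* call rather than a pragma in
 * case 2.  Kept under "#if 0" so it is never compiled.
 */
#if 0
static void
reduce_sum_func( void * lhs, void * rhs )
{
    *(double *)lhs += *(double *)rhs;     // combine two partial sums
}

static void
lowered_reduction_tail( ident_t * loc, kmp_int32 gtid,
                        double * sum /* shared */, double local_sum,
                        kmp_critical_name * lck )
{
    switch ( __kmpc_reduce_nowait( loc, gtid, 1, sizeof( double ), &local_sum,
                                   reduce_sum_func, lck ) ) {
        case 1:                           // combine into the shared variable directly
            *sum += local_sum;
            __kmpc_end_reduce_nowait( loc, gtid, lck );
            break;
        case 2:                           // atomic method: every thread updates atomically
            #pragma omp atomic
            *sum += local_sum;
            break;
        default:                          // 0: contribution already combined (tree reduce)
            break;
    }
}
#endif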
2380
2381/*!
2382@ingroup SYNCHRONIZATION
2383@param loc source location information
2384@param global_tid global thread id.
2385@param lck pointer to the unique lock data structure
2386
2387Finish the execution of a reduce nowait.
2388*/
2389void
2390__kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2391
2392 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2393
2394 KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid ) );
2395
2396 packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2397
2398 if( packed_reduction_method == critical_reduce_block ) {
2399
2400 __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2401
2402 } else if( packed_reduction_method == empty_reduce_block ) {
2403
2404 // usage: if team size == 1, no synchronization is required ( on Intel platforms only )
2405
2406 } else if( packed_reduction_method == atomic_reduce_block ) {
2407
2408 // neither master nor other workers should get here
2409 // (code gen does not generate this call in case 2: atomic reduce block)
2410 // actually, it would be better to remove this else-if entirely;
2411 // after removal this value will be checked by the 'else' branch and will assert
2412
2413 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2414
2415 // only master gets here
2416
2417 } else {
2418
2419 // should never reach this block
2420 KMP_ASSERT( 0 ); // "unexpected method"
2421
2422 }
2423
2424 if ( __kmp_env_consistency_check )
2425 __kmp_pop_sync( global_tid, ct_reduce, loc );
2426
2427 KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2428
2429 return;
2430}
2431
2432/* 2.a.ii. Reduce Block with a terminating barrier */
2433
2434/*!
2435@ingroup SYNCHRONIZATION
2436@param loc source location information
2437@param global_tid global thread number
2438@param num_vars number of items (variables) to be reduced
2439@param reduce_size size of data in bytes to be reduced
2440@param reduce_data pointer to data to be reduced
2441@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data
2442@param lck pointer to the unique lock data structure
2443@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed
2444
2445A blocking reduce that includes an implicit barrier.
2446*/
2447kmp_int32
2448__kmpc_reduce(
2449 ident_t *loc, kmp_int32 global_tid,
2450 kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2451 void (*reduce_func)(void *lhs_data, void *rhs_data),
2452 kmp_critical_name *lck )
2453{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002454 KMP_COUNT_BLOCK(REDUCE_wait);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002455 int retval;
2456 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2457
2458 KA_TRACE( 10, ( "__kmpc_reduce() enter: called T#%d\n", global_tid ) );
2459
2460 // why do we need this initialization here at all?
2461 // Reduction clause can not be a stand-alone directive.
2462 // A reduction clause cannot be a stand-alone directive.
2463 // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed
2464 // possible detection of false-positive race by the threadchecker ???
2465 // possible false-positive race detection by the thread checker?
2466 __kmp_parallel_initialize();
2467
2468 // check correctness of reduce block nesting
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002469#if KMP_USE_DYNAMIC_LOCK
2470 if ( __kmp_env_consistency_check )
2471 __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
2472#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002473 if ( __kmp_env_consistency_check )
2474 __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002475#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002476
Jim Cownie5e8470a2013-09-27 10:38:44 +00002477 packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2478 __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2479
2480 if( packed_reduction_method == critical_reduce_block ) {
2481
2482 __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2483 retval = 1;
2484
2485 } else if( packed_reduction_method == empty_reduce_block ) {
2486
2487 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2488 retval = 1;
2489
2490 } else if( packed_reduction_method == atomic_reduce_block ) {
2491
2492 retval = 2;
2493
2494 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2495
2496 //case tree_reduce_block:
2497 // this barrier should be visible to a customer and to the thread profiler
2498 // (it's a terminating barrier on constructs if NOWAIT not specified)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002499#if USE_ITT_NOTIFY
2500 __kmp_threads[global_tid]->th.th_ident = loc; // needed for correct notification of frames
2501#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002502 retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, TRUE, reduce_size, reduce_data, reduce_func );
2503 retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2504
2505 // all other workers except master should do this pop here
2506 // ( none of the workers except the master will enter __kmpc_end_reduce() )
2507 if ( __kmp_env_consistency_check ) {
2508 if( retval == 0 ) { // 0: all other workers; 1: master
2509 __kmp_pop_sync( global_tid, ct_reduce, loc );
2510 }
2511 }
2512
2513 } else {
2514
2515 // should never reach this block
2516 KMP_ASSERT( 0 ); // "unexpected method"
2517
2518 }
2519
2520 KA_TRACE( 10, ( "__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2521
2522 return retval;
2523}
2524
2525/*!
2526@ingroup SYNCHRONIZATION
2527@param loc source location information
2528@param global_tid global thread id.
2529@param lck pointer to the unique lock data structure
2530
2531Finish the execution of a blocking reduce.
2532The <tt>lck</tt> pointer must be the same as that used in the corresponding start function.
2533*/
2534void
2535__kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2536
2537 PACKED_REDUCTION_METHOD_T packed_reduction_method;
2538
2539 KA_TRACE( 10, ( "__kmpc_end_reduce() enter: called T#%d\n", global_tid ) );
2540
2541 packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2542
2543 // this barrier should be visible to a customer and to the thread profiler
2544 // (it's a terminating barrier on constructs if NOWAIT not specified)
2545
2546 if( packed_reduction_method == critical_reduce_block ) {
2547
2548 __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2549
2550 // TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002551#if USE_ITT_NOTIFY
2552 __kmp_threads[global_tid]->th.th_ident = loc;
2553#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002554 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2555
2556 } else if( packed_reduction_method == empty_reduce_block ) {
2557
2558 // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2559
2560 // TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002561#if USE_ITT_NOTIFY
2562 __kmp_threads[global_tid]->th.th_ident = loc;
2563#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002564 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2565
2566 } else if( packed_reduction_method == atomic_reduce_block ) {
2567
2568 // TODO: implicit barrier: should be exposed
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002569#if USE_ITT_NOTIFY
2570 __kmp_threads[global_tid]->th.th_ident = loc;
2571#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002572 __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2573
2574 } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2575
2576 // only master executes here (master releases all other workers)
2577 __kmp_end_split_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid );
2578
2579 } else {
2580
2581 // should never reach this block
2582 KMP_ASSERT( 0 ); // "unexpected method"
2583
2584 }
2585
2586 if ( __kmp_env_consistency_check )
2587 __kmp_pop_sync( global_tid, ct_reduce, loc );
2588
2589 KA_TRACE( 10, ( "__kmpc_end_reduce() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2590
2591 return;
2592}
2593
2594#undef __KMP_GET_REDUCTION_METHOD
2595#undef __KMP_SET_REDUCTION_METHOD
2596
2597/*-- end of interface to fast scalable reduce routines ---------------------------------------------------------------*/
2598
2599kmp_uint64
2600__kmpc_get_taskid() {
2601
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002602 kmp_int32 gtid;
2603 kmp_info_t * thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002604
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002605 gtid = __kmp_get_gtid();
2606 if ( gtid < 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002607 return 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002608 }; // if
2609 thread = __kmp_thread_from_gtid( gtid );
2610 return thread->th.th_current_task->td_task_id;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002611
2612} // __kmpc_get_taskid
2613
2614
2615kmp_uint64
2616__kmpc_get_parent_taskid() {
2617
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002618 kmp_int32 gtid;
2619 kmp_info_t * thread;
2620 kmp_taskdata_t * parent_task;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002621
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002622 gtid = __kmp_get_gtid();
2623 if ( gtid < 0 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002624 return 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002625 }; // if
2626 thread = __kmp_thread_from_gtid( gtid );
2627 parent_task = thread->th.th_current_task->td_parent;
2628 return ( parent_task == NULL ? 0 : parent_task->td_task_id );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002629
2630} // __kmpc_get_parent_taskid
2631
2632void __kmpc_place_threads(int nC, int nT, int nO)
2633{
Jim Cownie5e8470a2013-09-27 10:38:44 +00002634 if ( ! __kmp_init_serial ) {
2635 __kmp_serial_initialize();
2636 }
2637 __kmp_place_num_cores = nC;
2638 __kmp_place_num_threads_per_core = nT;
2639 __kmp_place_core_offset = nO;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002640}
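/*
 * For illustration only: the three values recorded above describe a thread
 * placement of the form "nC cores, nT threads per core, starting at core
 * offset nO"; they are simply stored here and consumed later by the affinity
 * code.  The call below is a hypothetical usage sketch, kept under "#if 0"
 * so it is never compiled.
 */
#if 0
static void
example_place_threads( void )
{
    __kmpc_place_threads( 4 /* nC: cores */, 2 /* nT: threads per core */, 0 /* nO: core offset */ );
}
#endif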
2641
2642// end of file //
2643