/*
 * kmp_sched.c -- static scheduling -- iteration initialization
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


/*
 * Static scheduling initialization.
 *
 * NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
 *       it may change values between parallel regions.  __kmp_max_nth
 *       is the largest value __kmp_nth may take, 1 is the smallest.
 */
24
#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_str.h"
#include "kmp_error.h"
#include "kmp_stats.h"
#include "kmp_itt.h"

#include <limits.h>  // INT_MIN/INT_MAX/LLONG_MIN/LLONG_MAX/UINT_MAX/ULLONG_MAX

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
35
Jim Cownie5e8470a2013-09-27 10:38:44 +000036// template for type limits
37template< typename T >
38struct i_maxmin {
39 static const T mx;
40 static const T mn;
41};
42template<>
43struct i_maxmin< int > {
44 static const int mx = 0x7fffffff;
45 static const int mn = 0x80000000;
46};
47template<>
48struct i_maxmin< unsigned int > {
49 static const unsigned int mx = 0xffffffff;
50 static const unsigned int mn = 0x00000000;
51};
52template<>
53struct i_maxmin< long long > {
54 static const long long mx = 0x7fffffffffffffffLL;
55 static const long long mn = 0x8000000000000000LL;
56};
57template<>
58struct i_maxmin< unsigned long long > {
59 static const unsigned long long mx = 0xffffffffffffffffLL;
60 static const unsigned long long mn = 0x0000000000000000LL;
61};
62//-------------------------------------------------------------------------
63#ifdef KMP_DEBUG
64//-------------------------------------------------------------------------
65// template for debug prints specification ( d, u, lld, llu )
66 char const * traits_t< int >::spec = "d";
67 char const * traits_t< unsigned int >::spec = "u";
68 char const * traits_t< long long >::spec = "lld";
69 char const * traits_t< unsigned long long >::spec = "llu";
70//-------------------------------------------------------------------------
71#endif
72
73template< typename T >
74static void
75__kmp_for_static_init(
76 ident_t *loc,
77 kmp_int32 global_tid,
78 kmp_int32 schedtype,
79 kmp_int32 *plastiter,
80 T *plower,
81 T *pupper,
82 typename traits_t< T >::signed_t *pstride,
83 typename traits_t< T >::signed_t incr,
84 typename traits_t< T >::signed_t chunk
85) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +000086 KMP_COUNT_BLOCK(OMP_FOR_static);
Jonathan Peyton45be4502015-08-11 21:36:41 +000087 KMP_TIME_BLOCK (FOR_static_scheduling);
88
Jim Cownie5e8470a2013-09-27 10:38:44 +000089 typedef typename traits_t< T >::unsigned_t UT;
90 typedef typename traits_t< T >::signed_t ST;
91 /* this all has to be changed back to TID and such.. */
92 register kmp_int32 gtid = global_tid;
93 register kmp_uint32 tid;
94 register kmp_uint32 nth;
95 register UT trip_count;
96 register kmp_team_t *team;
Andrey Churbanov51aecb82015-05-06 19:22:36 +000097 register kmp_info_t *th = __kmp_threads[ gtid ];
Jim Cownie5e8470a2013-09-27 10:38:44 +000098
Andrey Churbanovd7d088f2015-04-29 16:42:24 +000099#if OMPT_SUPPORT && OMPT_TRACE
100 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
101 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
102#endif
103
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000104 KMP_DEBUG_ASSERT( plastiter && plower && pupper && pstride );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000105 KE_TRACE( 10, ("__kmpc_for_static_init called (%d)\n", global_tid));
106 #ifdef KMP_DEBUG
107 {
108 const char * buff;
109 // create format specifiers before the debug output
110 buff = __kmp_str_format(
111 "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," \
112 " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
113 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
114 traits_t< ST >::spec, traits_t< ST >::spec, traits_t< T >::spec );
115 KD_TRACE(100, ( buff, global_tid, schedtype, *plastiter,
116 *plower, *pupper, *pstride, incr, chunk ) );
117 __kmp_str_free( &buff );
118 }
119 #endif
120
121 if ( __kmp_env_consistency_check ) {
122 __kmp_push_workshare( global_tid, ct_pdo, loc );
123 if ( incr == 0 ) {
124 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000125 }
126 }
127 /* special handling for zero-trip loops */
128 if ( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000129 if( plastiter != NULL )
130 *plastiter = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000131 /* leave pupper and plower set to entire iteration space */
132 *pstride = incr; /* value should never be used */
133 // *plower = *pupper - incr; // let compiler bypass the illegal loop (like for(i=1;i<10;i--)) THIS LINE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE ON A ZERO-TRIP LOOP (lower=1,\
134 upper=0,stride=1) - JPH June 23, 2009.
135 #ifdef KMP_DEBUG
136 {
137 const char * buff;
138 // create format specifiers before the debug output
139 buff = __kmp_str_format(
140 "__kmpc_for_static_init:(ZERO TRIP) liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>, loc = %%s\n",
141 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
142 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride, loc->psource ) );
143 __kmp_str_free( &buff );
144 }
145 #endif
146 KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000147
148#if OMPT_SUPPORT && OMPT_TRACE
149 if ((ompt_status == ompt_status_track_callback) &&
150 ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
151 ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
152 team_info->parallel_id, task_info->task_id,
153 team_info->microtask);
154 }
155#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +0000156 KMP_COUNT_VALUE (FOR_static_iterations, 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000157 return;
158 }
159
160 #if OMP_40_ENABLED
161 if ( schedtype > kmp_ord_upper ) {
162 // we are in DISTRIBUTE construct
163 schedtype += kmp_sch_static - kmp_distribute_static; // AC: convert to usual schedule type
Andrey Churbanov51aecb82015-05-06 19:22:36 +0000164 tid = th->th.th_team->t.t_master_tid;
165 team = th->th.th_team->t.t_parent;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000166 } else
167 #endif
168 {
169 tid = __kmp_tid_from_gtid( global_tid );
Andrey Churbanov51aecb82015-05-06 19:22:36 +0000170 team = th->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000171 }
172
173 /* determine if "for" loop is an active worksharing construct */
174 if ( team -> t.t_serialized ) {
175 /* serialized parallel, each thread executes whole iteration space */
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000176 if( plastiter != NULL )
177 *plastiter = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000178 /* leave pupper and plower set to entire iteration space */
179 *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
180
181 #ifdef KMP_DEBUG
182 {
183 const char * buff;
184 // create format specifiers before the debug output
185 buff = __kmp_str_format(
186 "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
187 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
188 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
189 __kmp_str_free( &buff );
190 }
191 #endif
192 KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000193
194#if OMPT_SUPPORT && OMPT_TRACE
195 if ((ompt_status == ompt_status_track_callback) &&
196 ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
197 ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
198 team_info->parallel_id, task_info->task_id,
199 team_info->microtask);
200 }
201#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000202 return;
203 }
204 nth = team->t.t_nproc;
205 if ( nth == 1 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000206 if( plastiter != NULL )
207 *plastiter = TRUE;
208 *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000209 #ifdef KMP_DEBUG
210 {
211 const char * buff;
212 // create format specifiers before the debug output
213 buff = __kmp_str_format(
214 "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
215 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
216 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
217 __kmp_str_free( &buff );
218 }
219 #endif
220 KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000221
222#if OMPT_SUPPORT && OMPT_TRACE
223 if ((ompt_status == ompt_status_track_callback) &&
224 ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
225 ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
226 team_info->parallel_id, task_info->task_id,
227 team_info->microtask);
228 }
229#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000230 return;
231 }
232
233 /* compute trip count */
234 if ( incr == 1 ) {
235 trip_count = *pupper - *plower + 1;
236 } else if (incr == -1) {
237 trip_count = *plower - *pupper + 1;
238 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000239 if ( incr > 1 ) { // the check is needed for unsigned division when incr < 0
Jim Cownie5e8470a2013-09-27 10:38:44 +0000240 trip_count = (*pupper - *plower) / incr + 1;
241 } else {
242 trip_count = (*plower - *pupper) / ( -incr ) + 1;
243 }
244 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000245
Jim Cownie5e8470a2013-09-27 10:38:44 +0000246 if ( __kmp_env_consistency_check ) {
247 /* tripcount overflow? */
248 if ( trip_count == 0 && *pupper != *plower ) {
249 __kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc );
250 }
251 }
Jonathan Peyton45be4502015-08-11 21:36:41 +0000252 KMP_COUNT_VALUE (FOR_static_iterations, trip_count);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000253
254 /* compute remaining parameters */
255 switch ( schedtype ) {
256 case kmp_sch_static:
257 {
258 if ( trip_count < nth ) {
259 KMP_DEBUG_ASSERT(
260 __kmp_static == kmp_sch_static_greedy || \
261 __kmp_static == kmp_sch_static_balanced
262 ); // Unknown static scheduling type.
263 if ( tid < trip_count ) {
264 *pupper = *plower = *plower + tid * incr;
265 } else {
266 *plower = *pupper + incr;
267 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000268 if( plastiter != NULL )
269 *plastiter = ( tid == trip_count - 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000270 } else {
271 if ( __kmp_static == kmp_sch_static_balanced ) {
272 register UT small_chunk = trip_count / nth;
273 register UT extras = trip_count % nth;
274 *plower += incr * ( tid * small_chunk + ( tid < extras ? tid : extras ) );
275 *pupper = *plower + small_chunk * incr - ( tid < extras ? 0 : incr );
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000276 if( plastiter != NULL )
277 *plastiter = ( tid == nth - 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000278 } else {
279 register T big_chunk_inc_count = ( trip_count/nth +
280 ( ( trip_count % nth ) ? 1 : 0) ) * incr;
281 register T old_upper = *pupper;
282
283 KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
284 // Unknown static scheduling type.
285
286 *plower += tid * big_chunk_inc_count;
287 *pupper = *plower + big_chunk_inc_count - incr;
288 if ( incr > 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000289 if( *pupper < *plower )
Jim Cownie5e8470a2013-09-27 10:38:44 +0000290 *pupper = i_maxmin< T >::mx;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000291 if( plastiter != NULL )
292 *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000293 if ( *pupper > old_upper ) *pupper = old_upper; // tracker C73258
294 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000295 if( *pupper > *plower )
Jim Cownie5e8470a2013-09-27 10:38:44 +0000296 *pupper = i_maxmin< T >::mn;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000297 if( plastiter != NULL )
298 *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000299 if ( *pupper < old_upper ) *pupper = old_upper; // tracker C73258
300 }
301 }
302 }
303 break;
304 }
305 case kmp_sch_static_chunked:
306 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000307 register ST span;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000308 if ( chunk < 1 ) {
309 chunk = 1;
310 }
311 span = chunk * incr;
312 *pstride = span * nth;
313 *plower = *plower + (span * tid);
314 *pupper = *plower + span - incr;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000315 if( plastiter != NULL )
316 *plastiter = (tid == ((trip_count - 1)/( UT )chunk) % nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000317 break;
318 }
319 default:
320 KMP_ASSERT2( 0, "__kmpc_for_static_init: unknown scheduling type" );
321 break;
322 }
323
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000324#if USE_ITT_BUILD
325 // Report loop metadata
Andrey Churbanov51aecb82015-05-06 19:22:36 +0000326 if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
327#if OMP_40_ENABLED
328 th->th.th_teams_microtask == NULL &&
329#endif
330 team->t.t_active_level == 1 )
331 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000332 kmp_uint64 cur_chunk = chunk;
333 // Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked
334 if ( schedtype == kmp_sch_static ) {
335 cur_chunk = trip_count / nth + ( ( trip_count % nth ) ? 1 : 0);
336 }
337 // 0 - "static" schedule
338 __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
339 }
340#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000341 #ifdef KMP_DEBUG
342 {
343 const char * buff;
344 // create format specifiers before the debug output
345 buff = __kmp_str_format(
346 "__kmpc_for_static_init: liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>\n",
347 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
348 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
349 __kmp_str_free( &buff );
350 }
351 #endif
352 KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000353
354#if OMPT_SUPPORT && OMPT_TRACE
355 if ((ompt_status == ompt_status_track_callback) &&
356 ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
357 ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
358 team_info->parallel_id, task_info->task_id, team_info->microtask);
359 }
360#endif
361
Jim Cownie5e8470a2013-09-27 10:38:44 +0000362 return;
363}
364
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000365template< typename T >
366static void
367__kmp_dist_for_static_init(
368 ident_t *loc,
369 kmp_int32 gtid,
370 kmp_int32 schedule,
371 kmp_int32 *plastiter,
372 T *plower,
373 T *pupper,
374 T *pupperDist,
375 typename traits_t< T >::signed_t *pstride,
376 typename traits_t< T >::signed_t incr,
377 typename traits_t< T >::signed_t chunk
378) {
Jonathan Peyton45be4502015-08-11 21:36:41 +0000379 KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000380 typedef typename traits_t< T >::unsigned_t UT;
381 typedef typename traits_t< T >::signed_t ST;
382 register kmp_uint32 tid;
383 register kmp_uint32 nth;
384 register kmp_uint32 team_id;
385 register kmp_uint32 nteams;
386 register UT trip_count;
387 register kmp_team_t *team;
388 kmp_info_t * th;
389
390 KMP_DEBUG_ASSERT( plastiter && plower && pupper && pupperDist && pstride );
391 KE_TRACE( 10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
392 #ifdef KMP_DEBUG
393 {
394 const char * buff;
395 // create format specifiers before the debug output
396 buff = __kmp_str_format(
397 "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "\
398 "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
399 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
400 traits_t< ST >::spec, traits_t< T >::spec );
401 KD_TRACE(100, ( buff, gtid, schedule, *plastiter,
402 *plower, *pupper, incr, chunk ) );
403 __kmp_str_free( &buff );
404 }
405 #endif
406
407 if( __kmp_env_consistency_check ) {
408 __kmp_push_workshare( gtid, ct_pdo, loc );
409 if( incr == 0 ) {
410 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
411 }
412 if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
413 // The loop is illegal.
414 // Some zero-trip loops maintained by compiler, e.g.:
415 // for(i=10;i<0;++i) // lower >= upper - run-time check
416 // for(i=0;i>10;--i) // lower <= upper - run-time check
417 // for(i=0;i>10;++i) // incr > 0 - compile-time check
418 // for(i=10;i<0;--i) // incr < 0 - compile-time check
419 // Compiler does not check the following illegal loops:
420 // for(i=0;i<10;i+=incr) // where incr<0
421 // for(i=10;i>0;i-=incr) // where incr<0
422 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
423 }
424 }
425 tid = __kmp_tid_from_gtid( gtid );
426 th = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000427 nth = th->th.th_team_nproc;
428 team = th->th.th_team;
429 #if OMP_40_ENABLED
Jonathan Peyton441f3372015-09-21 17:24:46 +0000430 KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000431 nteams = th->th.th_teams_size.nteams;
432 #endif
433 team_id = team->t.t_master_tid;
434 KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);
435
436 // compute global trip count
437 if( incr == 1 ) {
438 trip_count = *pupper - *plower + 1;
439 } else if(incr == -1) {
440 trip_count = *plower - *pupper + 1;
441 } else {
442 trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
443 }
Jonathan Peyton45be4502015-08-11 21:36:41 +0000444
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000445 *pstride = *pupper - *plower; // just in case (can be unused)
446 if( trip_count <= nteams ) {
447 KMP_DEBUG_ASSERT(
448 __kmp_static == kmp_sch_static_greedy || \
449 __kmp_static == kmp_sch_static_balanced
450 ); // Unknown static scheduling type.
451 // only masters of some teams get single iteration, other threads get nothing
452 if( team_id < trip_count && tid == 0 ) {
453 *pupper = *pupperDist = *plower = *plower + team_id * incr;
454 } else {
455 *pupperDist = *pupper;
456 *plower = *pupper + incr; // compiler should skip loop body
457 }
458 if( plastiter != NULL )
459 *plastiter = ( tid == 0 && team_id == trip_count - 1 );
460 } else {
461 // Get the team's chunk first (each team gets at most one chunk)
462 if( __kmp_static == kmp_sch_static_balanced ) {
463 register UT chunkD = trip_count / nteams;
464 register UT extras = trip_count % nteams;
465 *plower += incr * ( team_id * chunkD + ( team_id < extras ? team_id : extras ) );
466 *pupperDist = *plower + chunkD * incr - ( team_id < extras ? 0 : incr );
467 if( plastiter != NULL )
468 *plastiter = ( team_id == nteams - 1 );
469 } else {
470 register T chunk_inc_count =
471 ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
472 register T upper = *pupper;
473 KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
474 // Unknown static scheduling type.
475 *plower += team_id * chunk_inc_count;
476 *pupperDist = *plower + chunk_inc_count - incr;
477 // Check/correct bounds if needed
478 if( incr > 0 ) {
479 if( *pupperDist < *plower )
480 *pupperDist = i_maxmin< T >::mx;
481 if( plastiter != NULL )
482 *plastiter = *plower <= upper && *pupperDist > upper - incr;
483 if( *pupperDist > upper )
484 *pupperDist = upper; // tracker C73258
485 if( *plower > *pupperDist ) {
486 *pupper = *pupperDist; // no iterations available for the team
487 goto end;
488 }
489 } else {
490 if( *pupperDist > *plower )
491 *pupperDist = i_maxmin< T >::mn;
492 if( plastiter != NULL )
493 *plastiter = *plower >= upper && *pupperDist < upper - incr;
494 if( *pupperDist < upper )
495 *pupperDist = upper; // tracker C73258
496 if( *plower < *pupperDist ) {
497 *pupper = *pupperDist; // no iterations available for the team
498 goto end;
499 }
500 }
501 }
502 // Get the parallel loop chunk now (for thread)
503 // compute trip count for team's chunk
504 if( incr == 1 ) {
505 trip_count = *pupperDist - *plower + 1;
506 } else if(incr == -1) {
507 trip_count = *plower - *pupperDist + 1;
508 } else {
509 trip_count = (ST)(*pupperDist - *plower) / incr + 1;
510 }
511 KMP_DEBUG_ASSERT( trip_count );
512 switch( schedule ) {
513 case kmp_sch_static:
514 {
515 if( trip_count <= nth ) {
516 KMP_DEBUG_ASSERT(
517 __kmp_static == kmp_sch_static_greedy || \
518 __kmp_static == kmp_sch_static_balanced
519 ); // Unknown static scheduling type.
520 if( tid < trip_count )
521 *pupper = *plower = *plower + tid * incr;
522 else
523 *plower = *pupper + incr; // no iterations available
524 if( plastiter != NULL )
525 if( *plastiter != 0 && !( tid == trip_count - 1 ) )
526 *plastiter = 0;
527 } else {
528 if( __kmp_static == kmp_sch_static_balanced ) {
529 register UT chunkL = trip_count / nth;
530 register UT extras = trip_count % nth;
531 *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
532 *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
533 if( plastiter != NULL )
534 if( *plastiter != 0 && !( tid == nth - 1 ) )
535 *plastiter = 0;
536 } else {
537 register T chunk_inc_count =
538 ( trip_count / nth + ( ( trip_count % nth ) ? 1 : 0) ) * incr;
539 register T upper = *pupperDist;
540 KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
541 // Unknown static scheduling type.
542 *plower += tid * chunk_inc_count;
543 *pupper = *plower + chunk_inc_count - incr;
544 if( incr > 0 ) {
545 if( *pupper < *plower )
546 *pupper = i_maxmin< T >::mx;
547 if( plastiter != NULL )
548 if( *plastiter != 0 && !(*plower <= upper && *pupper > upper - incr) )
549 *plastiter = 0;
550 if( *pupper > upper )
551 *pupper = upper;//tracker C73258
552 } else {
553 if( *pupper > *plower )
554 *pupper = i_maxmin< T >::mn;
555 if( plastiter != NULL )
556 if( *plastiter != 0 && !(*plower >= upper && *pupper < upper - incr) )
557 *plastiter = 0;
558 if( *pupper < upper )
559 *pupper = upper;//tracker C73258
560 }
561 }
562 }
563 break;
564 }
565 case kmp_sch_static_chunked:
566 {
567 register ST span;
568 if( chunk < 1 )
569 chunk = 1;
570 span = chunk * incr;
571 *pstride = span * nth;
572 *plower = *plower + (span * tid);
573 *pupper = *plower + span - incr;
574 if( plastiter != NULL )
575 if( *plastiter != 0 && !(tid == ((trip_count - 1) / ( UT )chunk) % nth) )
576 *plastiter = 0;
577 break;
578 }
579 default:
580 KMP_ASSERT2( 0, "__kmpc_dist_for_static_init: unknown loop scheduling type" );
581 break;
582 }
583 }
584 end:;
585 #ifdef KMP_DEBUG
586 {
587 const char * buff;
588 // create format specifiers before the debug output
589 buff = __kmp_str_format(
590 "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "\
591 "stride=%%%s signed?<%s>\n",
592 traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec,
593 traits_t< ST >::spec, traits_t< T >::spec );
594 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pupperDist, *pstride ) );
595 __kmp_str_free( &buff );
596 }
597 #endif
598 KE_TRACE( 10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid ) );
599 return;
600}
601
602template< typename T >
603static void
604__kmp_team_static_init(
605 ident_t *loc,
606 kmp_int32 gtid,
607 kmp_int32 *p_last,
608 T *p_lb,
609 T *p_ub,
610 typename traits_t< T >::signed_t *p_st,
611 typename traits_t< T >::signed_t incr,
612 typename traits_t< T >::signed_t chunk
613) {
614 // The routine returns the first chunk distributed to the team and
615 // stride for next chunks calculation.
616 // Last iteration flag set for the team that will execute
617 // the last iteration of the loop.
618 // The routine is called for dist_schedue(static,chunk) only.
619 typedef typename traits_t< T >::unsigned_t UT;
620 typedef typename traits_t< T >::signed_t ST;
621 kmp_uint32 team_id;
622 kmp_uint32 nteams;
623 UT trip_count;
624 T lower;
625 T upper;
626 ST span;
627 kmp_team_t *team;
628 kmp_info_t *th;
629
630 KMP_DEBUG_ASSERT( p_last && p_lb && p_ub && p_st );
631 KE_TRACE( 10, ("__kmp_team_static_init called (%d)\n", gtid));
632 #ifdef KMP_DEBUG
633 {
634 const char * buff;
635 // create format specifiers before the debug output
636 buff = __kmp_str_format( "__kmp_team_static_init enter: T#%%d liter=%%d "\
637 "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
638 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
639 traits_t< ST >::spec, traits_t< T >::spec );
640 KD_TRACE(100, ( buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
641 __kmp_str_free( &buff );
642 }
643 #endif
644
645 lower = *p_lb;
646 upper = *p_ub;
647 if( __kmp_env_consistency_check ) {
648 if( incr == 0 ) {
649 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
650 }
651 if( incr > 0 ? (upper < lower) : (lower < upper) ) {
652 // The loop is illegal.
653 // Some zero-trip loops maintained by compiler, e.g.:
654 // for(i=10;i<0;++i) // lower >= upper - run-time check
655 // for(i=0;i>10;--i) // lower <= upper - run-time check
656 // for(i=0;i>10;++i) // incr > 0 - compile-time check
657 // for(i=10;i<0;--i) // incr < 0 - compile-time check
658 // Compiler does not check the following illegal loops:
659 // for(i=0;i<10;i+=incr) // where incr<0
660 // for(i=10;i>0;i-=incr) // where incr<0
661 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
662 }
663 }
664 th = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000665 team = th->th.th_team;
666 #if OMP_40_ENABLED
Jonathan Peyton441f3372015-09-21 17:24:46 +0000667 KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000668 nteams = th->th.th_teams_size.nteams;
669 #endif
670 team_id = team->t.t_master_tid;
671 KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);
672
673 // compute trip count
674 if( incr == 1 ) {
675 trip_count = upper - lower + 1;
676 } else if(incr == -1) {
677 trip_count = lower - upper + 1;
678 } else {
679 trip_count = (ST)(upper - lower) / incr + 1; // cast to signed to cover incr<0 case
680 }
681 if( chunk < 1 )
682 chunk = 1;
683 span = chunk * incr;
684 *p_st = span * nteams;
685 *p_lb = lower + (span * team_id);
686 *p_ub = *p_lb + span - incr;
687 if ( p_last != NULL )
688 *p_last = (team_id == ((trip_count - 1)/(UT)chunk) % nteams);
689 // Correct upper bound if needed
690 if( incr > 0 ) {
691 if( *p_ub < *p_lb ) // overflow?
692 *p_ub = i_maxmin< T >::mx;
693 if( *p_ub > upper )
694 *p_ub = upper; // tracker C73258
695 } else { // incr < 0
696 if( *p_ub > *p_lb )
697 *p_ub = i_maxmin< T >::mn;
698 if( *p_ub < upper )
699 *p_ub = upper; // tracker C73258
700 }
701 #ifdef KMP_DEBUG
702 {
703 const char * buff;
704 // create format specifiers before the debug output
705 buff = __kmp_str_format( "__kmp_team_static_init exit: T#%%d team%%u liter=%%d "\
706 "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
707 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
708 traits_t< ST >::spec );
709 KD_TRACE(100, ( buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
710 __kmp_str_free( &buff );
711 }
712 #endif
713}
714
Jim Cownie5e8470a2013-09-27 10:38:44 +0000715//--------------------------------------------------------------------------------------
716extern "C" {
717
718/*!
719@ingroup WORK_SHARING
720@param loc Source code location
721@param gtid Global thread id of this thread
722@param schedtype Scheduling type
723@param plastiter Pointer to the "last iteration" flag
724@param plower Pointer to the lower bound
725@param pupper Pointer to the upper bound
726@param pstride Pointer to the stride
727@param incr Loop increment
728@param chunk The chunk size
729
730Each of the four functions here are identical apart from the argument types.
731
732The functions compute the upper and lower bounds and stride to be used for the set of iterations
733to be executed by the current thread from the statically scheduled loop that is described by the
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000734initial values of the bounds, stride, increment and chunk size.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000735
736@{
737*/
738void
739__kmpc_for_static_init_4( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
740 kmp_int32 *plower, kmp_int32 *pupper,
741 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
742{
743 __kmp_for_static_init< kmp_int32 >(
744 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
745}
746
747/*!
748 See @ref __kmpc_for_static_init_4
749 */
750void
751__kmpc_for_static_init_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
752 kmp_uint32 *plower, kmp_uint32 *pupper,
753 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
754{
755 __kmp_for_static_init< kmp_uint32 >(
756 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
757}
758
759/*!
760 See @ref __kmpc_for_static_init_4
761 */
762void
763__kmpc_for_static_init_8( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
764 kmp_int64 *plower, kmp_int64 *pupper,
765 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
766{
767 __kmp_for_static_init< kmp_int64 >(
768 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
769}
770
771/*!
772 See @ref __kmpc_for_static_init_4
773 */
774void
775__kmpc_for_static_init_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
776 kmp_uint64 *plower, kmp_uint64 *pupper,
777 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
778{
779 __kmp_for_static_init< kmp_uint64 >(
780 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
781}
782/*!
783@}
784*/
785
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000786/*!
787@ingroup WORK_SHARING
788@param loc Source code location
789@param gtid Global thread id of this thread
Jonathan Peyton81f9cd12015-05-22 22:37:22 +0000790@param schedule Scheduling type for the parallel loop
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000791@param plastiter Pointer to the "last iteration" flag
792@param plower Pointer to the lower bound
793@param pupper Pointer to the upper bound of loop chunk
794@param pupperD Pointer to the upper bound of dist_chunk
Jonathan Peyton81f9cd12015-05-22 22:37:22 +0000795@param pstride Pointer to the stride for parallel loop
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000796@param incr Loop increment
Jonathan Peyton81f9cd12015-05-22 22:37:22 +0000797@param chunk The chunk size for the parallel loop
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000798
799Each of the four functions here are identical apart from the argument types.
800
801The functions compute the upper and lower bounds and strides to be used for the set of iterations
802to be executed by the current thread from the statically scheduled loop that is described by the
803initial values of the bounds, strides, increment and chunks for parallel loop and distribute
804constructs.
805
806@{
807*/
808void
809__kmpc_dist_for_static_init_4(
810 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
811 kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD,
812 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
813{
814 __kmp_dist_for_static_init< kmp_int32 >(
815 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
816}
817
818/*!
819 See @ref __kmpc_dist_for_static_init_4
820 */
821void
822__kmpc_dist_for_static_init_4u(
823 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
824 kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD,
825 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
826{
827 __kmp_dist_for_static_init< kmp_uint32 >(
828 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
829}
830
831/*!
832 See @ref __kmpc_dist_for_static_init_4
833 */
834void
835__kmpc_dist_for_static_init_8(
836 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
837 kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD,
838 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
839{
840 __kmp_dist_for_static_init< kmp_int64 >(
841 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
842}
843
844/*!
845 See @ref __kmpc_dist_for_static_init_4
846 */
847void
848__kmpc_dist_for_static_init_8u(
849 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
850 kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD,
851 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
852{
853 __kmp_dist_for_static_init< kmp_uint64 >(
854 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
855}
856/*!
857@}
858*/
859
860//-----------------------------------------------------------------------------------------
861// Auxiliary routines for Distribute Parallel Loop construct implementation
862// Transfer call to template< type T >
863// __kmp_team_static_init( ident_t *loc, int gtid,
864// int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
865
866/*!
867@ingroup WORK_SHARING
868@{
869@param loc Source location
870@param gtid Global thread id
871@param p_last pointer to last iteration flag
872@param p_lb pointer to Lower bound
873@param p_ub pointer to Upper bound
874@param p_st Step (or increment if you prefer)
875@param incr Loop increment
876@param chunk The chunk size to block with
877
878The functions compute the upper and lower bounds and stride to be used for the set of iterations
879to be executed by the current team from the statically scheduled loop that is described by the
880initial values of the bounds, stride, increment and chunk for the distribute construct as part of
881composite distribute parallel loop construct.
882These functions are all identical apart from the types of the arguments.
883*/
884
885void
886__kmpc_team_static_init_4(
887 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
888 kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
889{
890 KMP_DEBUG_ASSERT( __kmp_init_serial );
891 __kmp_team_static_init< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
892}
893
894/*!
895 See @ref __kmpc_team_static_init_4
896 */
897void
898__kmpc_team_static_init_4u(
899 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
900 kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
901{
902 KMP_DEBUG_ASSERT( __kmp_init_serial );
903 __kmp_team_static_init< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
904}
905
906/*!
907 See @ref __kmpc_team_static_init_4
908 */
909void
910__kmpc_team_static_init_8(
911 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
912 kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
913{
914 KMP_DEBUG_ASSERT( __kmp_init_serial );
915 __kmp_team_static_init< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
916}
917
918/*!
919 See @ref __kmpc_team_static_init_4
920 */
921void
922__kmpc_team_static_init_8u(
923 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
924 kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
925{
926 KMP_DEBUG_ASSERT( __kmp_init_serial );
927 __kmp_team_static_init< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
928}
929/*!
930@}
931*/
932
Jim Cownie5e8470a2013-09-27 10:38:44 +0000933} // extern "C"
934