/*
 * kmp_sched.c -- static scheduling -- iteration initialization
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


/*
 * Static scheduling initialization.
 *
 * NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
 *       it may change values between parallel regions.  __kmp_max_nth
 *       is the largest value __kmp_nth may take, 1 is the smallest.
 */

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_str.h"
#include "kmp_error.h"
#include "kmp_stats.h"
#include "kmp_itt.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

// template for type limits
template< typename T >
struct i_maxmin {
    static const T mx;
    static const T mn;
};
template<>
struct i_maxmin< int > {
    static const int mx = 0x7fffffff;
    static const int mn = 0x80000000;
};
template<>
struct i_maxmin< unsigned int > {
    static const unsigned int mx = 0xffffffff;
    static const unsigned int mn = 0x00000000;
};
template<>
struct i_maxmin< long long > {
    static const long long mx = 0x7fffffffffffffffLL;
    static const long long mn = 0x8000000000000000LL;
};
template<>
struct i_maxmin< unsigned long long > {
    static const unsigned long long mx = 0xffffffffffffffffULL;
    static const unsigned long long mn = 0x0000000000000000ULL;
};
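// Note (explanatory, not from the original source): these limits match
// std::numeric_limits<T>::max()/min() for the four instantiated types, e.g.
// i_maxmin< int >::mx == 0x7fffffff == INT_MAX. The scheduling code below
// uses them to clamp a per-thread upper bound when its computation overflows
// (see the kmp_sch_static_greedy paths).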
//-------------------------------------------------------------------------
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
    char const * traits_t< int >::spec = "d";
    char const * traits_t< unsigned int >::spec = "u";
    char const * traits_t< long long >::spec = "lld";
    char const * traits_t< unsigned long long >::spec = "llu";
//-------------------------------------------------------------------------
#endif

template< typename T >
static void
__kmp_for_static_init(
    ident_t                          *loc,
    kmp_int32                         global_tid,
    kmp_int32                         schedtype,
    kmp_int32                        *plastiter,
    T                                *plower,
    T                                *pupper,
    typename traits_t< T >::signed_t *pstride,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    KMP_COUNT_BLOCK(OMP_FOR_static);
    typedef typename traits_t< T >::unsigned_t UT;
    typedef typename traits_t< T >::signed_t  ST;
    /* this all has to be changed back to TID and such.. */
    register kmp_int32   gtid = global_tid;
    register kmp_uint32  tid;
    register kmp_uint32  nth;
    register UT          trip_count;
    register kmp_team_t *team;

#if OMPT_SUPPORT && OMPT_TRACE
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
#endif

    KMP_DEBUG_ASSERT( plastiter && plower && pupper && pstride );
    KE_TRACE( 10, ("__kmpc_for_static_init called (%d)\n", global_tid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," \
            " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, global_tid, schedtype, *plastiter,
            *plower, *pupper, *pstride, incr, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    if ( __kmp_env_consistency_check ) {
        __kmp_push_workshare( global_tid, ct_pdo, loc );
        if ( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
    }
    /* special handling for zero-trip loops */
    if ( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
        if( plastiter != NULL )
            *plastiter = FALSE;
        /* leave pupper and plower set to entire iteration space */
        *pstride = incr;   /* value should never be used */
        // *plower = *pupper - incr;   // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
        // THIS LINE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE ON A ZERO-TRIP LOOP
        // (lower=1, upper=0, stride=1) - JPH June 23, 2009.
        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init:(ZERO TRIP) liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>, loc = %%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride, loc->psource ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if ((ompt_status == ompt_status_track_callback) &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        return;
    }

    #if OMP_40_ENABLED
    if ( schedtype > kmp_ord_upper ) {
        // we are in DISTRIBUTE construct
        schedtype += kmp_sch_static - kmp_distribute_static;   // AC: convert to usual schedule type
        tid  = __kmp_threads[ gtid ]->th.th_team->t.t_master_tid;
        team = __kmp_threads[ gtid ]->th.th_team->t.t_parent;
    } else
    #endif
    {
        tid  = __kmp_tid_from_gtid( global_tid );
        team = __kmp_threads[ gtid ]->th.th_team;
    }

    /* determine if "for" loop is an active worksharing construct */
    if ( team -> t.t_serialized ) {
        /* serialized parallel, each thread executes whole iteration space */
        if( plastiter != NULL )
            *plastiter = TRUE;
        /* leave pupper and plower set to entire iteration space */
        *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if ((ompt_status == ompt_status_track_callback) &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        return;
    }
    nth = team->t.t_nproc;
    if ( nth == 1 ) {
        if( plastiter != NULL )
            *plastiter = TRUE;
        *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if ((ompt_status == ompt_status_track_callback) &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        return;
    }

    /* compute trip count */
    if ( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if (incr == -1) {
        trip_count = *plower - *pupper + 1;
    } else {
        if ( incr > 1 ) {  // the check is needed for unsigned division when incr < 0
            trip_count = (*pupper - *plower) / incr + 1;
        } else {
            trip_count = (*plower - *pupper) / ( -incr ) + 1;
        }
    }
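    // Worked example (illustrative, not from the original source): for
    // lower=0, upper=9, incr=2 the branch above computes
    // trip_count = (9 - 0) / 2 + 1 = 5, i.e. iterations {0,2,4,6,8}; for
    // lower=9, upper=0, incr=-2 it computes (9 - 0) / 2 + 1 = 5 as well.
    // The incr > 1 check keeps the division unsigned-safe when T is unsigned.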

    if ( __kmp_env_consistency_check ) {
        /* tripcount overflow? */
        if ( trip_count == 0 && *pupper != *plower ) {
            __kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc );
        }
    }

    /* compute remaining parameters */
    switch ( schedtype ) {
    case kmp_sch_static:
        {
            if ( trip_count < nth ) {
                KMP_DEBUG_ASSERT(
                    __kmp_static == kmp_sch_static_greedy || \
                    __kmp_static == kmp_sch_static_balanced
                ); // Unknown static scheduling type.
                if ( tid < trip_count ) {
                    *pupper = *plower = *plower + tid * incr;
                } else {
                    *plower = *pupper + incr;
                }
                if( plastiter != NULL )
                    *plastiter = ( tid == trip_count - 1 );
            } else {
                if ( __kmp_static == kmp_sch_static_balanced ) {
                    register UT small_chunk = trip_count / nth;
                    register UT extras = trip_count % nth;
                    *plower += incr * ( tid * small_chunk + ( tid < extras ? tid : extras ) );
                    *pupper = *plower + small_chunk * incr - ( tid < extras ? 0 : incr );
                    if( plastiter != NULL )
                        *plastiter = ( tid == nth - 1 );
                } else {
                    register T big_chunk_inc_count = ( trip_count/nth +
                        ( ( trip_count % nth ) ? 1 : 0) ) * incr;
                    register T old_upper = *pupper;

                    KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                    // Unknown static scheduling type.

                    *plower += tid * big_chunk_inc_count;
                    *pupper = *plower + big_chunk_inc_count - incr;
                    if ( incr > 0 ) {
                        if( *pupper < *plower )
                            *pupper = i_maxmin< T >::mx;
                        if( plastiter != NULL )
                            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
                        if ( *pupper > old_upper ) *pupper = old_upper; // tracker C73258
                    } else {
                        if( *pupper > *plower )
                            *pupper = i_maxmin< T >::mn;
                        if( plastiter != NULL )
                            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
                        if ( *pupper < old_upper ) *pupper = old_upper; // tracker C73258
                    }
                }
            }
            break;
        }
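    // Worked example (illustrative, not from the original source): with
    // trip_count=10, nth=4, lower=0, incr=1 the balanced branch gives
    // small_chunk=2, extras=2, so threads 0..3 get [0,2], [3,5], [6,7],
    // [8,9] (the first 'extras' threads take one extra iteration); the
    // greedy branch instead gives big_chunk_inc_count=3 and ranges
    // [0,2], [3,5], [6,8], [9,9] after clamping to the old upper bound.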
    case kmp_sch_static_chunked:
        {
            register ST span;
            if ( chunk < 1 ) {
                chunk = 1;
            }
            span = chunk * incr;
            *pstride = span * nth;
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if( plastiter != NULL )
                *plastiter = (tid == ((trip_count - 1)/( UT )chunk) % nth);
            break;
        }
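    // Worked example (illustrative, not from the original source): with
    // chunk=2, incr=1, nth=4, lower=0 the chunked branch gives span=2 and
    // stride=8, so thread tid starts at 2*tid and owns [2*tid, 2*tid+1],
    // then advances by the returned stride for its next chunk in the
    // compiler-generated enclosing loop.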
    default:
        KMP_ASSERT2( 0, "__kmpc_for_static_init: unknown scheduling type" );
        break;
    }

#if USE_ITT_BUILD
    // Report loop metadata
    if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 ) {
        kmp_uint64 cur_chunk = chunk;
        // Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked
        if ( schedtype == kmp_sch_static ) {
            cur_chunk = trip_count / nth + ( ( trip_count % nth ) ? 1 : 0);
        }
        // 0 - "static" schedule
        __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
    }
#endif
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_for_static_init: liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
        __kmp_str_free( &buff );
    }
    #endif
    KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
    if ((ompt_status == ompt_status_track_callback) &&
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
            team_info->parallel_id, task_info->task_id, team_info->microtask);
    }
#endif

    return;
}

template< typename T >
static void
__kmp_dist_for_static_init(
    ident_t                          *loc,
    kmp_int32                         gtid,
    kmp_int32                         schedule,
    kmp_int32                        *plastiter,
    T                                *plower,
    T                                *pupper,
    T                                *pupperDist,
    typename traits_t< T >::signed_t *pstride,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    KMP_COUNT_BLOCK(OMP_DISTR_FOR_static);
    typedef typename traits_t< T >::unsigned_t UT;
    typedef typename traits_t< T >::signed_t  ST;
    register kmp_uint32  tid;
    register kmp_uint32  nth;
    register kmp_uint32  team_id;
    register kmp_uint32  nteams;
    register UT          trip_count;
    register kmp_team_t *team;
    kmp_info_t *         th;

    KMP_DEBUG_ASSERT( plastiter && plower && pupper && pupperDist && pstride );
    KE_TRACE( 10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, schedule, *plastiter,
            *plower, *pupper, incr, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    if( __kmp_env_consistency_check ) {
        __kmp_push_workshare( gtid, ct_pdo, loc );
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
            // The loop is illegal.
            // Some zero-trip loops maintained by compiler, e.g.:
            //   for(i=10;i<0;++i)  // lower >= upper - run-time check
            //   for(i=0;i>10;--i)  // lower <= upper - run-time check
            //   for(i=0;i>10;++i)  // incr > 0     - compile-time check
            //   for(i=10;i<0;--i)  // incr < 0     - compile-time check
            // Compiler does not check the following illegal loops:
            //   for(i=0;i<10;i+=incr)  // where incr<0
            //   for(i=10;i>0;i-=incr)  // where incr<0
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    tid = __kmp_tid_from_gtid( gtid );
    th = __kmp_threads[gtid];
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
    nth = th->th.th_team_nproc;
    team = th->th.th_team;
    #if OMP_40_ENABLED
    nteams = th->th.th_teams_size.nteams;
    #endif
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute global trip count
    if( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if(incr == -1) {
        trip_count = *plower - *pupper + 1;
    } else {
        trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
    }
    *pstride = *pupper - *plower;  // just in case (can be unused)
    if( trip_count <= nteams ) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy || \
            __kmp_static == kmp_sch_static_balanced
        ); // Unknown static scheduling type.
        // only masters of some teams get single iteration, other threads get nothing
        if( team_id < trip_count && tid == 0 ) {
            *pupper = *pupperDist = *plower = *plower + team_id * incr;
        } else {
            *pupperDist = *pupper;
            *plower = *pupper + incr; // compiler should skip loop body
        }
        if( plastiter != NULL )
            *plastiter = ( tid == 0 && team_id == trip_count - 1 );
    } else {
        // Get the team's chunk first (each team gets at most one chunk)
        if( __kmp_static == kmp_sch_static_balanced ) {
            register UT chunkD = trip_count / nteams;
            register UT extras = trip_count % nteams;
            *plower += incr * ( team_id * chunkD + ( team_id < extras ? team_id : extras ) );
            *pupperDist = *plower + chunkD * incr - ( team_id < extras ? 0 : incr );
            if( plastiter != NULL )
                *plastiter = ( team_id == nteams - 1 );
        } else {
            register T chunk_inc_count =
                ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
            register T upper = *pupper;
            KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
            // Unknown static scheduling type.
            *plower += team_id * chunk_inc_count;
            *pupperDist = *plower + chunk_inc_count - incr;
            // Check/correct bounds if needed
            if( incr > 0 ) {
                if( *pupperDist < *plower )
                    *pupperDist = i_maxmin< T >::mx;
                if( plastiter != NULL )
                    *plastiter = *plower <= upper && *pupperDist > upper - incr;
                if( *pupperDist > upper )
                    *pupperDist = upper; // tracker C73258
                if( *plower > *pupperDist ) {
                    *pupper = *pupperDist;  // no iterations available for the team
                    goto end;
                }
            } else {
                if( *pupperDist > *plower )
                    *pupperDist = i_maxmin< T >::mn;
                if( plastiter != NULL )
                    *plastiter = *plower >= upper && *pupperDist < upper - incr;
                if( *pupperDist < upper )
                    *pupperDist = upper; // tracker C73258
                if( *plower < *pupperDist ) {
                    *pupper = *pupperDist;  // no iterations available for the team
                    goto end;
                }
            }
        }
        // Get the parallel loop chunk now (for thread)
        // compute trip count for team's chunk
        if( incr == 1 ) {
            trip_count = *pupperDist - *plower + 1;
        } else if(incr == -1) {
            trip_count = *plower - *pupperDist + 1;
        } else {
            trip_count = (ST)(*pupperDist - *plower) / incr + 1;
        }
        KMP_DEBUG_ASSERT( trip_count );
        switch( schedule ) {
        case kmp_sch_static:
        {
            if( trip_count <= nth ) {
                KMP_DEBUG_ASSERT(
                    __kmp_static == kmp_sch_static_greedy || \
                    __kmp_static == kmp_sch_static_balanced
                ); // Unknown static scheduling type.
                if( tid < trip_count )
                    *pupper = *plower = *plower + tid * incr;
                else
                    *plower = *pupper + incr; // no iterations available
                if( plastiter != NULL )
                    if( *plastiter != 0 && !( tid == trip_count - 1 ) )
                        *plastiter = 0;
            } else {
                if( __kmp_static == kmp_sch_static_balanced ) {
                    register UT chunkL = trip_count / nth;
                    register UT extras = trip_count % nth;
                    *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
                    *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
                    if( plastiter != NULL )
                        if( *plastiter != 0 && !( tid == nth - 1 ) )
                            *plastiter = 0;
                } else {
                    register T chunk_inc_count =
                        ( trip_count / nth + ( ( trip_count % nth ) ? 1 : 0) ) * incr;
                    register T upper = *pupperDist;
                    KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                    // Unknown static scheduling type.
                    *plower += tid * chunk_inc_count;
                    *pupper = *plower + chunk_inc_count - incr;
                    if( incr > 0 ) {
                        if( *pupper < *plower )
                            *pupper = i_maxmin< T >::mx;
                        if( plastiter != NULL )
                            if( *plastiter != 0 && !(*plower <= upper && *pupper > upper - incr) )
                                *plastiter = 0;
                        if( *pupper > upper )
                            *pupper = upper; // tracker C73258
                    } else {
                        if( *pupper > *plower )
                            *pupper = i_maxmin< T >::mn;
                        if( plastiter != NULL )
                            if( *plastiter != 0 && !(*plower >= upper && *pupper < upper - incr) )
                                *plastiter = 0;
                        if( *pupper < upper )
                            *pupper = upper; // tracker C73258
                    }
                }
            }
            break;
        }
        case kmp_sch_static_chunked:
        {
            register ST span;
            if( chunk < 1 )
                chunk = 1;
            span = chunk * incr;
            *pstride = span * nth;
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if( plastiter != NULL )
                if( *plastiter != 0 && !(tid == ((trip_count - 1) / ( UT )chunk) % nth) )
                    *plastiter = 0;
            break;
        }
        default:
            KMP_ASSERT2( 0, "__kmpc_dist_for_static_init: unknown loop scheduling type" );
            break;
        }
    }
    end:;
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "\
            "stride=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pupperDist, *pstride ) );
        __kmp_str_free( &buff );
    }
    #endif
    KE_TRACE( 10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid ) );
    return;
}
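
// Worked example (illustrative, not from the original source): for a loop
// with lower=0, upper=99, incr=1 split over nteams=4 teams of nth=2 threads
// under the balanced policy, the routine above first gives each team a
// dist_chunk of 25 iterations (team 1 gets [25,49], say), then splits that
// chunk between the team's threads, so thread 0 of team 1 receives [25,37]
// and thread 1 receives [38,49].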

template< typename T >
static void
__kmp_team_static_init(
    ident_t                          *loc,
    kmp_int32                         gtid,
    kmp_int32                        *p_last,
    T                                *p_lb,
    T                                *p_ub,
    typename traits_t< T >::signed_t *p_st,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    // The routine returns the first chunk distributed to the team and
    // the stride for computing subsequent chunks.
    // The last iteration flag is set for the team that will execute
    // the last iteration of the loop.
    // The routine is called for dist_schedule(static,chunk) only.
    typedef typename traits_t< T >::unsigned_t UT;
    typedef typename traits_t< T >::signed_t  ST;
    kmp_uint32  team_id;
    kmp_uint32  nteams;
    UT          trip_count;
    T           lower;
    T           upper;
    ST          span;
    kmp_team_t *team;
    kmp_info_t *th;

    KMP_DEBUG_ASSERT( p_last && p_lb && p_ub && p_st );
    KE_TRACE( 10, ("__kmp_team_static_init called (%d)\n", gtid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmp_team_static_init enter: T#%%d liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    lower = *p_lb;
    upper = *p_ub;
    if( __kmp_env_consistency_check ) {
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (upper < lower) : (lower < upper) ) {
            // The loop is illegal.
            // Some zero-trip loops maintained by compiler, e.g.:
            //   for(i=10;i<0;++i)  // lower >= upper - run-time check
            //   for(i=0;i>10;--i)  // lower <= upper - run-time check
            //   for(i=0;i>10;++i)  // incr > 0     - compile-time check
            //   for(i=10;i<0;--i)  // incr < 0     - compile-time check
            // Compiler does not check the following illegal loops:
            //   for(i=0;i<10;i+=incr)  // where incr<0
            //   for(i=10;i>0;i-=incr)  // where incr<0
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    th = __kmp_threads[gtid];
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
    team = th->th.th_team;
    #if OMP_40_ENABLED
    nteams = th->th.th_teams_size.nteams;
    #endif
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute trip count
    if( incr == 1 ) {
        trip_count = upper - lower + 1;
    } else if(incr == -1) {
        trip_count = lower - upper + 1;
    } else {
        trip_count = (ST)(upper - lower) / incr + 1; // cast to signed to cover incr<0 case
    }
    if( chunk < 1 )
        chunk = 1;
    span = chunk * incr;
    *p_st = span * nteams;
    *p_lb = lower + (span * team_id);
    *p_ub = *p_lb + span - incr;
    if ( p_last != NULL )
        *p_last = (team_id == ((trip_count - 1)/(UT)chunk) % nteams);
    // Correct upper bound if needed
    if( incr > 0 ) {
        if( *p_ub < *p_lb ) // overflow?
            *p_ub = i_maxmin< T >::mx;
        if( *p_ub > upper )
            *p_ub = upper; // tracker C73258
    } else {   // incr < 0
        if( *p_ub > *p_lb )
            *p_ub = i_maxmin< T >::mn;
        if( *p_ub < upper )
            *p_ub = upper; // tracker C73258
    }
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmp_team_static_init exit: T#%%d team%%u liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec );
        KD_TRACE(100, ( buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif
}
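
// Worked example (illustrative, not from the original source): for
// dist_schedule(static,10) on a loop with lower=0, upper=99, incr=1 and
// nteams=4, the routine above gives span=10 and stride *p_st=40, so team 2
// starts with chunk [20,29] and then advances by 40 per dist step
// ([60,69], ...); *p_last is set for team (99/10)%4 == 9%4 == 1, which
// executes the final chunk [90,99].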

//--------------------------------------------------------------------------------------
extern "C" {

/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedtype  Scheduling type
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound
@param pstride Pointer to the stride
@param incr Loop increment
@param chunk The chunk size

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the set of iterations
to be executed by the current thread from the statically scheduled loop that is described by the
initial values of the bounds, stride, increment and chunk size.

@{
*/
void
__kmpc_for_static_init_4( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
                          kmp_int32 *plower, kmp_int32 *pupper,
                          kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
{
    __kmp_for_static_init< kmp_int32 >(
                          loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
}
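
// Usage sketch (illustrative only, not part of the runtime): for a loop like
//     #pragma omp for schedule(static)
//     for ( int i = lb; i <= ub; ++i ) body(i);
// the compiler conceptually emits, in each thread of the parallel region:
//     kmp_int32 last = 0, lower = lb, upper = ub, stride = 1;
//     __kmpc_for_static_init_4( &loc, gtid, kmp_sch_static, &last,
//                               &lower, &upper, &stride, 1, 0 );
//     for ( int i = lower; i <= upper; ++i ) body(i);
//     __kmpc_for_static_fini( &loc, gtid );
// i.e. the routine rewrites lower/upper in place to this thread's share of
// the iteration space.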

/*!
 See @ref __kmpc_for_static_init_4
 */
void
__kmpc_for_static_init_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
                           kmp_uint32 *plower, kmp_uint32 *pupper,
                           kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
{
    __kmp_for_static_init< kmp_uint32 >(
                          loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void
__kmpc_for_static_init_8( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
                          kmp_int64 *plower, kmp_int64 *pupper,
                          kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
{
    __kmp_for_static_init< kmp_int64 >(
                          loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void
__kmpc_for_static_init_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
                           kmp_uint64 *plower, kmp_uint64 *pupper,
                           kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
{
    __kmp_for_static_init< kmp_uint64 >(
                          loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
}
/*!
@}
*/

/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedule Scheduling type for the parallel loop
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound of the loop chunk
@param pupperD Pointer to the upper bound of the dist_chunk
@param pstride Pointer to the stride
@param incr Loop increment
@param chunk The chunk size

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the set of iterations
to be executed by the current thread from the statically scheduled loop that is described by the
initial values of the bounds, strides, increment and chunks for the parallel loop and distribute
constructs.

@{
*/
void
__kmpc_dist_for_static_init_4(
    ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
    kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD,
    kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
{
    __kmp_dist_for_static_init< kmp_int32 >(
        loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
}
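
// Usage sketch (illustrative only, not part of the runtime): for a composite
//     #pragma omp distribute parallel for schedule(static)
// the compiler conceptually emits, in each thread of each team:
//     kmp_int32 last = 0, lower = lb, upper = ub, upperD = ub, stride = 1;
//     __kmpc_dist_for_static_init_4( &loc, gtid, kmp_sch_static, &last,
//                                    &lower, &upper, &upperD, &stride, 1, 0 );
//     for ( int i = lower; i <= upper; ++i ) body(i);
// where upperD receives the team's dist_chunk bound and lower/upper the
// current thread's slice of that chunk.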

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void
__kmpc_dist_for_static_init_4u(
    ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
    kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD,
    kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
{
    __kmp_dist_for_static_init< kmp_uint32 >(
        loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void
__kmpc_dist_for_static_init_8(
    ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
    kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD,
    kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
{
    __kmp_dist_for_static_init< kmp_int64 >(
        loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void
__kmpc_dist_for_static_init_8u(
    ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
    kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD,
    kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
{
    __kmp_dist_for_static_init< kmp_uint64 >(
        loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
}
/*!
@}
*/

//-----------------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
// Transfer call to template< type T >
// __kmp_team_static_init( ident_t *loc, int gtid,
//     int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param p_last Pointer to the "last iteration" flag
@param p_lb Pointer to the lower bound
@param p_ub Pointer to the upper bound
@param p_st Pointer to the stride
@param incr Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the set of iterations
to be executed by the current team from the statically scheduled loop that is described by the
initial values of the bounds, stride, increment and chunk for the distribute construct as part of
a composite distribute parallel loop construct.
These functions are all identical apart from the types of the arguments.
*/

void
__kmpc_team_static_init_4(
    ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
    kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_team_static_init< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void
__kmpc_team_static_init_4u(
    ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
    kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_team_static_init< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void
__kmpc_team_static_init_8(
    ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
    kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_team_static_init< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void
__kmpc_team_static_init_8u(
    ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
    kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_team_static_init< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
}
/*!
@}
*/

} // extern "C"
