/*
 * kmp_sched.c -- static scheduling -- iteration initialization
 * $Revision: 43457 $
 * $Date: 2014-09-17 03:57:22 -0500 (Wed, 17 Sep 2014) $
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


/*
 * Static scheduling initialization.
 *
 * NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
 *       it may change values between parallel regions.  __kmp_max_nth
 *       is the largest value __kmp_nth may take, 1 is the smallest.
 */

27#include "kmp.h"
28#include "kmp_i18n.h"
29#include "kmp_str.h"
30#include "kmp_error.h"
Jim Cownie4cc4bb42014-10-07 16:25:50 +000031#include "kmp_stats.h"
32#include "kmp_itt.h"
Jim Cownie5e8470a2013-09-27 10:38:44 +000033
34// template for type limits
35template< typename T >
36struct i_maxmin {
37 static const T mx;
38 static const T mn;
39};
40template<>
41struct i_maxmin< int > {
42 static const int mx = 0x7fffffff;
43 static const int mn = 0x80000000;
44};
45template<>
46struct i_maxmin< unsigned int > {
47 static const unsigned int mx = 0xffffffff;
48 static const unsigned int mn = 0x00000000;
49};
50template<>
51struct i_maxmin< long long > {
52 static const long long mx = 0x7fffffffffffffffLL;
53 static const long long mn = 0x8000000000000000LL;
54};
55template<>
56struct i_maxmin< unsigned long long > {
57 static const unsigned long long mx = 0xffffffffffffffffLL;
58 static const unsigned long long mn = 0x0000000000000000LL;
59};
60//-------------------------------------------------------------------------
61#ifdef KMP_DEBUG
62//-------------------------------------------------------------------------
63// template for debug prints specification ( d, u, lld, llu )
64 char const * traits_t< int >::spec = "d";
65 char const * traits_t< unsigned int >::spec = "u";
66 char const * traits_t< long long >::spec = "lld";
67 char const * traits_t< unsigned long long >::spec = "llu";
68//-------------------------------------------------------------------------
69#endif
70
71template< typename T >
72static void
73__kmp_for_static_init(
74 ident_t *loc,
75 kmp_int32 global_tid,
76 kmp_int32 schedtype,
77 kmp_int32 *plastiter,
78 T *plower,
79 T *pupper,
80 typename traits_t< T >::signed_t *pstride,
81 typename traits_t< T >::signed_t incr,
82 typename traits_t< T >::signed_t chunk
83) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +000084 KMP_COUNT_BLOCK(OMP_FOR_static);
Jim Cownie5e8470a2013-09-27 10:38:44 +000085 typedef typename traits_t< T >::unsigned_t UT;
86 typedef typename traits_t< T >::signed_t ST;
87 /* this all has to be changed back to TID and such.. */
88 register kmp_int32 gtid = global_tid;
89 register kmp_uint32 tid;
90 register kmp_uint32 nth;
91 register UT trip_count;
92 register kmp_team_t *team;
93
Jim Cownie4cc4bb42014-10-07 16:25:50 +000094 KMP_DEBUG_ASSERT( plastiter && plower && pupper && pstride );
Jim Cownie5e8470a2013-09-27 10:38:44 +000095 KE_TRACE( 10, ("__kmpc_for_static_init called (%d)\n", global_tid));
96 #ifdef KMP_DEBUG
97 {
98 const char * buff;
99 // create format specifiers before the debug output
100 buff = __kmp_str_format(
101 "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," \
102 " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
103 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
104 traits_t< ST >::spec, traits_t< ST >::spec, traits_t< T >::spec );
105 KD_TRACE(100, ( buff, global_tid, schedtype, *plastiter,
106 *plower, *pupper, *pstride, incr, chunk ) );
107 __kmp_str_free( &buff );
108 }
109 #endif
110
111 if ( __kmp_env_consistency_check ) {
112 __kmp_push_workshare( global_tid, ct_pdo, loc );
113 if ( incr == 0 ) {
114 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000115 }
116 }
117 /* special handling for zero-trip loops */
118 if ( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000119 if( plastiter != NULL )
120 *plastiter = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000121 /* leave pupper and plower set to entire iteration space */
122 *pstride = incr; /* value should never be used */
123 // *plower = *pupper - incr; // let compiler bypass the illegal loop (like for(i=1;i<10;i--)) THIS LINE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE ON A ZERO-TRIP LOOP (lower=1,\
124 upper=0,stride=1) - JPH June 23, 2009.
125 #ifdef KMP_DEBUG
126 {
127 const char * buff;
128 // create format specifiers before the debug output
129 buff = __kmp_str_format(
130 "__kmpc_for_static_init:(ZERO TRIP) liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>, loc = %%s\n",
131 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
132 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride, loc->psource ) );
133 __kmp_str_free( &buff );
134 }
135 #endif
136 KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
137 return;
138 }
139
140 #if OMP_40_ENABLED
141 if ( schedtype > kmp_ord_upper ) {
142 // we are in DISTRIBUTE construct
143 schedtype += kmp_sch_static - kmp_distribute_static; // AC: convert to usual schedule type
144 tid = __kmp_threads[ gtid ]->th.th_team->t.t_master_tid;
145 team = __kmp_threads[ gtid ]->th.th_team->t.t_parent;
146 } else
147 #endif
148 {
149 tid = __kmp_tid_from_gtid( global_tid );
150 team = __kmp_threads[ gtid ]->th.th_team;
151 }
152
153 /* determine if "for" loop is an active worksharing construct */
154 if ( team -> t.t_serialized ) {
155 /* serialized parallel, each thread executes whole iteration space */
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000156 if( plastiter != NULL )
157 *plastiter = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000158 /* leave pupper and plower set to entire iteration space */
159 *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
160
161 #ifdef KMP_DEBUG
162 {
163 const char * buff;
164 // create format specifiers before the debug output
165 buff = __kmp_str_format(
166 "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
167 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
168 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
169 __kmp_str_free( &buff );
170 }
171 #endif
172 KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
173 return;
174 }
175 nth = team->t.t_nproc;
176 if ( nth == 1 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000177 if( plastiter != NULL )
178 *plastiter = TRUE;
179 *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000180 #ifdef KMP_DEBUG
181 {
182 const char * buff;
183 // create format specifiers before the debug output
184 buff = __kmp_str_format(
185 "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
186 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
187 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
188 __kmp_str_free( &buff );
189 }
190 #endif
191 KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
192 return;
193 }
194
195 /* compute trip count */
196 if ( incr == 1 ) {
197 trip_count = *pupper - *plower + 1;
198 } else if (incr == -1) {
199 trip_count = *plower - *pupper + 1;
200 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000201 if ( incr > 1 ) { // the check is needed for unsigned division when incr < 0
Jim Cownie5e8470a2013-09-27 10:38:44 +0000202 trip_count = (*pupper - *plower) / incr + 1;
203 } else {
204 trip_count = (*plower - *pupper) / ( -incr ) + 1;
205 }
206 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000207
Jim Cownie5e8470a2013-09-27 10:38:44 +0000208 if ( __kmp_env_consistency_check ) {
209 /* tripcount overflow? */
210 if ( trip_count == 0 && *pupper != *plower ) {
211 __kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc );
212 }
213 }
214
215 /* compute remaining parameters */
216 switch ( schedtype ) {
217 case kmp_sch_static:
218 {
219 if ( trip_count < nth ) {
220 KMP_DEBUG_ASSERT(
221 __kmp_static == kmp_sch_static_greedy || \
222 __kmp_static == kmp_sch_static_balanced
223 ); // Unknown static scheduling type.
224 if ( tid < trip_count ) {
225 *pupper = *plower = *plower + tid * incr;
226 } else {
227 *plower = *pupper + incr;
228 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000229 if( plastiter != NULL )
230 *plastiter = ( tid == trip_count - 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000231 } else {
232 if ( __kmp_static == kmp_sch_static_balanced ) {
233 register UT small_chunk = trip_count / nth;
234 register UT extras = trip_count % nth;
235 *plower += incr * ( tid * small_chunk + ( tid < extras ? tid : extras ) );
236 *pupper = *plower + small_chunk * incr - ( tid < extras ? 0 : incr );
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000237 if( plastiter != NULL )
238 *plastiter = ( tid == nth - 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000239 } else {
240 register T big_chunk_inc_count = ( trip_count/nth +
241 ( ( trip_count % nth ) ? 1 : 0) ) * incr;
242 register T old_upper = *pupper;
243
244 KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
245 // Unknown static scheduling type.
246
247 *plower += tid * big_chunk_inc_count;
248 *pupper = *plower + big_chunk_inc_count - incr;
249 if ( incr > 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000250 if( *pupper < *plower )
Jim Cownie5e8470a2013-09-27 10:38:44 +0000251 *pupper = i_maxmin< T >::mx;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000252 if( plastiter != NULL )
253 *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000254 if ( *pupper > old_upper ) *pupper = old_upper; // tracker C73258
255 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000256 if( *pupper > *plower )
Jim Cownie5e8470a2013-09-27 10:38:44 +0000257 *pupper = i_maxmin< T >::mn;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000258 if( plastiter != NULL )
259 *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000260 if ( *pupper < old_upper ) *pupper = old_upper; // tracker C73258
261 }
262 }
263 }
264 break;
265 }
266 case kmp_sch_static_chunked:
267 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000268 register ST span;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000269 if ( chunk < 1 ) {
270 chunk = 1;
271 }
272 span = chunk * incr;
273 *pstride = span * nth;
274 *plower = *plower + (span * tid);
275 *pupper = *plower + span - incr;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000276 if( plastiter != NULL )
277 *plastiter = (tid == ((trip_count - 1)/( UT )chunk) % nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000278 break;
279 }
280 default:
281 KMP_ASSERT2( 0, "__kmpc_for_static_init: unknown scheduling type" );
282 break;
283 }
284
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000285#if USE_ITT_BUILD
286 // Report loop metadata
287 if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 ) {
288 kmp_uint64 cur_chunk = chunk;
289 // Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked
290 if ( schedtype == kmp_sch_static ) {
291 cur_chunk = trip_count / nth + ( ( trip_count % nth ) ? 1 : 0);
292 }
293 // 0 - "static" schedule
294 __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
295 }
296#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000297 #ifdef KMP_DEBUG
298 {
299 const char * buff;
300 // create format specifiers before the debug output
301 buff = __kmp_str_format(
302 "__kmpc_for_static_init: liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>\n",
303 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
304 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
305 __kmp_str_free( &buff );
306 }
307 #endif
308 KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
309 return;
310}
311
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000312template< typename T >
313static void
314__kmp_dist_for_static_init(
315 ident_t *loc,
316 kmp_int32 gtid,
317 kmp_int32 schedule,
318 kmp_int32 *plastiter,
319 T *plower,
320 T *pupper,
321 T *pupperDist,
322 typename traits_t< T >::signed_t *pstride,
323 typename traits_t< T >::signed_t incr,
324 typename traits_t< T >::signed_t chunk
325) {
326 KMP_COUNT_BLOCK(OMP_DISTR_FOR_static);
327 typedef typename traits_t< T >::unsigned_t UT;
328 typedef typename traits_t< T >::signed_t ST;
329 register kmp_uint32 tid;
330 register kmp_uint32 nth;
331 register kmp_uint32 team_id;
332 register kmp_uint32 nteams;
333 register UT trip_count;
334 register kmp_team_t *team;
335 kmp_info_t * th;
336
337 KMP_DEBUG_ASSERT( plastiter && plower && pupper && pupperDist && pstride );
338 KE_TRACE( 10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
339 #ifdef KMP_DEBUG
340 {
341 const char * buff;
342 // create format specifiers before the debug output
343 buff = __kmp_str_format(
344 "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "\
345 "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
346 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
347 traits_t< ST >::spec, traits_t< T >::spec );
348 KD_TRACE(100, ( buff, gtid, schedule, *plastiter,
349 *plower, *pupper, incr, chunk ) );
350 __kmp_str_free( &buff );
351 }
352 #endif
353
354 if( __kmp_env_consistency_check ) {
355 __kmp_push_workshare( gtid, ct_pdo, loc );
356 if( incr == 0 ) {
357 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
358 }
359 if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
360 // The loop is illegal.
361 // Some zero-trip loops maintained by compiler, e.g.:
362 // for(i=10;i<0;++i) // lower >= upper - run-time check
363 // for(i=0;i>10;--i) // lower <= upper - run-time check
364 // for(i=0;i>10;++i) // incr > 0 - compile-time check
365 // for(i=10;i<0;--i) // incr < 0 - compile-time check
366 // Compiler does not check the following illegal loops:
367 // for(i=0;i<10;i+=incr) // where incr<0
368 // for(i=10;i>0;i-=incr) // where incr<0
369 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
370 }
371 }
372 tid = __kmp_tid_from_gtid( gtid );
373 th = __kmp_threads[gtid];
374 KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
375 nth = th->th.th_team_nproc;
376 team = th->th.th_team;
377 #if OMP_40_ENABLED
378 nteams = th->th.th_teams_size.nteams;
379 #endif
380 team_id = team->t.t_master_tid;
381 KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);
382
383 // compute global trip count
384 if( incr == 1 ) {
385 trip_count = *pupper - *plower + 1;
386 } else if(incr == -1) {
387 trip_count = *plower - *pupper + 1;
388 } else {
389 trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
390 }
391 *pstride = *pupper - *plower; // just in case (can be unused)
392 if( trip_count <= nteams ) {
393 KMP_DEBUG_ASSERT(
394 __kmp_static == kmp_sch_static_greedy || \
395 __kmp_static == kmp_sch_static_balanced
396 ); // Unknown static scheduling type.
397 // only masters of some teams get single iteration, other threads get nothing
398 if( team_id < trip_count && tid == 0 ) {
399 *pupper = *pupperDist = *plower = *plower + team_id * incr;
400 } else {
401 *pupperDist = *pupper;
402 *plower = *pupper + incr; // compiler should skip loop body
403 }
404 if( plastiter != NULL )
405 *plastiter = ( tid == 0 && team_id == trip_count - 1 );
406 } else {
407 // Get the team's chunk first (each team gets at most one chunk)
408 if( __kmp_static == kmp_sch_static_balanced ) {
409 register UT chunkD = trip_count / nteams;
410 register UT extras = trip_count % nteams;
411 *plower += incr * ( team_id * chunkD + ( team_id < extras ? team_id : extras ) );
412 *pupperDist = *plower + chunkD * incr - ( team_id < extras ? 0 : incr );
413 if( plastiter != NULL )
414 *plastiter = ( team_id == nteams - 1 );
415 } else {
416 register T chunk_inc_count =
417 ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
418 register T upper = *pupper;
419 KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
420 // Unknown static scheduling type.
421 *plower += team_id * chunk_inc_count;
422 *pupperDist = *plower + chunk_inc_count - incr;
423 // Check/correct bounds if needed
424 if( incr > 0 ) {
425 if( *pupperDist < *plower )
426 *pupperDist = i_maxmin< T >::mx;
427 if( plastiter != NULL )
428 *plastiter = *plower <= upper && *pupperDist > upper - incr;
429 if( *pupperDist > upper )
430 *pupperDist = upper; // tracker C73258
431 if( *plower > *pupperDist ) {
432 *pupper = *pupperDist; // no iterations available for the team
433 goto end;
434 }
435 } else {
436 if( *pupperDist > *plower )
437 *pupperDist = i_maxmin< T >::mn;
438 if( plastiter != NULL )
439 *plastiter = *plower >= upper && *pupperDist < upper - incr;
440 if( *pupperDist < upper )
441 *pupperDist = upper; // tracker C73258
442 if( *plower < *pupperDist ) {
443 *pupper = *pupperDist; // no iterations available for the team
444 goto end;
445 }
446 }
447 }
448 // Get the parallel loop chunk now (for thread)
449 // compute trip count for team's chunk
450 if( incr == 1 ) {
451 trip_count = *pupperDist - *plower + 1;
452 } else if(incr == -1) {
453 trip_count = *plower - *pupperDist + 1;
454 } else {
455 trip_count = (ST)(*pupperDist - *plower) / incr + 1;
456 }
457 KMP_DEBUG_ASSERT( trip_count );
458 switch( schedule ) {
459 case kmp_sch_static:
460 {
461 if( trip_count <= nth ) {
462 KMP_DEBUG_ASSERT(
463 __kmp_static == kmp_sch_static_greedy || \
464 __kmp_static == kmp_sch_static_balanced
465 ); // Unknown static scheduling type.
466 if( tid < trip_count )
467 *pupper = *plower = *plower + tid * incr;
468 else
469 *plower = *pupper + incr; // no iterations available
470 if( plastiter != NULL )
471 if( *plastiter != 0 && !( tid == trip_count - 1 ) )
472 *plastiter = 0;
473 } else {
474 if( __kmp_static == kmp_sch_static_balanced ) {
475 register UT chunkL = trip_count / nth;
476 register UT extras = trip_count % nth;
477 *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
478 *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
479 if( plastiter != NULL )
480 if( *plastiter != 0 && !( tid == nth - 1 ) )
481 *plastiter = 0;
482 } else {
483 register T chunk_inc_count =
484 ( trip_count / nth + ( ( trip_count % nth ) ? 1 : 0) ) * incr;
485 register T upper = *pupperDist;
486 KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
487 // Unknown static scheduling type.
488 *plower += tid * chunk_inc_count;
489 *pupper = *plower + chunk_inc_count - incr;
490 if( incr > 0 ) {
491 if( *pupper < *plower )
492 *pupper = i_maxmin< T >::mx;
493 if( plastiter != NULL )
494 if( *plastiter != 0 && !(*plower <= upper && *pupper > upper - incr) )
495 *plastiter = 0;
496 if( *pupper > upper )
497 *pupper = upper;//tracker C73258
498 } else {
499 if( *pupper > *plower )
500 *pupper = i_maxmin< T >::mn;
501 if( plastiter != NULL )
502 if( *plastiter != 0 && !(*plower >= upper && *pupper < upper - incr) )
503 *plastiter = 0;
504 if( *pupper < upper )
505 *pupper = upper;//tracker C73258
506 }
507 }
508 }
509 break;
510 }
511 case kmp_sch_static_chunked:
512 {
513 register ST span;
514 if( chunk < 1 )
515 chunk = 1;
516 span = chunk * incr;
517 *pstride = span * nth;
518 *plower = *plower + (span * tid);
519 *pupper = *plower + span - incr;
520 if( plastiter != NULL )
521 if( *plastiter != 0 && !(tid == ((trip_count - 1) / ( UT )chunk) % nth) )
522 *plastiter = 0;
523 break;
524 }
525 default:
526 KMP_ASSERT2( 0, "__kmpc_dist_for_static_init: unknown loop scheduling type" );
527 break;
528 }
529 }
530 end:;
531 #ifdef KMP_DEBUG
532 {
533 const char * buff;
534 // create format specifiers before the debug output
535 buff = __kmp_str_format(
536 "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "\
537 "stride=%%%s signed?<%s>\n",
538 traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec,
539 traits_t< ST >::spec, traits_t< T >::spec );
540 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pupperDist, *pstride ) );
541 __kmp_str_free( &buff );
542 }
543 #endif
544 KE_TRACE( 10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid ) );
545 return;
546}
547
548template< typename T >
549static void
550__kmp_team_static_init(
551 ident_t *loc,
552 kmp_int32 gtid,
553 kmp_int32 *p_last,
554 T *p_lb,
555 T *p_ub,
556 typename traits_t< T >::signed_t *p_st,
557 typename traits_t< T >::signed_t incr,
558 typename traits_t< T >::signed_t chunk
559) {
560 // The routine returns the first chunk distributed to the team and
561 // stride for next chunks calculation.
562 // Last iteration flag set for the team that will execute
563 // the last iteration of the loop.
564 // The routine is called for dist_schedue(static,chunk) only.
565 typedef typename traits_t< T >::unsigned_t UT;
566 typedef typename traits_t< T >::signed_t ST;
567 kmp_uint32 team_id;
568 kmp_uint32 nteams;
569 UT trip_count;
570 T lower;
571 T upper;
572 ST span;
573 kmp_team_t *team;
574 kmp_info_t *th;
575
576 KMP_DEBUG_ASSERT( p_last && p_lb && p_ub && p_st );
577 KE_TRACE( 10, ("__kmp_team_static_init called (%d)\n", gtid));
578 #ifdef KMP_DEBUG
579 {
580 const char * buff;
581 // create format specifiers before the debug output
582 buff = __kmp_str_format( "__kmp_team_static_init enter: T#%%d liter=%%d "\
583 "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
584 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
585 traits_t< ST >::spec, traits_t< T >::spec );
586 KD_TRACE(100, ( buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
587 __kmp_str_free( &buff );
588 }
589 #endif
590
591 lower = *p_lb;
592 upper = *p_ub;
593 if( __kmp_env_consistency_check ) {
594 if( incr == 0 ) {
595 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
596 }
597 if( incr > 0 ? (upper < lower) : (lower < upper) ) {
598 // The loop is illegal.
599 // Some zero-trip loops maintained by compiler, e.g.:
600 // for(i=10;i<0;++i) // lower >= upper - run-time check
601 // for(i=0;i>10;--i) // lower <= upper - run-time check
602 // for(i=0;i>10;++i) // incr > 0 - compile-time check
603 // for(i=10;i<0;--i) // incr < 0 - compile-time check
604 // Compiler does not check the following illegal loops:
605 // for(i=0;i<10;i+=incr) // where incr<0
606 // for(i=10;i>0;i-=incr) // where incr<0
607 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
608 }
609 }
610 th = __kmp_threads[gtid];
611 KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
612 team = th->th.th_team;
613 #if OMP_40_ENABLED
614 nteams = th->th.th_teams_size.nteams;
615 #endif
616 team_id = team->t.t_master_tid;
617 KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);
618
619 // compute trip count
620 if( incr == 1 ) {
621 trip_count = upper - lower + 1;
622 } else if(incr == -1) {
623 trip_count = lower - upper + 1;
624 } else {
625 trip_count = (ST)(upper - lower) / incr + 1; // cast to signed to cover incr<0 case
626 }
627 if( chunk < 1 )
628 chunk = 1;
629 span = chunk * incr;
630 *p_st = span * nteams;
631 *p_lb = lower + (span * team_id);
632 *p_ub = *p_lb + span - incr;
633 if ( p_last != NULL )
634 *p_last = (team_id == ((trip_count - 1)/(UT)chunk) % nteams);
635 // Correct upper bound if needed
636 if( incr > 0 ) {
637 if( *p_ub < *p_lb ) // overflow?
638 *p_ub = i_maxmin< T >::mx;
639 if( *p_ub > upper )
640 *p_ub = upper; // tracker C73258
641 } else { // incr < 0
642 if( *p_ub > *p_lb )
643 *p_ub = i_maxmin< T >::mn;
644 if( *p_ub < upper )
645 *p_ub = upper; // tracker C73258
646 }
647 #ifdef KMP_DEBUG
648 {
649 const char * buff;
650 // create format specifiers before the debug output
651 buff = __kmp_str_format( "__kmp_team_static_init exit: T#%%d team%%u liter=%%d "\
652 "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
653 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
654 traits_t< ST >::spec );
655 KD_TRACE(100, ( buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
656 __kmp_str_free( &buff );
657 }
658 #endif
659}
660
Jim Cownie5e8470a2013-09-27 10:38:44 +0000661//--------------------------------------------------------------------------------------
662extern "C" {
663
664/*!
665@ingroup WORK_SHARING
666@param loc Source code location
667@param gtid Global thread id of this thread
668@param schedtype Scheduling type
669@param plastiter Pointer to the "last iteration" flag
670@param plower Pointer to the lower bound
671@param pupper Pointer to the upper bound
672@param pstride Pointer to the stride
673@param incr Loop increment
674@param chunk The chunk size
675
676Each of the four functions here are identical apart from the argument types.
677
678The functions compute the upper and lower bounds and stride to be used for the set of iterations
679to be executed by the current thread from the statically scheduled loop that is described by the
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000680initial values of the bounds, stride, increment and chunk size.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000681
682@{
683*/
684void
685__kmpc_for_static_init_4( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
686 kmp_int32 *plower, kmp_int32 *pupper,
687 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
688{
689 __kmp_for_static_init< kmp_int32 >(
690 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
691}
692
693/*!
694 See @ref __kmpc_for_static_init_4
695 */
696void
697__kmpc_for_static_init_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
698 kmp_uint32 *plower, kmp_uint32 *pupper,
699 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
700{
701 __kmp_for_static_init< kmp_uint32 >(
702 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
703}
704
705/*!
706 See @ref __kmpc_for_static_init_4
707 */
708void
709__kmpc_for_static_init_8( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
710 kmp_int64 *plower, kmp_int64 *pupper,
711 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
712{
713 __kmp_for_static_init< kmp_int64 >(
714 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
715}
716
717/*!
718 See @ref __kmpc_for_static_init_4
719 */
720void
721__kmpc_for_static_init_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
722 kmp_uint64 *plower, kmp_uint64 *pupper,
723 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
724{
725 __kmp_for_static_init< kmp_uint64 >(
726 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
727}
728/*!
729@}
730*/
731
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000732/*!
733@ingroup WORK_SHARING
734@param loc Source code location
735@param gtid Global thread id of this thread
736@param scheduleD Scheduling type for the distribute
737@param scheduleL Scheduling type for the parallel loop
738@param plastiter Pointer to the "last iteration" flag
739@param plower Pointer to the lower bound
740@param pupper Pointer to the upper bound of loop chunk
741@param pupperD Pointer to the upper bound of dist_chunk
742@param pstrideD Pointer to the stride for distribute
743@param pstrideL Pointer to the stride for parallel loop
744@param incr Loop increment
745@param chunkD The chunk size for the distribute
746@param chunkL The chunk size for the parallel loop
747
748Each of the four functions here are identical apart from the argument types.
749
750The functions compute the upper and lower bounds and strides to be used for the set of iterations
751to be executed by the current thread from the statically scheduled loop that is described by the
752initial values of the bounds, strides, increment and chunks for parallel loop and distribute
753constructs.
754
755@{
756*/
757void
758__kmpc_dist_for_static_init_4(
759 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
760 kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD,
761 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
762{
763 __kmp_dist_for_static_init< kmp_int32 >(
764 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
765}
766
767/*!
768 See @ref __kmpc_dist_for_static_init_4
769 */
770void
771__kmpc_dist_for_static_init_4u(
772 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
773 kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD,
774 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
775{
776 __kmp_dist_for_static_init< kmp_uint32 >(
777 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
778}
779
780/*!
781 See @ref __kmpc_dist_for_static_init_4
782 */
783void
784__kmpc_dist_for_static_init_8(
785 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
786 kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD,
787 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
788{
789 __kmp_dist_for_static_init< kmp_int64 >(
790 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
791}
792
793/*!
794 See @ref __kmpc_dist_for_static_init_4
795 */
796void
797__kmpc_dist_for_static_init_8u(
798 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
799 kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD,
800 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
801{
802 __kmp_dist_for_static_init< kmp_uint64 >(
803 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
804}
805/*!
806@}
807*/
808
809//-----------------------------------------------------------------------------------------
810// Auxiliary routines for Distribute Parallel Loop construct implementation
811// Transfer call to template< type T >
812// __kmp_team_static_init( ident_t *loc, int gtid,
813// int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
814
815/*!
816@ingroup WORK_SHARING
817@{
818@param loc Source location
819@param gtid Global thread id
820@param p_last pointer to last iteration flag
821@param p_lb pointer to Lower bound
822@param p_ub pointer to Upper bound
823@param p_st Step (or increment if you prefer)
824@param incr Loop increment
825@param chunk The chunk size to block with
826
827The functions compute the upper and lower bounds and stride to be used for the set of iterations
828to be executed by the current team from the statically scheduled loop that is described by the
829initial values of the bounds, stride, increment and chunk for the distribute construct as part of
830composite distribute parallel loop construct.
831These functions are all identical apart from the types of the arguments.
832*/
833
834void
835__kmpc_team_static_init_4(
836 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
837 kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
838{
839 KMP_DEBUG_ASSERT( __kmp_init_serial );
840 __kmp_team_static_init< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
841}
842
843/*!
844 See @ref __kmpc_team_static_init_4
845 */
846void
847__kmpc_team_static_init_4u(
848 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
849 kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
850{
851 KMP_DEBUG_ASSERT( __kmp_init_serial );
852 __kmp_team_static_init< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
853}
854
855/*!
856 See @ref __kmpc_team_static_init_4
857 */
858void
859__kmpc_team_static_init_8(
860 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
861 kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
862{
863 KMP_DEBUG_ASSERT( __kmp_init_serial );
864 __kmp_team_static_init< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
865}
866
867/*!
868 See @ref __kmpc_team_static_init_4
869 */
870void
871__kmpc_team_static_init_8u(
872 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
873 kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
874{
875 KMP_DEBUG_ASSERT( __kmp_init_serial );
876 __kmp_team_static_init< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
877}
878/*!
879@}
880*/
881
Jim Cownie5e8470a2013-09-27 10:38:44 +0000882} // extern "C"
883