blob: 1977ca4d4184a3fe8b457277e4c79056855795a5 [file] [log] [blame]
Jim Cownie5e8470a2013-09-27 10:38:44 +00001/*
2 * kmp_sched.c -- static scheduling -- iteration initialization
Jim Cownie5e8470a2013-09-27 10:38:44 +00003 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15
16/*
17 * Static scheduling initialization.
18 *
19 * NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
20 * it may change values between parallel regions. __kmp_max_nth
21 * is the largest value __kmp_nth may take, 1 is the smallest.
22 *
23 */
24
25#include "kmp.h"
26#include "kmp_i18n.h"
27#include "kmp_str.h"
28#include "kmp_error.h"
Jim Cownie4cc4bb42014-10-07 16:25:50 +000029#include "kmp_stats.h"
30#include "kmp_itt.h"
Jim Cownie5e8470a2013-09-27 10:38:44 +000031
Andrey Churbanovd7d088f2015-04-29 16:42:24 +000032#if OMPT_SUPPORT
33#include "ompt-specific.h"
34#endif
35
// template for type limits
//
// i_maxmin< T >::mx / ::mn give the largest / smallest value representable in
// T, for the integer types the static-init templates below are instantiated
// with.  The signed minima are written as "-max - 1" rather than as raw hex
// literals such as 0x80000000: a hex literal that does not fit the signed
// type has an *unsigned* type, and converting that value back to the signed
// type is implementation-defined (C++03 [conv.integral]).  "-max - 1" is the
// exact two's-complement minimum with no out-of-range conversion.
template< typename T >
struct i_maxmin {
    static const T mx;
    static const T mn;
};
template<>
struct i_maxmin< int > {
    static const int mx = 0x7fffffff;
    static const int mn = -0x7fffffff - 1;
};
template<>
struct i_maxmin< unsigned int > {
    static const unsigned int mx = 0xffffffffU;
    static const unsigned int mn = 0x00000000U;
};
template<>
struct i_maxmin< long long > {
    static const long long mx = 0x7fffffffffffffffLL;
    static const long long mn = -0x7fffffffffffffffLL - 1LL;
};
template<>
struct i_maxmin< unsigned long long > {
    static const unsigned long long mx = 0xffffffffffffffffULL;
    static const unsigned long long mn = 0x0000000000000000ULL;
};
//-------------------------------------------------------------------------
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
//
// Out-of-class definitions of traits_t< T >::spec (the template itself is
// presumably declared in kmp.h — not visible in this file).  These printf
// length/format suffixes are spliced into format strings built with
// __kmp_str_format() for the KD_TRACE debug output below, which is why they
// only exist in KMP_DEBUG builds.
    char const * traits_t< int >::spec = "d";
    char const * traits_t< unsigned int >::spec = "u";
    char const * traits_t< long long >::spec = "lld";
    char const * traits_t< unsigned long long >::spec = "llu";
//-------------------------------------------------------------------------
#endif
72
// Compute the iteration sub-range that thread 'global_tid' must execute for a
// statically scheduled worksharing loop.
//
// On entry *plower/*pupper describe the whole iteration space (inclusive
// bounds, step 'incr'); on exit they describe this thread's portion, and
// *pstride is set so the compiler-generated loop can step to further chunks
// (only meaningful for chunked schedules).  *plastiter is set non-zero iff
// this thread executes the sequentially last iteration.  'chunk' is used only
// for kmp_sch_static_chunked.  Early returns handle zero-trip loops,
// serialized parallel regions, and single-thread teams; each return path
// fires the same OMPT loop-begin callback when tracing is enabled.
template< typename T >
static void
__kmp_for_static_init(
    ident_t                          *loc,
    kmp_int32                         global_tid,
    kmp_int32                         schedtype,
    kmp_int32                        *plastiter,
    T                                *plower,
    T                                *pupper,
    typename traits_t< T >::signed_t *pstride,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    KMP_COUNT_BLOCK(OMP_FOR_static);
    KMP_TIME_BLOCK (FOR_static_scheduling);

    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    /* this all has to be changed back to TID and such.. */
    register kmp_int32   gtid = global_tid;
    register kmp_uint32  tid;
    register kmp_uint32  nth;
    register UT          trip_count;
    register kmp_team_t *team;
    register kmp_info_t *th = __kmp_threads[ gtid ];

#if OMPT_SUPPORT && OMPT_TRACE
    ompt_team_info_t *team_info = NULL;
    ompt_task_info_t *task_info = NULL;

    if (ompt_enabled) {
        // Only fully initialize variables needed by OMPT if OMPT is enabled.
        team_info = __ompt_get_teaminfo(0, NULL);
        task_info = __ompt_get_taskinfo(0);
    }
#endif

    KMP_DEBUG_ASSERT( plastiter && plower && pupper && pstride );
    KE_TRACE( 10, ("__kmpc_for_static_init called (%d)\n", global_tid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," \
            " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, global_tid, schedtype, *plastiter,
            *plower, *pupper, *pstride, incr, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    if ( __kmp_env_consistency_check ) {
        __kmp_push_workshare( global_tid, ct_pdo, loc );
        if ( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
    }
    /* special handling for zero-trip loops */
    if ( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
        if( plastiter != NULL )
            *plastiter = FALSE;
        /* leave pupper and plower set to entire iteration space */
        *pstride = incr;   /* value should never be used */
        // *plower = *pupper - incr;   // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
        //     THIS LINE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE ON A ZERO-TRIP LOOP
        //     (lower=1, upper=0, stride=1) - JPH June 23, 2009.
        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init:(ZERO TRIP) liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>, loc = %%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride, loc->psource ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        // Notify tools of loop begin even though no iterations will run.
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        KMP_COUNT_VALUE (FOR_static_iterations, 0);
        return;
    }

    #if OMP_40_ENABLED
    if ( schedtype > kmp_ord_upper ) {
        // we are in DISTRIBUTE construct
        schedtype += kmp_sch_static - kmp_distribute_static;      // AC: convert to usual schedule type
        tid  = th->th.th_team->t.t_master_tid;
        team = th->th.th_team->t.t_parent;
    } else
    #endif
    {
        tid  = __kmp_tid_from_gtid( global_tid );
        team = th->th.th_team;
    }

    /* determine if "for" loop is an active worksharing construct */
    if ( team -> t.t_serialized ) {
        /* serialized parallel, each thread executes whole iteration space */
        if( plastiter != NULL )
            *plastiter = TRUE;
        /* leave pupper and plower set to entire iteration space */
        *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        return;
    }
    nth = team->t.t_nproc;
    if ( nth == 1 ) {
        // Single-thread team: whole iteration space belongs to this thread.
        if( plastiter != NULL )
            *plastiter = TRUE;
        *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        return;
    }

    /* compute trip count */
    // trip_count is unsigned (UT), so the division must always be by a
    // positive value — hence the sign-specific branches below.
    if ( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if (incr == -1) {
        trip_count = *plower - *pupper + 1;
    } else {
        if ( incr > 1 ) {  // the check is needed for unsigned division when incr < 0
            trip_count = (*pupper - *plower) / incr + 1;
        } else {
            trip_count = (*plower - *pupper) / ( -incr ) + 1;
        }
    }

    if ( __kmp_env_consistency_check ) {
        /* tripcount overflow? */
        if ( trip_count == 0 && *pupper != *plower ) {
            __kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc );
        }
    }
    KMP_COUNT_VALUE (FOR_static_iterations, trip_count);

    /* compute remaining parameters */
    switch ( schedtype ) {
    case kmp_sch_static:
        {
            if ( trip_count < nth ) {
                // Fewer iterations than threads: the first trip_count threads
                // get one iteration each, the rest get an empty range.
                KMP_DEBUG_ASSERT(
                    __kmp_static == kmp_sch_static_greedy || \
                    __kmp_static == kmp_sch_static_balanced
                ); // Unknown static scheduling type.
                if ( tid < trip_count ) {
                    *pupper = *plower = *plower + tid * incr;
                } else {
                    *plower = *pupper + incr;
                }
                if( plastiter != NULL )
                    *plastiter = ( tid == trip_count - 1 );
            } else {
                if ( __kmp_static == kmp_sch_static_balanced ) {
                    // Balanced: each thread gets floor(trip/nth) iterations and
                    // the first 'extras' threads one extra.
                    register UT small_chunk = trip_count / nth;
                    register UT extras = trip_count % nth;
                    *plower += incr * ( tid * small_chunk + ( tid < extras ? tid : extras ) );
                    *pupper = *plower + small_chunk * incr - ( tid < extras ? 0 : incr );
                    if( plastiter != NULL )
                        *plastiter = ( tid == nth - 1 );
                } else {
                    // Greedy: every thread gets ceil(trip/nth) iterations; the
                    // trailing threads' ranges are clipped to the real bound.
                    register T big_chunk_inc_count = ( trip_count/nth +
                                                     ( ( trip_count % nth ) ? 1 : 0) ) * incr;
                    register T old_upper = *pupper;

                    KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                        // Unknown static scheduling type.

                    *plower += tid * big_chunk_inc_count;
                    *pupper = *plower + big_chunk_inc_count - incr;
                    if ( incr > 0 ) {
                        if( *pupper < *plower )   // overflow past the type maximum?
                            *pupper = i_maxmin< T >::mx;
                        if( plastiter != NULL )
                            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
                        if ( *pupper > old_upper ) *pupper = old_upper; // tracker C73258
                    } else {
                        if( *pupper > *plower )   // underflow past the type minimum?
                            *pupper = i_maxmin< T >::mn;
                        if( plastiter != NULL )
                            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
                        if ( *pupper < old_upper ) *pupper = old_upper; // tracker C73258
                    }
                }
            }
            break;
        }
    case kmp_sch_static_chunked:
        {
            register ST span;
            if ( chunk < 1 ) {
                chunk = 1;
            }
            span = chunk * incr;
            // Thread gets chunk-sized pieces at stride nth*chunk.
            *pstride = span * nth;
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if( plastiter != NULL )
                *plastiter = (tid == ((trip_count - 1)/( UT )chunk) % nth);
            break;
        }
    default:
        KMP_ASSERT2( 0, "__kmpc_for_static_init: unknown scheduling type" );
        break;
    }

#if USE_ITT_BUILD
    // Report loop metadata
    if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
#if OMP_40_ENABLED
        th->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level == 1 )
    {
        kmp_uint64 cur_chunk = chunk;
        // Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked
        if ( schedtype == kmp_sch_static ) {
            cur_chunk = trip_count / nth + ( ( trip_count % nth ) ? 1 : 0);
        }
        // 0 - "static" schedule
        __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
    }
#endif
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_for_static_init: liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
        __kmp_str_free( &buff );
    }
    #endif
    KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
            team_info->parallel_id, task_info->task_id, team_info->microtask);
    }
#endif

    return;
}
370
// Two-level static scheduling for "distribute parallel for" inside a teams
// construct: first carve the global iteration space into per-team chunks
// (*pupperDist receives the team's upper bound), then split the team's chunk
// across the team's threads exactly like __kmp_for_static_init
// (*plower/*pupper receive the thread's bounds).  *plastiter is narrowed at
// each level so it ends up set only for the thread executing the
// sequentially last iteration of the whole loop.
//
// NOTE(review): 'nteams' is assigned only under #if OMP_40_ENABLED; the
// KMP_DEBUG_ASSERT below reads it unconditionally, so a !OMP_40_ENABLED
// build would read it uninitialized — presumably this routine is only
// reachable when OMP_40_ENABLED is set (teams construct); confirm.
template< typename T >
static void
__kmp_dist_for_static_init(
    ident_t                          *loc,
    kmp_int32                         gtid,
    kmp_int32                         schedule,
    kmp_int32                        *plastiter,
    T                                *plower,
    T                                *pupper,
    T                                *pupperDist,
    typename traits_t< T >::signed_t *pstride,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    register kmp_uint32  tid;
    register kmp_uint32  nth;
    register kmp_uint32  team_id;
    register kmp_uint32  nteams;
    register UT          trip_count;
    register kmp_team_t *team;
    kmp_info_t * th;

    KMP_DEBUG_ASSERT( plastiter && plower && pupper && pupperDist && pstride );
    KE_TRACE( 10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, schedule, *plastiter,
                   *plower, *pupper, incr, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    if( __kmp_env_consistency_check ) {
        __kmp_push_workshare( gtid, ct_pdo, loc );
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
            // The loop is illegal.
            // Some zero-trip loops maintained by compiler, e.g.:
            //   for(i=10;i<0;++i) // lower >= upper - run-time check
            //   for(i=0;i>10;--i) // lower <= upper - run-time check
            //   for(i=0;i>10;++i) // incr > 0     - compile-time check
            //   for(i=10;i<0;--i) // incr < 0     - compile-time check
            // Compiler does not check the following illegal loops:
            //   for(i=0;i<10;i+=incr) // where incr<0
            //   for(i=10;i>0;i-=incr) // where incr<0
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    tid = __kmp_tid_from_gtid( gtid );
    th = __kmp_threads[gtid];
    nth = th->th.th_team_nproc;
    team = th->th.th_team;
    #if OMP_40_ENABLED
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
    nteams = th->th.th_teams_size.nteams;
    #endif
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute global trip count
    if( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if(incr == -1) {
        trip_count = *plower - *pupper + 1;
    } else {
        trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
    }

    *pstride = *pupper - *plower;  // just in case (can be unused)
    if( trip_count <= nteams ) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy || \
            __kmp_static == kmp_sch_static_balanced
        ); // Unknown static scheduling type.
        // only masters of some teams get single iteration, other threads get nothing
        if( team_id < trip_count && tid == 0 ) {
            *pupper = *pupperDist = *plower = *plower + team_id * incr;
        } else {
            *pupperDist = *pupper;
            *plower = *pupper + incr; // compiler should skip loop body
        }
        if( plastiter != NULL )
            *plastiter = ( tid == 0 && team_id == trip_count - 1 );
    } else {
        // Get the team's chunk first (each team gets at most one chunk)
        if( __kmp_static == kmp_sch_static_balanced ) {
            register UT chunkD = trip_count / nteams;
            register UT extras = trip_count % nteams;
            *plower += incr * ( team_id * chunkD + ( team_id < extras ? team_id : extras ) );
            *pupperDist = *plower + chunkD * incr - ( team_id < extras ? 0 : incr );
            if( plastiter != NULL )
                *plastiter = ( team_id == nteams - 1 );
        } else {
            // Greedy: ceil(trip/nteams) per team, clipped to the real bound.
            register T chunk_inc_count =
                ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
            register T upper = *pupper;
            KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                // Unknown static scheduling type.
            *plower += team_id * chunk_inc_count;
            *pupperDist = *plower + chunk_inc_count - incr;
            // Check/correct bounds if needed
            if( incr > 0 ) {
                if( *pupperDist < *plower )   // overflow past the type maximum?
                    *pupperDist = i_maxmin< T >::mx;
                if( plastiter != NULL )
                    *plastiter = *plower <= upper && *pupperDist > upper - incr;
                if( *pupperDist > upper )
                    *pupperDist = upper; // tracker C73258
                if( *plower > *pupperDist ) {
                    *pupper = *pupperDist;  // no iterations available for the team
                    goto end;
                }
            } else {
                if( *pupperDist > *plower )   // underflow past the type minimum?
                    *pupperDist = i_maxmin< T >::mn;
                if( plastiter != NULL )
                    *plastiter = *plower >= upper && *pupperDist < upper - incr;
                if( *pupperDist < upper )
                    *pupperDist = upper; // tracker C73258
                if( *plower < *pupperDist ) {
                    *pupper = *pupperDist;  // no iterations available for the team
                    goto end;
                }
            }
        }
        // Get the parallel loop chunk now (for thread)
        // compute trip count for team's chunk
        if( incr == 1 ) {
            trip_count = *pupperDist - *plower + 1;
        } else if(incr == -1) {
            trip_count = *plower - *pupperDist + 1;
        } else {
            trip_count = (ST)(*pupperDist - *plower) / incr + 1;
        }
        KMP_DEBUG_ASSERT( trip_count );
        switch( schedule ) {
        case kmp_sch_static:
        {
            if( trip_count <= nth ) {
                KMP_DEBUG_ASSERT(
                    __kmp_static == kmp_sch_static_greedy || \
                    __kmp_static == kmp_sch_static_balanced
                ); // Unknown static scheduling type.
                if( tid < trip_count )
                    *pupper = *plower = *plower + tid * incr;
                else
                    *plower = *pupper + incr; // no iterations available
                // Narrow *plastiter: only clear it, never set it — the team
                // level above already decided which team can be last.
                if( plastiter != NULL )
                    if( *plastiter != 0 && !( tid == trip_count - 1 ) )
                        *plastiter = 0;
            } else {
                if( __kmp_static == kmp_sch_static_balanced ) {
                    register UT chunkL = trip_count / nth;
                    register UT extras = trip_count % nth;
                    *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
                    *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
                    if( plastiter != NULL )
                        if( *plastiter != 0 && !( tid == nth - 1 ) )
                            *plastiter = 0;
                } else {
                    register T chunk_inc_count =
                        ( trip_count / nth + ( ( trip_count % nth ) ? 1 : 0) ) * incr;
                    register T upper = *pupperDist;
                    KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                        // Unknown static scheduling type.
                    *plower += tid * chunk_inc_count;
                    *pupper = *plower + chunk_inc_count - incr;
                    if( incr > 0 ) {
                        if( *pupper < *plower )
                            *pupper = i_maxmin< T >::mx;
                        if( plastiter != NULL )
                            if( *plastiter != 0 && !(*plower <= upper && *pupper > upper - incr) )
                                *plastiter = 0;
                        if( *pupper > upper )
                            *pupper = upper;//tracker C73258
                    } else {
                        if( *pupper > *plower )
                            *pupper = i_maxmin< T >::mn;
                        if( plastiter != NULL )
                            if( *plastiter != 0 && !(*plower >= upper && *pupper < upper - incr) )
                                *plastiter = 0;
                        if( *pupper < upper )
                            *pupper = upper;//tracker C73258
                    }
                }
            }
            break;
        }
        case kmp_sch_static_chunked:
        {
            register ST span;
            if( chunk < 1 )
                chunk = 1;
            span = chunk * incr;
            *pstride = span * nth;
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if( plastiter != NULL )
                if( *plastiter != 0 && !(tid == ((trip_count - 1) / ( UT )chunk) % nth) )
                    *plastiter = 0;
            break;
        }
        default:
            KMP_ASSERT2( 0, "__kmpc_dist_for_static_init: unknown loop scheduling type" );
            break;
        }
    }
    end:;
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "\
            "stride=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pupperDist, *pstride ) );
        __kmp_str_free( &buff );
    }
    #endif
    KE_TRACE( 10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid ) );
    return;
}
607
// Compute the first chunk of a "distribute" loop assigned to this team,
// plus the stride to subsequent chunks.
//
// NOTE(review): as in __kmp_dist_for_static_init, 'nteams' is assigned only
// under #if OMP_40_ENABLED but read unconditionally below — presumably this
// routine is only reachable in OMP 4.0 builds; confirm.
template< typename T >
static void
__kmp_team_static_init(
    ident_t                          *loc,
    kmp_int32                         gtid,
    kmp_int32                        *p_last,
    T                                *p_lb,
    T                                *p_ub,
    typename traits_t< T >::signed_t *p_st,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    // The routine returns the first chunk distributed to the team and
    // stride for next chunks calculation.
    // Last iteration flag set for the team that will execute
    // the last iteration of the loop.
    // The routine is called for dist_schedule(static, chunk) only.
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    kmp_uint32  team_id;
    kmp_uint32  nteams;
    UT          trip_count;
    T           lower;
    T           upper;
    ST          span;
    kmp_team_t *team;
    kmp_info_t *th;

    KMP_DEBUG_ASSERT( p_last && p_lb && p_ub && p_st );
    KE_TRACE( 10, ("__kmp_team_static_init called (%d)\n", gtid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmp_team_static_init enter: T#%%d liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    lower = *p_lb;
    upper = *p_ub;
    if( __kmp_env_consistency_check ) {
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (upper < lower) : (lower < upper) ) {
            // The loop is illegal.
            // Some zero-trip loops maintained by compiler, e.g.:
            //   for(i=10;i<0;++i) // lower >= upper - run-time check
            //   for(i=0;i>10;--i) // lower <= upper - run-time check
            //   for(i=0;i>10;++i) // incr > 0     - compile-time check
            //   for(i=10;i<0;--i) // incr < 0     - compile-time check
            // Compiler does not check the following illegal loops:
            //   for(i=0;i<10;i+=incr) // where incr<0
            //   for(i=10;i>0;i-=incr) // where incr<0
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    th = __kmp_threads[gtid];
    team = th->th.th_team;
    #if OMP_40_ENABLED
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
    nteams = th->th.th_teams_size.nteams;
    #endif
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute trip count
    if( incr == 1 ) {
        trip_count = upper - lower + 1;
    } else if(incr == -1) {
        trip_count = lower - upper + 1;
    } else {
        trip_count = (ST)(upper - lower) / incr + 1; // cast to signed to cover incr<0 case
    }
    if( chunk < 1 )
        chunk = 1;
    span = chunk * incr;
    // Teams take chunk-sized pieces round-robin: stride is nteams*chunk.
    *p_st = span * nteams;
    *p_lb = lower + (span * team_id);
    *p_ub = *p_lb + span - incr;
    if ( p_last != NULL )
        *p_last = (team_id == ((trip_count - 1)/(UT)chunk) % nteams);
    // Correct upper bound if needed
    if( incr > 0 ) {
        if( *p_ub < *p_lb ) // overflow?
            *p_ub = i_maxmin< T >::mx;
        if( *p_ub > upper )
            *p_ub = upper; // tracker C73258
    } else {   // incr < 0
        if( *p_ub > *p_lb ) // underflow?
            *p_ub = i_maxmin< T >::mn;
        if( *p_ub < upper )
            *p_ub = upper; // tracker C73258
    }
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmp_team_static_init exit: T#%%d team%%u liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec );
        KD_TRACE(100, ( buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif
}
720
Jim Cownie5e8470a2013-09-27 10:38:44 +0000721//--------------------------------------------------------------------------------------
722extern "C" {
723
724/*!
725@ingroup WORK_SHARING
726@param loc Source code location
727@param gtid Global thread id of this thread
728@param schedtype Scheduling type
729@param plastiter Pointer to the "last iteration" flag
730@param plower Pointer to the lower bound
731@param pupper Pointer to the upper bound
732@param pstride Pointer to the stride
733@param incr Loop increment
734@param chunk The chunk size
735
736Each of the four functions here are identical apart from the argument types.
737
738The functions compute the upper and lower bounds and stride to be used for the set of iterations
739to be executed by the current thread from the statically scheduled loop that is described by the
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000740initial values of the bounds, stride, increment and chunk size.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000741
742@{
743*/
744void
745__kmpc_for_static_init_4( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
746 kmp_int32 *plower, kmp_int32 *pupper,
747 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
748{
749 __kmp_for_static_init< kmp_int32 >(
750 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
751}
752
753/*!
754 See @ref __kmpc_for_static_init_4
755 */
756void
757__kmpc_for_static_init_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
758 kmp_uint32 *plower, kmp_uint32 *pupper,
759 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
760{
761 __kmp_for_static_init< kmp_uint32 >(
762 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
763}
764
765/*!
766 See @ref __kmpc_for_static_init_4
767 */
768void
769__kmpc_for_static_init_8( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
770 kmp_int64 *plower, kmp_int64 *pupper,
771 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
772{
773 __kmp_for_static_init< kmp_int64 >(
774 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
775}
776
777/*!
778 See @ref __kmpc_for_static_init_4
779 */
780void
781__kmpc_for_static_init_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
782 kmp_uint64 *plower, kmp_uint64 *pupper,
783 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
784{
785 __kmp_for_static_init< kmp_uint64 >(
786 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
787}
788/*!
789@}
790*/
791
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000792/*!
793@ingroup WORK_SHARING
794@param loc Source code location
795@param gtid Global thread id of this thread
Jonathan Peyton81f9cd12015-05-22 22:37:22 +0000796@param schedule Scheduling type for the parallel loop
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000797@param plastiter Pointer to the "last iteration" flag
798@param plower Pointer to the lower bound
799@param pupper Pointer to the upper bound of loop chunk
800@param pupperD Pointer to the upper bound of dist_chunk
Jonathan Peyton81f9cd12015-05-22 22:37:22 +0000801@param pstride Pointer to the stride for parallel loop
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000802@param incr Loop increment
Jonathan Peyton81f9cd12015-05-22 22:37:22 +0000803@param chunk The chunk size for the parallel loop
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000804
805Each of the four functions here are identical apart from the argument types.
806
807The functions compute the upper and lower bounds and strides to be used for the set of iterations
808to be executed by the current thread from the statically scheduled loop that is described by the
809initial values of the bounds, strides, increment and chunks for parallel loop and distribute
810constructs.
811
812@{
813*/
814void
815__kmpc_dist_for_static_init_4(
816 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
817 kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD,
818 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
819{
820 __kmp_dist_for_static_init< kmp_int32 >(
821 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
822}
823
824/*!
825 See @ref __kmpc_dist_for_static_init_4
826 */
827void
828__kmpc_dist_for_static_init_4u(
829 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
830 kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD,
831 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
832{
833 __kmp_dist_for_static_init< kmp_uint32 >(
834 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
835}
836
837/*!
838 See @ref __kmpc_dist_for_static_init_4
839 */
840void
841__kmpc_dist_for_static_init_8(
842 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
843 kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD,
844 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
845{
846 __kmp_dist_for_static_init< kmp_int64 >(
847 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
848}
849
850/*!
851 See @ref __kmpc_dist_for_static_init_4
852 */
853void
854__kmpc_dist_for_static_init_8u(
855 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
856 kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD,
857 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
858{
859 __kmp_dist_for_static_init< kmp_uint64 >(
860 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
861}
862/*!
863@}
864*/
865
866//-----------------------------------------------------------------------------------------
867// Auxiliary routines for Distribute Parallel Loop construct implementation
868// Transfer call to template< type T >
869// __kmp_team_static_init( ident_t *loc, int gtid,
870// int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
871
872/*!
873@ingroup WORK_SHARING
874@{
875@param loc Source location
876@param gtid Global thread id
877@param p_last pointer to last iteration flag
878@param p_lb pointer to Lower bound
879@param p_ub pointer to Upper bound
880@param p_st Pointer to the stride (step / increment) for the loop
881@param incr Loop increment
882@param chunk The chunk size to block with
883
884The functions compute the upper and lower bounds and stride to be used for the set of iterations
885to be executed by the current team from the statically scheduled loop that is described by the
886initial values of the bounds, stride, increment and chunk for the distribute construct as part of
887composite distribute parallel loop construct.
888These functions are all identical apart from the types of the arguments.
889*/
890
891void
892__kmpc_team_static_init_4(
893 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
894 kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
895{
896 KMP_DEBUG_ASSERT( __kmp_init_serial );
897 __kmp_team_static_init< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
898}
899
900/*!
901 See @ref __kmpc_team_static_init_4
902 */
903void
904__kmpc_team_static_init_4u(
905 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
906 kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
907{
908 KMP_DEBUG_ASSERT( __kmp_init_serial );
909 __kmp_team_static_init< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
910}
911
912/*!
913 See @ref __kmpc_team_static_init_4
914 */
915void
916__kmpc_team_static_init_8(
917 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
918 kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
919{
920 KMP_DEBUG_ASSERT( __kmp_init_serial );
921 __kmp_team_static_init< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
922}
923
924/*!
925 See @ref __kmpc_team_static_init_4
926 */
927void
928__kmpc_team_static_init_8u(
929 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
930 kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
931{
932 KMP_DEBUG_ASSERT( __kmp_init_serial );
933 __kmp_team_static_init< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
934}
935/*!
936@}
937*/
938
Jim Cownie5e8470a2013-09-27 10:38:44 +0000939} // extern "C"
940