blob: 0821f3805a4e8b7e8a1ccbf4eff6294c19ddffe1 [file] [log] [blame]
/*
 * kmp_sched.c -- static scheduling -- iteration initialization
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


/*
 * Static scheduling initialization.
 *
 * NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
 *       it may change values between parallel regions.  __kmp_max_nth
 *       is the largest value __kmp_nth may take, 1 is the smallest.
 *
 */
24
25#include "kmp.h"
26#include "kmp_i18n.h"
27#include "kmp_str.h"
28#include "kmp_error.h"
Jim Cownie4cc4bb42014-10-07 16:25:50 +000029#include "kmp_stats.h"
30#include "kmp_itt.h"
Jim Cownie5e8470a2013-09-27 10:38:44 +000031
Andrey Churbanovd7d088f2015-04-29 16:42:24 +000032#if OMPT_SUPPORT
33#include "ompt-specific.h"
34#endif
35
// template for type limits (max/min representable value per loop index type)
template< typename T >
struct i_maxmin {
    static const T mx;
    static const T mn;
};
template<>
struct i_maxmin< int > {
    static const int mx = 0x7fffffff;
    // -MAX - 1 idiom: plain 0x80000000 is an *unsigned* literal, so using it
    // to initialize a signed int relies on implementation-defined narrowing.
    static const int mn = -0x7fffffff - 1;
};
template<>
struct i_maxmin< unsigned int > {
    static const unsigned int mx = 0xffffffffu;
    static const unsigned int mn = 0x00000000u;
};
template<>
struct i_maxmin< long long > {
    static const long long mx = 0x7fffffffffffffffLL;
    // Same -MAX - 1 idiom; 0x8000000000000000LL would overflow long long.
    static const long long mn = -0x7fffffffffffffffLL - 1;
};
template<>
struct i_maxmin< unsigned long long > {
    // ULL suffix: the LL-suffixed literal did not fit in (signed) long long.
    static const unsigned long long mx = 0xffffffffffffffffULL;
    static const unsigned long long mn = 0x0000000000000000ULL;
};
//-------------------------------------------------------------------------
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// printf-style format specifiers used by the KD_TRACE debug output below
// ( d, u, lld, llu ); traits_t itself is declared in kmp.h.
char const * traits_t< int >::spec = "d";
char const * traits_t< unsigned int >::spec = "u";
char const * traits_t< long long >::spec = "lld";
char const * traits_t< unsigned long long >::spec = "llu";
//-------------------------------------------------------------------------
#endif
72
73template< typename T >
74static void
75__kmp_for_static_init(
76 ident_t *loc,
77 kmp_int32 global_tid,
78 kmp_int32 schedtype,
79 kmp_int32 *plastiter,
80 T *plower,
81 T *pupper,
82 typename traits_t< T >::signed_t *pstride,
83 typename traits_t< T >::signed_t incr,
84 typename traits_t< T >::signed_t chunk
85) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +000086 KMP_COUNT_BLOCK(OMP_FOR_static);
Jim Cownie5e8470a2013-09-27 10:38:44 +000087 typedef typename traits_t< T >::unsigned_t UT;
88 typedef typename traits_t< T >::signed_t ST;
89 /* this all has to be changed back to TID and such.. */
90 register kmp_int32 gtid = global_tid;
91 register kmp_uint32 tid;
92 register kmp_uint32 nth;
93 register UT trip_count;
94 register kmp_team_t *team;
Andrey Churbanov51aecb82015-05-06 19:22:36 +000095 register kmp_info_t *th = __kmp_threads[ gtid ];
Jim Cownie5e8470a2013-09-27 10:38:44 +000096
Andrey Churbanovd7d088f2015-04-29 16:42:24 +000097#if OMPT_SUPPORT && OMPT_TRACE
98 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
99 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
100#endif
101
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000102 KMP_DEBUG_ASSERT( plastiter && plower && pupper && pstride );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000103 KE_TRACE( 10, ("__kmpc_for_static_init called (%d)\n", global_tid));
104 #ifdef KMP_DEBUG
105 {
106 const char * buff;
107 // create format specifiers before the debug output
108 buff = __kmp_str_format(
109 "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," \
110 " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
111 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
112 traits_t< ST >::spec, traits_t< ST >::spec, traits_t< T >::spec );
113 KD_TRACE(100, ( buff, global_tid, schedtype, *plastiter,
114 *plower, *pupper, *pstride, incr, chunk ) );
115 __kmp_str_free( &buff );
116 }
117 #endif
118
119 if ( __kmp_env_consistency_check ) {
120 __kmp_push_workshare( global_tid, ct_pdo, loc );
121 if ( incr == 0 ) {
122 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000123 }
124 }
125 /* special handling for zero-trip loops */
126 if ( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000127 if( plastiter != NULL )
128 *plastiter = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000129 /* leave pupper and plower set to entire iteration space */
130 *pstride = incr; /* value should never be used */
131 // *plower = *pupper - incr; // let compiler bypass the illegal loop (like for(i=1;i<10;i--)) THIS LINE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE ON A ZERO-TRIP LOOP (lower=1,\
132 upper=0,stride=1) - JPH June 23, 2009.
133 #ifdef KMP_DEBUG
134 {
135 const char * buff;
136 // create format specifiers before the debug output
137 buff = __kmp_str_format(
138 "__kmpc_for_static_init:(ZERO TRIP) liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>, loc = %%s\n",
139 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
140 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride, loc->psource ) );
141 __kmp_str_free( &buff );
142 }
143 #endif
144 KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000145
146#if OMPT_SUPPORT && OMPT_TRACE
147 if ((ompt_status == ompt_status_track_callback) &&
148 ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
149 ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
150 team_info->parallel_id, task_info->task_id,
151 team_info->microtask);
152 }
153#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000154 return;
155 }
156
157 #if OMP_40_ENABLED
158 if ( schedtype > kmp_ord_upper ) {
159 // we are in DISTRIBUTE construct
160 schedtype += kmp_sch_static - kmp_distribute_static; // AC: convert to usual schedule type
Andrey Churbanov51aecb82015-05-06 19:22:36 +0000161 tid = th->th.th_team->t.t_master_tid;
162 team = th->th.th_team->t.t_parent;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000163 } else
164 #endif
165 {
166 tid = __kmp_tid_from_gtid( global_tid );
Andrey Churbanov51aecb82015-05-06 19:22:36 +0000167 team = th->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000168 }
169
170 /* determine if "for" loop is an active worksharing construct */
171 if ( team -> t.t_serialized ) {
172 /* serialized parallel, each thread executes whole iteration space */
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000173 if( plastiter != NULL )
174 *plastiter = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000175 /* leave pupper and plower set to entire iteration space */
176 *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
177
178 #ifdef KMP_DEBUG
179 {
180 const char * buff;
181 // create format specifiers before the debug output
182 buff = __kmp_str_format(
183 "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
184 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
185 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
186 __kmp_str_free( &buff );
187 }
188 #endif
189 KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000190
191#if OMPT_SUPPORT && OMPT_TRACE
192 if ((ompt_status == ompt_status_track_callback) &&
193 ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
194 ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
195 team_info->parallel_id, task_info->task_id,
196 team_info->microtask);
197 }
198#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000199 return;
200 }
201 nth = team->t.t_nproc;
202 if ( nth == 1 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000203 if( plastiter != NULL )
204 *plastiter = TRUE;
205 *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000206 #ifdef KMP_DEBUG
207 {
208 const char * buff;
209 // create format specifiers before the debug output
210 buff = __kmp_str_format(
211 "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
212 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
213 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
214 __kmp_str_free( &buff );
215 }
216 #endif
217 KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000218
219#if OMPT_SUPPORT && OMPT_TRACE
220 if ((ompt_status == ompt_status_track_callback) &&
221 ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
222 ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
223 team_info->parallel_id, task_info->task_id,
224 team_info->microtask);
225 }
226#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000227 return;
228 }
229
230 /* compute trip count */
231 if ( incr == 1 ) {
232 trip_count = *pupper - *plower + 1;
233 } else if (incr == -1) {
234 trip_count = *plower - *pupper + 1;
235 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000236 if ( incr > 1 ) { // the check is needed for unsigned division when incr < 0
Jim Cownie5e8470a2013-09-27 10:38:44 +0000237 trip_count = (*pupper - *plower) / incr + 1;
238 } else {
239 trip_count = (*plower - *pupper) / ( -incr ) + 1;
240 }
241 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000242
Jim Cownie5e8470a2013-09-27 10:38:44 +0000243 if ( __kmp_env_consistency_check ) {
244 /* tripcount overflow? */
245 if ( trip_count == 0 && *pupper != *plower ) {
246 __kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc );
247 }
248 }
249
250 /* compute remaining parameters */
251 switch ( schedtype ) {
252 case kmp_sch_static:
253 {
254 if ( trip_count < nth ) {
255 KMP_DEBUG_ASSERT(
256 __kmp_static == kmp_sch_static_greedy || \
257 __kmp_static == kmp_sch_static_balanced
258 ); // Unknown static scheduling type.
259 if ( tid < trip_count ) {
260 *pupper = *plower = *plower + tid * incr;
261 } else {
262 *plower = *pupper + incr;
263 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000264 if( plastiter != NULL )
265 *plastiter = ( tid == trip_count - 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000266 } else {
267 if ( __kmp_static == kmp_sch_static_balanced ) {
268 register UT small_chunk = trip_count / nth;
269 register UT extras = trip_count % nth;
270 *plower += incr * ( tid * small_chunk + ( tid < extras ? tid : extras ) );
271 *pupper = *plower + small_chunk * incr - ( tid < extras ? 0 : incr );
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000272 if( plastiter != NULL )
273 *plastiter = ( tid == nth - 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000274 } else {
275 register T big_chunk_inc_count = ( trip_count/nth +
276 ( ( trip_count % nth ) ? 1 : 0) ) * incr;
277 register T old_upper = *pupper;
278
279 KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
280 // Unknown static scheduling type.
281
282 *plower += tid * big_chunk_inc_count;
283 *pupper = *plower + big_chunk_inc_count - incr;
284 if ( incr > 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000285 if( *pupper < *plower )
Jim Cownie5e8470a2013-09-27 10:38:44 +0000286 *pupper = i_maxmin< T >::mx;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000287 if( plastiter != NULL )
288 *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000289 if ( *pupper > old_upper ) *pupper = old_upper; // tracker C73258
290 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000291 if( *pupper > *plower )
Jim Cownie5e8470a2013-09-27 10:38:44 +0000292 *pupper = i_maxmin< T >::mn;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000293 if( plastiter != NULL )
294 *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000295 if ( *pupper < old_upper ) *pupper = old_upper; // tracker C73258
296 }
297 }
298 }
299 break;
300 }
301 case kmp_sch_static_chunked:
302 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000303 register ST span;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000304 if ( chunk < 1 ) {
305 chunk = 1;
306 }
307 span = chunk * incr;
308 *pstride = span * nth;
309 *plower = *plower + (span * tid);
310 *pupper = *plower + span - incr;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000311 if( plastiter != NULL )
312 *plastiter = (tid == ((trip_count - 1)/( UT )chunk) % nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000313 break;
314 }
315 default:
316 KMP_ASSERT2( 0, "__kmpc_for_static_init: unknown scheduling type" );
317 break;
318 }
319
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000320#if USE_ITT_BUILD
321 // Report loop metadata
Andrey Churbanov51aecb82015-05-06 19:22:36 +0000322 if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
323#if OMP_40_ENABLED
324 th->th.th_teams_microtask == NULL &&
325#endif
326 team->t.t_active_level == 1 )
327 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000328 kmp_uint64 cur_chunk = chunk;
329 // Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked
330 if ( schedtype == kmp_sch_static ) {
331 cur_chunk = trip_count / nth + ( ( trip_count % nth ) ? 1 : 0);
332 }
333 // 0 - "static" schedule
334 __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
335 }
336#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000337 #ifdef KMP_DEBUG
338 {
339 const char * buff;
340 // create format specifiers before the debug output
341 buff = __kmp_str_format(
342 "__kmpc_for_static_init: liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>\n",
343 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
344 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
345 __kmp_str_free( &buff );
346 }
347 #endif
348 KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000349
350#if OMPT_SUPPORT && OMPT_TRACE
351 if ((ompt_status == ompt_status_track_callback) &&
352 ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
353 ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
354 team_info->parallel_id, task_info->task_id, team_info->microtask);
355 }
356#endif
357
Jim Cownie5e8470a2013-09-27 10:38:44 +0000358 return;
359}
360
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000361template< typename T >
362static void
363__kmp_dist_for_static_init(
364 ident_t *loc,
365 kmp_int32 gtid,
366 kmp_int32 schedule,
367 kmp_int32 *plastiter,
368 T *plower,
369 T *pupper,
370 T *pupperDist,
371 typename traits_t< T >::signed_t *pstride,
372 typename traits_t< T >::signed_t incr,
373 typename traits_t< T >::signed_t chunk
374) {
375 KMP_COUNT_BLOCK(OMP_DISTR_FOR_static);
376 typedef typename traits_t< T >::unsigned_t UT;
377 typedef typename traits_t< T >::signed_t ST;
378 register kmp_uint32 tid;
379 register kmp_uint32 nth;
380 register kmp_uint32 team_id;
381 register kmp_uint32 nteams;
382 register UT trip_count;
383 register kmp_team_t *team;
384 kmp_info_t * th;
385
386 KMP_DEBUG_ASSERT( plastiter && plower && pupper && pupperDist && pstride );
387 KE_TRACE( 10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
388 #ifdef KMP_DEBUG
389 {
390 const char * buff;
391 // create format specifiers before the debug output
392 buff = __kmp_str_format(
393 "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "\
394 "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
395 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
396 traits_t< ST >::spec, traits_t< T >::spec );
397 KD_TRACE(100, ( buff, gtid, schedule, *plastiter,
398 *plower, *pupper, incr, chunk ) );
399 __kmp_str_free( &buff );
400 }
401 #endif
402
403 if( __kmp_env_consistency_check ) {
404 __kmp_push_workshare( gtid, ct_pdo, loc );
405 if( incr == 0 ) {
406 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
407 }
408 if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
409 // The loop is illegal.
410 // Some zero-trip loops maintained by compiler, e.g.:
411 // for(i=10;i<0;++i) // lower >= upper - run-time check
412 // for(i=0;i>10;--i) // lower <= upper - run-time check
413 // for(i=0;i>10;++i) // incr > 0 - compile-time check
414 // for(i=10;i<0;--i) // incr < 0 - compile-time check
415 // Compiler does not check the following illegal loops:
416 // for(i=0;i<10;i+=incr) // where incr<0
417 // for(i=10;i>0;i-=incr) // where incr<0
418 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
419 }
420 }
421 tid = __kmp_tid_from_gtid( gtid );
422 th = __kmp_threads[gtid];
423 KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
424 nth = th->th.th_team_nproc;
425 team = th->th.th_team;
426 #if OMP_40_ENABLED
427 nteams = th->th.th_teams_size.nteams;
428 #endif
429 team_id = team->t.t_master_tid;
430 KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);
431
432 // compute global trip count
433 if( incr == 1 ) {
434 trip_count = *pupper - *plower + 1;
435 } else if(incr == -1) {
436 trip_count = *plower - *pupper + 1;
437 } else {
438 trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
439 }
440 *pstride = *pupper - *plower; // just in case (can be unused)
441 if( trip_count <= nteams ) {
442 KMP_DEBUG_ASSERT(
443 __kmp_static == kmp_sch_static_greedy || \
444 __kmp_static == kmp_sch_static_balanced
445 ); // Unknown static scheduling type.
446 // only masters of some teams get single iteration, other threads get nothing
447 if( team_id < trip_count && tid == 0 ) {
448 *pupper = *pupperDist = *plower = *plower + team_id * incr;
449 } else {
450 *pupperDist = *pupper;
451 *plower = *pupper + incr; // compiler should skip loop body
452 }
453 if( plastiter != NULL )
454 *plastiter = ( tid == 0 && team_id == trip_count - 1 );
455 } else {
456 // Get the team's chunk first (each team gets at most one chunk)
457 if( __kmp_static == kmp_sch_static_balanced ) {
458 register UT chunkD = trip_count / nteams;
459 register UT extras = trip_count % nteams;
460 *plower += incr * ( team_id * chunkD + ( team_id < extras ? team_id : extras ) );
461 *pupperDist = *plower + chunkD * incr - ( team_id < extras ? 0 : incr );
462 if( plastiter != NULL )
463 *plastiter = ( team_id == nteams - 1 );
464 } else {
465 register T chunk_inc_count =
466 ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
467 register T upper = *pupper;
468 KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
469 // Unknown static scheduling type.
470 *plower += team_id * chunk_inc_count;
471 *pupperDist = *plower + chunk_inc_count - incr;
472 // Check/correct bounds if needed
473 if( incr > 0 ) {
474 if( *pupperDist < *plower )
475 *pupperDist = i_maxmin< T >::mx;
476 if( plastiter != NULL )
477 *plastiter = *plower <= upper && *pupperDist > upper - incr;
478 if( *pupperDist > upper )
479 *pupperDist = upper; // tracker C73258
480 if( *plower > *pupperDist ) {
481 *pupper = *pupperDist; // no iterations available for the team
482 goto end;
483 }
484 } else {
485 if( *pupperDist > *plower )
486 *pupperDist = i_maxmin< T >::mn;
487 if( plastiter != NULL )
488 *plastiter = *plower >= upper && *pupperDist < upper - incr;
489 if( *pupperDist < upper )
490 *pupperDist = upper; // tracker C73258
491 if( *plower < *pupperDist ) {
492 *pupper = *pupperDist; // no iterations available for the team
493 goto end;
494 }
495 }
496 }
497 // Get the parallel loop chunk now (for thread)
498 // compute trip count for team's chunk
499 if( incr == 1 ) {
500 trip_count = *pupperDist - *plower + 1;
501 } else if(incr == -1) {
502 trip_count = *plower - *pupperDist + 1;
503 } else {
504 trip_count = (ST)(*pupperDist - *plower) / incr + 1;
505 }
506 KMP_DEBUG_ASSERT( trip_count );
507 switch( schedule ) {
508 case kmp_sch_static:
509 {
510 if( trip_count <= nth ) {
511 KMP_DEBUG_ASSERT(
512 __kmp_static == kmp_sch_static_greedy || \
513 __kmp_static == kmp_sch_static_balanced
514 ); // Unknown static scheduling type.
515 if( tid < trip_count )
516 *pupper = *plower = *plower + tid * incr;
517 else
518 *plower = *pupper + incr; // no iterations available
519 if( plastiter != NULL )
520 if( *plastiter != 0 && !( tid == trip_count - 1 ) )
521 *plastiter = 0;
522 } else {
523 if( __kmp_static == kmp_sch_static_balanced ) {
524 register UT chunkL = trip_count / nth;
525 register UT extras = trip_count % nth;
526 *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
527 *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
528 if( plastiter != NULL )
529 if( *plastiter != 0 && !( tid == nth - 1 ) )
530 *plastiter = 0;
531 } else {
532 register T chunk_inc_count =
533 ( trip_count / nth + ( ( trip_count % nth ) ? 1 : 0) ) * incr;
534 register T upper = *pupperDist;
535 KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
536 // Unknown static scheduling type.
537 *plower += tid * chunk_inc_count;
538 *pupper = *plower + chunk_inc_count - incr;
539 if( incr > 0 ) {
540 if( *pupper < *plower )
541 *pupper = i_maxmin< T >::mx;
542 if( plastiter != NULL )
543 if( *plastiter != 0 && !(*plower <= upper && *pupper > upper - incr) )
544 *plastiter = 0;
545 if( *pupper > upper )
546 *pupper = upper;//tracker C73258
547 } else {
548 if( *pupper > *plower )
549 *pupper = i_maxmin< T >::mn;
550 if( plastiter != NULL )
551 if( *plastiter != 0 && !(*plower >= upper && *pupper < upper - incr) )
552 *plastiter = 0;
553 if( *pupper < upper )
554 *pupper = upper;//tracker C73258
555 }
556 }
557 }
558 break;
559 }
560 case kmp_sch_static_chunked:
561 {
562 register ST span;
563 if( chunk < 1 )
564 chunk = 1;
565 span = chunk * incr;
566 *pstride = span * nth;
567 *plower = *plower + (span * tid);
568 *pupper = *plower + span - incr;
569 if( plastiter != NULL )
570 if( *plastiter != 0 && !(tid == ((trip_count - 1) / ( UT )chunk) % nth) )
571 *plastiter = 0;
572 break;
573 }
574 default:
575 KMP_ASSERT2( 0, "__kmpc_dist_for_static_init: unknown loop scheduling type" );
576 break;
577 }
578 }
579 end:;
580 #ifdef KMP_DEBUG
581 {
582 const char * buff;
583 // create format specifiers before the debug output
584 buff = __kmp_str_format(
585 "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "\
586 "stride=%%%s signed?<%s>\n",
587 traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec,
588 traits_t< ST >::spec, traits_t< T >::spec );
589 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pupperDist, *pstride ) );
590 __kmp_str_free( &buff );
591 }
592 #endif
593 KE_TRACE( 10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid ) );
594 return;
595}
596
597template< typename T >
598static void
599__kmp_team_static_init(
600 ident_t *loc,
601 kmp_int32 gtid,
602 kmp_int32 *p_last,
603 T *p_lb,
604 T *p_ub,
605 typename traits_t< T >::signed_t *p_st,
606 typename traits_t< T >::signed_t incr,
607 typename traits_t< T >::signed_t chunk
608) {
609 // The routine returns the first chunk distributed to the team and
610 // stride for next chunks calculation.
611 // Last iteration flag set for the team that will execute
612 // the last iteration of the loop.
613 // The routine is called for dist_schedue(static,chunk) only.
614 typedef typename traits_t< T >::unsigned_t UT;
615 typedef typename traits_t< T >::signed_t ST;
616 kmp_uint32 team_id;
617 kmp_uint32 nteams;
618 UT trip_count;
619 T lower;
620 T upper;
621 ST span;
622 kmp_team_t *team;
623 kmp_info_t *th;
624
625 KMP_DEBUG_ASSERT( p_last && p_lb && p_ub && p_st );
626 KE_TRACE( 10, ("__kmp_team_static_init called (%d)\n", gtid));
627 #ifdef KMP_DEBUG
628 {
629 const char * buff;
630 // create format specifiers before the debug output
631 buff = __kmp_str_format( "__kmp_team_static_init enter: T#%%d liter=%%d "\
632 "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
633 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
634 traits_t< ST >::spec, traits_t< T >::spec );
635 KD_TRACE(100, ( buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
636 __kmp_str_free( &buff );
637 }
638 #endif
639
640 lower = *p_lb;
641 upper = *p_ub;
642 if( __kmp_env_consistency_check ) {
643 if( incr == 0 ) {
644 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
645 }
646 if( incr > 0 ? (upper < lower) : (lower < upper) ) {
647 // The loop is illegal.
648 // Some zero-trip loops maintained by compiler, e.g.:
649 // for(i=10;i<0;++i) // lower >= upper - run-time check
650 // for(i=0;i>10;--i) // lower <= upper - run-time check
651 // for(i=0;i>10;++i) // incr > 0 - compile-time check
652 // for(i=10;i<0;--i) // incr < 0 - compile-time check
653 // Compiler does not check the following illegal loops:
654 // for(i=0;i<10;i+=incr) // where incr<0
655 // for(i=10;i>0;i-=incr) // where incr<0
656 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
657 }
658 }
659 th = __kmp_threads[gtid];
660 KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
661 team = th->th.th_team;
662 #if OMP_40_ENABLED
663 nteams = th->th.th_teams_size.nteams;
664 #endif
665 team_id = team->t.t_master_tid;
666 KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);
667
668 // compute trip count
669 if( incr == 1 ) {
670 trip_count = upper - lower + 1;
671 } else if(incr == -1) {
672 trip_count = lower - upper + 1;
673 } else {
674 trip_count = (ST)(upper - lower) / incr + 1; // cast to signed to cover incr<0 case
675 }
676 if( chunk < 1 )
677 chunk = 1;
678 span = chunk * incr;
679 *p_st = span * nteams;
680 *p_lb = lower + (span * team_id);
681 *p_ub = *p_lb + span - incr;
682 if ( p_last != NULL )
683 *p_last = (team_id == ((trip_count - 1)/(UT)chunk) % nteams);
684 // Correct upper bound if needed
685 if( incr > 0 ) {
686 if( *p_ub < *p_lb ) // overflow?
687 *p_ub = i_maxmin< T >::mx;
688 if( *p_ub > upper )
689 *p_ub = upper; // tracker C73258
690 } else { // incr < 0
691 if( *p_ub > *p_lb )
692 *p_ub = i_maxmin< T >::mn;
693 if( *p_ub < upper )
694 *p_ub = upper; // tracker C73258
695 }
696 #ifdef KMP_DEBUG
697 {
698 const char * buff;
699 // create format specifiers before the debug output
700 buff = __kmp_str_format( "__kmp_team_static_init exit: T#%%d team%%u liter=%%d "\
701 "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
702 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
703 traits_t< ST >::spec );
704 KD_TRACE(100, ( buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
705 __kmp_str_free( &buff );
706 }
707 #endif
708}
709
Jim Cownie5e8470a2013-09-27 10:38:44 +0000710//--------------------------------------------------------------------------------------
711extern "C" {
712
713/*!
714@ingroup WORK_SHARING
715@param loc Source code location
716@param gtid Global thread id of this thread
717@param schedtype Scheduling type
718@param plastiter Pointer to the "last iteration" flag
719@param plower Pointer to the lower bound
720@param pupper Pointer to the upper bound
721@param pstride Pointer to the stride
722@param incr Loop increment
723@param chunk The chunk size
724
725Each of the four functions here are identical apart from the argument types.
726
727The functions compute the upper and lower bounds and stride to be used for the set of iterations
728to be executed by the current thread from the statically scheduled loop that is described by the
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000729initial values of the bounds, stride, increment and chunk size.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000730
731@{
732*/
733void
734__kmpc_for_static_init_4( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
735 kmp_int32 *plower, kmp_int32 *pupper,
736 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
737{
738 __kmp_for_static_init< kmp_int32 >(
739 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
740}
741
742/*!
743 See @ref __kmpc_for_static_init_4
744 */
745void
746__kmpc_for_static_init_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
747 kmp_uint32 *plower, kmp_uint32 *pupper,
748 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
749{
750 __kmp_for_static_init< kmp_uint32 >(
751 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
752}
753
754/*!
755 See @ref __kmpc_for_static_init_4
756 */
757void
758__kmpc_for_static_init_8( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
759 kmp_int64 *plower, kmp_int64 *pupper,
760 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
761{
762 __kmp_for_static_init< kmp_int64 >(
763 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
764}
765
766/*!
767 See @ref __kmpc_for_static_init_4
768 */
769void
770__kmpc_for_static_init_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
771 kmp_uint64 *plower, kmp_uint64 *pupper,
772 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
773{
774 __kmp_for_static_init< kmp_uint64 >(
775 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
776}
777/*!
778@}
779*/
780
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000781/*!
782@ingroup WORK_SHARING
783@param loc Source code location
784@param gtid Global thread id of this thread
Jonathan Peyton81f9cd12015-05-22 22:37:22 +0000785@param schedule Scheduling type for the parallel loop
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000786@param plastiter Pointer to the "last iteration" flag
787@param plower Pointer to the lower bound
788@param pupper Pointer to the upper bound of loop chunk
789@param pupperD Pointer to the upper bound of dist_chunk
Jonathan Peyton81f9cd12015-05-22 22:37:22 +0000790@param pstride Pointer to the stride for parallel loop
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000791@param incr Loop increment
Jonathan Peyton81f9cd12015-05-22 22:37:22 +0000792@param chunk The chunk size for the parallel loop
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000793
794Each of the four functions here are identical apart from the argument types.
795
796The functions compute the upper and lower bounds and strides to be used for the set of iterations
797to be executed by the current thread from the statically scheduled loop that is described by the
798initial values of the bounds, strides, increment and chunks for parallel loop and distribute
799constructs.
800
801@{
802*/
803void
804__kmpc_dist_for_static_init_4(
805 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
806 kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD,
807 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
808{
809 __kmp_dist_for_static_init< kmp_int32 >(
810 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
811}
812
813/*!
814 See @ref __kmpc_dist_for_static_init_4
815 */
816void
817__kmpc_dist_for_static_init_4u(
818 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
819 kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD,
820 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
821{
822 __kmp_dist_for_static_init< kmp_uint32 >(
823 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
824}
825
826/*!
827 See @ref __kmpc_dist_for_static_init_4
828 */
829void
830__kmpc_dist_for_static_init_8(
831 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
832 kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD,
833 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
834{
835 __kmp_dist_for_static_init< kmp_int64 >(
836 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
837}
838
839/*!
840 See @ref __kmpc_dist_for_static_init_4
841 */
842void
843__kmpc_dist_for_static_init_8u(
844 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
845 kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD,
846 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
847{
848 __kmp_dist_for_static_init< kmp_uint64 >(
849 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
850}
851/*!
852@}
853*/
854
855//-----------------------------------------------------------------------------------------
856// Auxiliary routines for Distribute Parallel Loop construct implementation
857// Transfer call to template< type T >
858// __kmp_team_static_init( ident_t *loc, int gtid,
859// int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
860
861/*!
862@ingroup WORK_SHARING
863@{
864@param loc Source location
865@param gtid Global thread id
866@param p_last pointer to last iteration flag
867@param p_lb pointer to Lower bound
868@param p_ub pointer to Upper bound
869@param p_st Step (or increment if you prefer)
870@param incr Loop increment
871@param chunk The chunk size to block with
872
873The functions compute the upper and lower bounds and stride to be used for the set of iterations
874to be executed by the current team from the statically scheduled loop that is described by the
875initial values of the bounds, stride, increment and chunk for the distribute construct as part of
876composite distribute parallel loop construct.
877These functions are all identical apart from the types of the arguments.
878*/
879
880void
881__kmpc_team_static_init_4(
882 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
883 kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
884{
885 KMP_DEBUG_ASSERT( __kmp_init_serial );
886 __kmp_team_static_init< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
887}
888
889/*!
890 See @ref __kmpc_team_static_init_4
891 */
892void
893__kmpc_team_static_init_4u(
894 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
895 kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
896{
897 KMP_DEBUG_ASSERT( __kmp_init_serial );
898 __kmp_team_static_init< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
899}
900
901/*!
902 See @ref __kmpc_team_static_init_4
903 */
904void
905__kmpc_team_static_init_8(
906 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
907 kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
908{
909 KMP_DEBUG_ASSERT( __kmp_init_serial );
910 __kmp_team_static_init< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
911}
912
913/*!
914 See @ref __kmpc_team_static_init_4
915 */
916void
917__kmpc_team_static_init_8u(
918 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
919 kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
920{
921 KMP_DEBUG_ASSERT( __kmp_init_serial );
922 __kmp_team_static_init< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
923}
924/*!
925@}
926*/
927
Jim Cownie5e8470a2013-09-27 10:38:44 +0000928} // extern "C"
929