/*
 * kmp_sched.c -- static scheduling -- iteration initialization
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


/*
 * Static scheduling initialization.
 *
 * NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
 *       it may change values between parallel regions. __kmp_max_nth
 *       is the largest value __kmp_nth may take, 1 is the smallest.
 */
24
25#include "kmp.h"
26#include "kmp_i18n.h"
27#include "kmp_str.h"
28#include "kmp_error.h"
Jim Cownie4cc4bb42014-10-07 16:25:50 +000029#include "kmp_stats.h"
30#include "kmp_itt.h"
Jim Cownie5e8470a2013-09-27 10:38:44 +000031
Andrey Churbanovd7d088f2015-04-29 16:42:24 +000032#if OMPT_SUPPORT
33#include "ompt-specific.h"
34#endif
35
// Per-type numeric limits, used by the schedulers below to clamp an upper
// bound after chunk arithmetic overflows/underflows.
template< typename T >
struct i_maxmin {
    static const T mx;  // largest representable value of T
    static const T mn;  // smallest representable value of T
};
template<>
struct i_maxmin< int > {
    static const int mx = 0x7fffffff;
    // Written as an expression: the literal 0x80000000 has unsigned type and
    // its conversion to int is implementation-defined; this form is exact.
    static const int mn = -0x7fffffff - 1;
};
template<>
struct i_maxmin< unsigned int > {
    static const unsigned int mx = 0xffffffffu;
    static const unsigned int mn = 0x00000000u;
};
template<>
struct i_maxmin< long long > {
    static const long long mx = 0x7fffffffffffffffLL;
    // 0x8000000000000000LL does not fit in long long; use the exact expression.
    static const long long mn = -0x7fffffffffffffffLL - 1LL;
};
template<>
struct i_maxmin< unsigned long long > {
    // ULL suffix: 0xffffffffffffffffLL would be an out-of-range signed literal.
    static const unsigned long long mx = 0xffffffffffffffffULL;
    static const unsigned long long mn = 0x0000000000000000ULL;
};
//-------------------------------------------------------------------------
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// printf-style format specifiers used by the KD_TRACE debug output below,
// one per supported loop-index type ( d, u, lld, llu ).
char const * traits_t< int >::spec                = "d";
char const * traits_t< unsigned int >::spec       = "u";
char const * traits_t< long long >::spec          = "lld";
char const * traits_t< unsigned long long >::spec = "llu";
//-------------------------------------------------------------------------
#endif
72
/*
 * Compute the bounds and stride of the iteration chunk the calling thread
 * executes for a statically scheduled worksharing loop.
 *
 * loc        -- source location (error reporting / ITT metadata)
 * global_tid -- global thread id of the caller
 * schedtype  -- schedule kind; values above kmp_ord_upper mark a DISTRIBUTE
 *               construct and are converted to the matching static schedule
 * plastiter  -- out: non-zero iff this thread executes the last iteration
 * plower/pupper -- in: full loop bounds; out: this thread's chunk bounds
 * pstride    -- out: stride to the thread's next chunk
 * incr       -- loop increment (must be non-zero)
 * chunk      -- chunk size for schedule(static,chunk)
 */
template< typename T >
static void
__kmp_for_static_init(
    ident_t                          *loc,
    kmp_int32                         global_tid,
    kmp_int32                         schedtype,
    kmp_int32                        *plastiter,
    T                                *plower,
    T                                *pupper,
    typename traits_t< T >::signed_t *pstride,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    KMP_COUNT_BLOCK(OMP_FOR_static);
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    /* this all has to be changed back to TID and such.. */
    register kmp_int32   gtid = global_tid;
    register kmp_uint32  tid;
    register kmp_uint32  nth;
    register UT          trip_count;
    register kmp_team_t *team;
    register kmp_info_t *th = __kmp_threads[ gtid ];

#if OMPT_SUPPORT && OMPT_TRACE
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
#endif

    KMP_DEBUG_ASSERT( plastiter && plower && pupper && pstride );
    KE_TRACE( 10, ("__kmpc_for_static_init called (%d)\n", global_tid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," \
            " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, global_tid, schedtype, *plastiter,
            *plower, *pupper, *pstride, incr, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    if ( __kmp_env_consistency_check ) {
        __kmp_push_workshare( global_tid, ct_pdo, loc );
        if ( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
    }
    /* special handling for zero-trip loops */
    if ( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
        if( plastiter != NULL )
            *plastiter = FALSE;
        /* leave pupper and plower set to entire iteration space */
        *pstride = incr;   /* value should never be used */
        // *plower = *pupper - incr;  // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
        //   THIS LINE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE ON A ZERO-TRIP LOOP
        //   (lower=1, upper=0, stride=1) - JPH June 23, 2009.
        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init:(ZERO TRIP) liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>, loc = %%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride, loc->psource ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if ((ompt_status == ompt_status_track_callback) &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        return;
    }

    #if OMP_40_ENABLED
    if ( schedtype > kmp_ord_upper ) {
        // we are in DISTRIBUTE construct
        schedtype += kmp_sch_static - kmp_distribute_static;  // AC: convert to usual schedule type
        tid  = th->th.th_team->t.t_master_tid;
        team = th->th.th_team->t.t_parent;
    } else
    #endif
    {
        tid  = __kmp_tid_from_gtid( global_tid );
        team = th->th.th_team;
    }

    /* determine if "for" loop is an active worksharing construct */
    if ( team -> t.t_serialized ) {
        /* serialized parallel, each thread executes whole iteration space */
        if( plastiter != NULL )
            *plastiter = TRUE;
        /* leave pupper and plower set to entire iteration space */
        *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if ((ompt_status == ompt_status_track_callback) &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        return;
    }
    nth = team->t.t_nproc;
    if ( nth == 1 ) {
        // single-thread team: same treatment as the serialized case above
        if( plastiter != NULL )
            *plastiter = TRUE;
        *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if ((ompt_status == ompt_status_track_callback) &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        return;
    }

    /* compute trip count */
    if ( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if (incr == -1) {
        trip_count = *plower - *pupper + 1;
    } else {
        if ( incr > 1 ) {  // the check is needed for unsigned division when incr < 0
            trip_count = (*pupper - *plower) / incr + 1;
        } else {
            trip_count = (*plower - *pupper) / ( -incr ) + 1;
        }
    }

    if ( __kmp_env_consistency_check ) {
        /* tripcount overflow? (count of 0 with unequal bounds means it wrapped) */
        if ( trip_count == 0 && *pupper != *plower ) {
            __kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc );
        }
    }

    /* compute remaining parameters */
    switch ( schedtype ) {
    case kmp_sch_static:
        {
            if ( trip_count < nth ) {
                // fewer iterations than threads: one iteration per low-tid thread
                KMP_DEBUG_ASSERT(
                    __kmp_static == kmp_sch_static_greedy || \
                    __kmp_static == kmp_sch_static_balanced
                ); // Unknown static scheduling type.
                if ( tid < trip_count ) {
                    *pupper = *plower = *plower + tid * incr;
                } else {
                    *plower = *pupper + incr;  // empty range: loop body is skipped
                }
                if( plastiter != NULL )
                    *plastiter = ( tid == trip_count - 1 );
            } else {
                if ( __kmp_static == kmp_sch_static_balanced ) {
                    // balanced: first 'extras' threads get one extra iteration
                    register UT small_chunk = trip_count / nth;
                    register UT extras = trip_count % nth;
                    *plower += incr * ( tid * small_chunk + ( tid < extras ? tid : extras ) );
                    *pupper = *plower + small_chunk * incr - ( tid < extras ? 0 : incr );
                    if( plastiter != NULL )
                        *plastiter = ( tid == nth - 1 );
                } else {
                    // greedy: ceil(trip_count/nth) iterations per thread
                    register T big_chunk_inc_count = ( trip_count/nth +
                        ( ( trip_count % nth ) ? 1 : 0) ) * incr;
                    register T old_upper = *pupper;

                    KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                        // Unknown static scheduling type.

                    *plower += tid * big_chunk_inc_count;
                    *pupper = *plower + big_chunk_inc_count - incr;
                    if ( incr > 0 ) {
                        if( *pupper < *plower )           // wrapped past the type's max
                            *pupper = i_maxmin< T >::mx;
                        if( plastiter != NULL )
                            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
                        if ( *pupper > old_upper ) *pupper = old_upper; // tracker C73258
                    } else {
                        if( *pupper > *plower )           // wrapped past the type's min
                            *pupper = i_maxmin< T >::mn;
                        if( plastiter != NULL )
                            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
                        if ( *pupper < old_upper ) *pupper = old_upper; // tracker C73258
                    }
                }
            }
            break;
        }
    case kmp_sch_static_chunked:
        {
            register ST span;
            if ( chunk < 1 ) {
                chunk = 1;
            }
            span = chunk * incr;
            *pstride = span * nth;
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if( plastiter != NULL )
                *plastiter = (tid == ((trip_count - 1)/( UT )chunk) % nth);
            break;
        }
    default:
        KMP_ASSERT2( 0, "__kmpc_for_static_init: unknown scheduling type" );
        break;
    }

#if USE_ITT_BUILD
    // Report loop metadata (master thread of an outermost active region only)
    if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
#if OMP_40_ENABLED
        th->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level == 1 )
    {
        kmp_uint64 cur_chunk = chunk;
        // Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked
        if ( schedtype == kmp_sch_static ) {
            cur_chunk = trip_count / nth + ( ( trip_count % nth ) ? 1 : 0);
        }
        // 0 - "static" schedule
        __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
    }
#endif
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_for_static_init: liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
        __kmp_str_free( &buff );
    }
    #endif
    KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
    if ((ompt_status == ompt_status_track_callback) &&
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
            team_info->parallel_id, task_info->task_id, team_info->microtask);
    }
#endif

    return;
}
360
/*
 * Combined "distribute parallel for" initialization: first carve the team's
 * portion of the iteration space out of the full loop (distribute step),
 * then carve this thread's chunk out of the team's portion (parallel-for
 * step).
 *
 * loc        -- source location for error reporting
 * gtid       -- global thread id of the caller
 * schedule   -- schedule kind for the parallel loop part
 * plastiter  -- out: non-zero iff this thread executes the last iteration
 * plower/pupper -- in: full loop bounds; out: this thread's chunk bounds
 * pupperDist -- out: upper bound of the team's (distribute) chunk
 * pstride    -- out: stride for the parallel loop part
 * incr       -- loop increment (must be non-zero)
 * chunk      -- chunk size for schedule(static,chunk)
 */
template< typename T >
static void
__kmp_dist_for_static_init(
    ident_t                          *loc,
    kmp_int32                         gtid,
    kmp_int32                         schedule,
    kmp_int32                        *plastiter,
    T                                *plower,
    T                                *pupper,
    T                                *pupperDist,
    typename traits_t< T >::signed_t *pstride,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    KMP_COUNT_BLOCK(OMP_DISTR_FOR_static);
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    register kmp_uint32  tid;
    register kmp_uint32  nth;
    register kmp_uint32  team_id;
    register kmp_uint32  nteams;
    register UT          trip_count;
    register kmp_team_t *team;
    kmp_info_t * th;

    KMP_DEBUG_ASSERT( plastiter && plower && pupper && pupperDist && pstride );
    KE_TRACE( 10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, schedule, *plastiter,
                        *plower, *pupper, incr, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    if( __kmp_env_consistency_check ) {
        __kmp_push_workshare( gtid, ct_pdo, loc );
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
            // The loop is illegal.
            // Some zero-trip loops maintained by compiler, e.g.:
            //   for(i=10;i<0;++i) // lower >= upper - run-time check
            //   for(i=0;i>10;--i) // lower <= upper - run-time check
            //   for(i=0;i>10;++i) // incr > 0     - compile-time check
            //   for(i=10;i<0;--i) // incr < 0     - compile-time check
            // Compiler does not check the following illegal loops:
            //   for(i=0;i<10;i+=incr) // where incr<0
            //   for(i=10;i>0;i-=incr) // where incr<0
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    tid = __kmp_tid_from_gtid( gtid );
    th = __kmp_threads[gtid];
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
    nth = th->th.th_team_nproc;
    team = th->th.th_team;
    // NOTE(review): nteams is assigned only under OMP_40_ENABLED but is read
    // unconditionally below -- presumably this routine is only reachable when
    // OMP_40_ENABLED is defined (teams construct); confirm against callers.
    #if OMP_40_ENABLED
    nteams = th->th.th_teams_size.nteams;
    #endif
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute global trip count
    if( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if(incr == -1) {
        trip_count = *plower - *pupper + 1;
    } else {
        trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
    }
    *pstride = *pupper - *plower;  // just in case (can be unused)
    if( trip_count <= nteams ) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy || \
            __kmp_static == kmp_sch_static_balanced
        ); // Unknown static scheduling type.
        // only masters of some teams get single iteration, other threads get nothing
        if( team_id < trip_count && tid == 0 ) {
            *pupper = *pupperDist = *plower = *plower + team_id * incr;
        } else {
            *pupperDist = *pupper;
            *plower = *pupper + incr;  // compiler should skip loop body
        }
        if( plastiter != NULL )
            *plastiter = ( tid == 0 && team_id == trip_count - 1 );
    } else {
        // Get the team's chunk first (each team gets at most one chunk)
        if( __kmp_static == kmp_sch_static_balanced ) {
            register UT chunkD = trip_count / nteams;
            register UT extras = trip_count % nteams;
            *plower += incr * ( team_id * chunkD + ( team_id < extras ? team_id : extras ) );
            *pupperDist = *plower + chunkD * incr - ( team_id < extras ? 0 : incr );
            if( plastiter != NULL )
                *plastiter = ( team_id == nteams - 1 );
        } else {
            register T chunk_inc_count =
                ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
            register T upper = *pupper;
            KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                // Unknown static scheduling type.
            *plower += team_id * chunk_inc_count;
            *pupperDist = *plower + chunk_inc_count - incr;
            // Check/correct bounds if needed
            if( incr > 0 ) {
                if( *pupperDist < *plower )       // wrapped past the type's max
                    *pupperDist = i_maxmin< T >::mx;
                if( plastiter != NULL )
                    *plastiter = *plower <= upper && *pupperDist > upper - incr;
                if( *pupperDist > upper )
                    *pupperDist = upper;  // tracker C73258
                if( *plower > *pupperDist ) {
                    *pupper = *pupperDist;  // no iterations available for the team
                    goto end;
                }
            } else {
                if( *pupperDist > *plower )       // wrapped past the type's min
                    *pupperDist = i_maxmin< T >::mn;
                if( plastiter != NULL )
                    *plastiter = *plower >= upper && *pupperDist < upper - incr;
                if( *pupperDist < upper )
                    *pupperDist = upper;  // tracker C73258
                if( *plower < *pupperDist ) {
                    *pupper = *pupperDist;  // no iterations available for the team
                    goto end;
                }
            }
        }
        // Get the parallel loop chunk now (for thread)
        // compute trip count for team's chunk
        if( incr == 1 ) {
            trip_count = *pupperDist - *plower + 1;
        } else if(incr == -1) {
            trip_count = *plower - *pupperDist + 1;
        } else {
            trip_count = (ST)(*pupperDist - *plower) / incr + 1;
        }
        KMP_DEBUG_ASSERT( trip_count );
        switch( schedule ) {
        case kmp_sch_static:
        {
            if( trip_count <= nth ) {
                KMP_DEBUG_ASSERT(
                    __kmp_static == kmp_sch_static_greedy || \
                    __kmp_static == kmp_sch_static_balanced
                ); // Unknown static scheduling type.
                if( tid < trip_count )
                    *pupper = *plower = *plower + tid * incr;
                else
                    *plower = *pupper + incr;  // no iterations available
                // only clear plastiter here: it may already be FALSE from the
                // distribute step, and clearing must not overwrite that
                if( plastiter != NULL )
                    if( *plastiter != 0 && !( tid == trip_count - 1 ) )
                        *plastiter = 0;
            } else {
                if( __kmp_static == kmp_sch_static_balanced ) {
                    register UT chunkL = trip_count / nth;
                    register UT extras = trip_count % nth;
                    *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
                    *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
                    if( plastiter != NULL )
                        if( *plastiter != 0 && !( tid == nth - 1 ) )
                            *plastiter = 0;
                } else {
                    register T chunk_inc_count =
                        ( trip_count / nth + ( ( trip_count % nth ) ? 1 : 0) ) * incr;
                    register T upper = *pupperDist;
                    KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                        // Unknown static scheduling type.
                    *plower += tid * chunk_inc_count;
                    *pupper = *plower + chunk_inc_count - incr;
                    if( incr > 0 ) {
                        if( *pupper < *plower )   // wrapped past the type's max
                            *pupper = i_maxmin< T >::mx;
                        if( plastiter != NULL )
                            if( *plastiter != 0 && !(*plower <= upper && *pupper > upper - incr) )
                                *plastiter = 0;
                        if( *pupper > upper )
                            *pupper = upper;  // tracker C73258
                    } else {
                        if( *pupper > *plower )   // wrapped past the type's min
                            *pupper = i_maxmin< T >::mn;
                        if( plastiter != NULL )
                            if( *plastiter != 0 && !(*plower >= upper && *pupper < upper - incr) )
                                *plastiter = 0;
                        if( *pupper < upper )
                            *pupper = upper;  // tracker C73258
                    }
                }
            }
            break;
        }
        case kmp_sch_static_chunked:
        {
            register ST span;
            if( chunk < 1 )
                chunk = 1;
            span = chunk * incr;
            *pstride = span * nth;
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if( plastiter != NULL )
                if( *plastiter != 0 && !(tid == ((trip_count - 1) / ( UT )chunk) % nth) )
                    *plastiter = 0;
            break;
        }
        default:
            KMP_ASSERT2( 0, "__kmpc_dist_for_static_init: unknown loop scheduling type" );
            break;
        }
    }
    end:;
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "\
            "stride=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pupperDist, *pstride ) );
        __kmp_str_free( &buff );
    }
    #endif
    KE_TRACE( 10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid ) );
    return;
}
596
/*
 * Compute the first chunk assigned to the calling thread's TEAM under a
 * distribute construct, plus the stride between the team's successive
 * chunks.
 *
 * loc    -- source location for error reporting
 * gtid   -- global thread id of the caller
 * p_last -- out: non-zero iff this team executes the last iteration
 * p_lb   -- in: full lower bound; out: team's chunk lower bound
 * p_ub   -- in: full upper bound; out: team's chunk upper bound
 * p_st   -- out: stride to the team's next chunk
 * incr   -- loop increment (must be non-zero)
 * chunk  -- dist_schedule chunk size (values < 1 are treated as 1)
 */
template< typename T >
static void
__kmp_team_static_init(
    ident_t                          *loc,
    kmp_int32                         gtid,
    kmp_int32                        *p_last,
    T                                *p_lb,
    T                                *p_ub,
    typename traits_t< T >::signed_t *p_st,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    // The routine returns the first chunk distributed to the team and
    // stride for next chunks calculation.
    // Last iteration flag set for the team that will execute
    // the last iteration of the loop.
    // The routine is called for dist_schedule(static,chunk) only.
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    kmp_uint32  team_id;
    kmp_uint32  nteams;
    UT          trip_count;
    T           lower;
    T           upper;
    ST          span;
    kmp_team_t *team;
    kmp_info_t *th;

    KMP_DEBUG_ASSERT( p_last && p_lb && p_ub && p_st );
    KE_TRACE( 10, ("__kmp_team_static_init called (%d)\n", gtid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmp_team_static_init enter: T#%%d liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    lower = *p_lb;
    upper = *p_ub;
    if( __kmp_env_consistency_check ) {
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (upper < lower) : (lower < upper) ) {
            // The loop is illegal.
            // Some zero-trip loops maintained by compiler, e.g.:
            //   for(i=10;i<0;++i) // lower >= upper - run-time check
            //   for(i=0;i>10;--i) // lower <= upper - run-time check
            //   for(i=0;i>10;++i) // incr > 0     - compile-time check
            //   for(i=10;i<0;--i) // incr < 0     - compile-time check
            // Compiler does not check the following illegal loops:
            //   for(i=0;i<10;i+=incr) // where incr<0
            //   for(i=10;i>0;i-=incr) // where incr<0
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    th = __kmp_threads[gtid];
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask);  // we are in the teams construct
    team = th->th.th_team;
    // NOTE(review): nteams is assigned only under OMP_40_ENABLED but is read
    // unconditionally below -- presumably only reachable with OMP 4.0 enabled.
    #if OMP_40_ENABLED
    nteams = th->th.th_teams_size.nteams;
    #endif
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute trip count
    if( incr == 1 ) {
        trip_count = upper - lower + 1;
    } else if(incr == -1) {
        trip_count = lower - upper + 1;
    } else {
        trip_count = (ST)(upper - lower) / incr + 1; // cast to signed to cover incr<0 case
    }
    if( chunk < 1 )
        chunk = 1;
    span = chunk * incr;
    *p_st = span * nteams;
    *p_lb = lower + (span * team_id);
    *p_ub = *p_lb + span - incr;
    if ( p_last != NULL )
        *p_last = (team_id == ((trip_count - 1)/(UT)chunk) % nteams);
    // Correct upper bound if needed
    if( incr > 0 ) {
        if( *p_ub < *p_lb )  // overflow?
            *p_ub = i_maxmin< T >::mx;
        if( *p_ub > upper )
            *p_ub = upper;  // tracker C73258
    } else {  // incr < 0
        if( *p_ub > *p_lb )  // underflow?
            *p_ub = i_maxmin< T >::mn;
        if( *p_ub < upper )
            *p_ub = upper;  // tracker C73258
    }
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmp_team_static_init exit: T#%%d team%%u liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec );
        KD_TRACE(100, ( buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif
}
709
Jim Cownie5e8470a2013-09-27 10:38:44 +0000710//--------------------------------------------------------------------------------------
711extern "C" {
712
713/*!
714@ingroup WORK_SHARING
715@param loc Source code location
716@param gtid Global thread id of this thread
717@param schedtype Scheduling type
718@param plastiter Pointer to the "last iteration" flag
719@param plower Pointer to the lower bound
720@param pupper Pointer to the upper bound
721@param pstride Pointer to the stride
722@param incr Loop increment
723@param chunk The chunk size
724
725Each of the four functions here are identical apart from the argument types.
726
727The functions compute the upper and lower bounds and stride to be used for the set of iterations
728to be executed by the current thread from the statically scheduled loop that is described by the
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000729initial values of the bounds, stride, increment and chunk size.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000730
731@{
732*/
733void
734__kmpc_for_static_init_4( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
735 kmp_int32 *plower, kmp_int32 *pupper,
736 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
737{
738 __kmp_for_static_init< kmp_int32 >(
739 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
740}
741
742/*!
743 See @ref __kmpc_for_static_init_4
744 */
745void
746__kmpc_for_static_init_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
747 kmp_uint32 *plower, kmp_uint32 *pupper,
748 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
749{
750 __kmp_for_static_init< kmp_uint32 >(
751 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
752}
753
754/*!
755 See @ref __kmpc_for_static_init_4
756 */
757void
758__kmpc_for_static_init_8( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
759 kmp_int64 *plower, kmp_int64 *pupper,
760 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
761{
762 __kmp_for_static_init< kmp_int64 >(
763 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
764}
765
766/*!
767 See @ref __kmpc_for_static_init_4
768 */
769void
770__kmpc_for_static_init_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
771 kmp_uint64 *plower, kmp_uint64 *pupper,
772 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
773{
774 __kmp_for_static_init< kmp_uint64 >(
775 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
776}
777/*!
778@}
779*/
780
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000781/*!
782@ingroup WORK_SHARING
783@param loc Source code location
784@param gtid Global thread id of this thread
785@param scheduleD Scheduling type for the distribute
786@param scheduleL Scheduling type for the parallel loop
787@param plastiter Pointer to the "last iteration" flag
788@param plower Pointer to the lower bound
789@param pupper Pointer to the upper bound of loop chunk
790@param pupperD Pointer to the upper bound of dist_chunk
791@param pstrideD Pointer to the stride for distribute
792@param pstrideL Pointer to the stride for parallel loop
793@param incr Loop increment
794@param chunkD The chunk size for the distribute
795@param chunkL The chunk size for the parallel loop
796
797Each of the four functions here are identical apart from the argument types.
798
799The functions compute the upper and lower bounds and strides to be used for the set of iterations
800to be executed by the current thread from the statically scheduled loop that is described by the
801initial values of the bounds, strides, increment and chunks for parallel loop and distribute
802constructs.
803
804@{
805*/
806void
807__kmpc_dist_for_static_init_4(
808 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
809 kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD,
810 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
811{
812 __kmp_dist_for_static_init< kmp_int32 >(
813 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
814}
815
816/*!
817 See @ref __kmpc_dist_for_static_init_4
818 */
819void
820__kmpc_dist_for_static_init_4u(
821 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
822 kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD,
823 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
824{
825 __kmp_dist_for_static_init< kmp_uint32 >(
826 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
827}
828
829/*!
830 See @ref __kmpc_dist_for_static_init_4
831 */
832void
833__kmpc_dist_for_static_init_8(
834 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
835 kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD,
836 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
837{
838 __kmp_dist_for_static_init< kmp_int64 >(
839 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
840}
841
842/*!
843 See @ref __kmpc_dist_for_static_init_4
844 */
845void
846__kmpc_dist_for_static_init_8u(
847 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
848 kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD,
849 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
850{
851 __kmp_dist_for_static_init< kmp_uint64 >(
852 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
853}
854/*!
855@}
856*/
857
858//-----------------------------------------------------------------------------------------
859// Auxiliary routines for Distribute Parallel Loop construct implementation
860// Transfer call to template< type T >
861// __kmp_team_static_init( ident_t *loc, int gtid,
862// int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
863
864/*!
865@ingroup WORK_SHARING
866@{
867@param loc Source location
868@param gtid Global thread id
869@param p_last pointer to last iteration flag
870@param p_lb pointer to Lower bound
871@param p_ub pointer to Upper bound
872@param p_st Step (or increment if you prefer)
873@param incr Loop increment
874@param chunk The chunk size to block with
875
876The functions compute the upper and lower bounds and stride to be used for the set of iterations
877to be executed by the current team from the statically scheduled loop that is described by the
878initial values of the bounds, stride, increment and chunk for the distribute construct as part of
879composite distribute parallel loop construct.
880These functions are all identical apart from the types of the arguments.
881*/
882
883void
884__kmpc_team_static_init_4(
885 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
886 kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
887{
888 KMP_DEBUG_ASSERT( __kmp_init_serial );
889 __kmp_team_static_init< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
890}
891
892/*!
893 See @ref __kmpc_team_static_init_4
894 */
895void
896__kmpc_team_static_init_4u(
897 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
898 kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
899{
900 KMP_DEBUG_ASSERT( __kmp_init_serial );
901 __kmp_team_static_init< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
902}
903
904/*!
905 See @ref __kmpc_team_static_init_4
906 */
907void
908__kmpc_team_static_init_8(
909 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
910 kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
911{
912 KMP_DEBUG_ASSERT( __kmp_init_serial );
913 __kmp_team_static_init< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
914}
915
916/*!
917 See @ref __kmpc_team_static_init_4
918 */
919void
920__kmpc_team_static_init_8u(
921 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
922 kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
923{
924 KMP_DEBUG_ASSERT( __kmp_init_serial );
925 __kmp_team_static_init< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
926}
927/*!
928@}
929*/
930
Jim Cownie5e8470a2013-09-27 10:38:44 +0000931} // extern "C"
932