blob: 7ebbb62eca5dd8f42e4ee56af08a983d6f059446 [file] [log] [blame]
/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


/*
 * Static scheduling initialization.
 *
 * NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
 *       it may change values between parallel regions.  __kmp_max_nth
 *       is the largest value __kmp_nth may take, 1 is the smallest.
 */
24
25#include "kmp.h"
26#include "kmp_i18n.h"
27#include "kmp_str.h"
28#include "kmp_error.h"
Jim Cownie4cc4bb42014-10-07 16:25:50 +000029#include "kmp_stats.h"
30#include "kmp_itt.h"
Jim Cownie5e8470a2013-09-27 10:38:44 +000031
Andrey Churbanovd7d088f2015-04-29 16:42:24 +000032#if OMPT_SUPPORT
33#include "ompt-specific.h"
34#endif
35
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// printf-style format specifiers ( d, u, lld, llu ) used by the KD_TRACE
// debug output for each loop-index type the templates are instantiated with
char const * traits_t< int >::spec = "d";
char const * traits_t< unsigned int >::spec = "u";
char const * traits_t< long long >::spec = "lld";
char const * traits_t< unsigned long long >::spec = "llu";
//-------------------------------------------------------------------------
#endif
45
46template< typename T >
47static void
48__kmp_for_static_init(
49 ident_t *loc,
50 kmp_int32 global_tid,
51 kmp_int32 schedtype,
52 kmp_int32 *plastiter,
53 T *plower,
54 T *pupper,
55 typename traits_t< T >::signed_t *pstride,
56 typename traits_t< T >::signed_t incr,
57 typename traits_t< T >::signed_t chunk
58) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +000059 KMP_COUNT_BLOCK(OMP_FOR_static);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +000060 KMP_TIME_PARTITIONED_BLOCK(FOR_static_scheduling);
Jonathan Peyton45be4502015-08-11 21:36:41 +000061
Jim Cownie5e8470a2013-09-27 10:38:44 +000062 typedef typename traits_t< T >::unsigned_t UT;
63 typedef typename traits_t< T >::signed_t ST;
64 /* this all has to be changed back to TID and such.. */
65 register kmp_int32 gtid = global_tid;
66 register kmp_uint32 tid;
67 register kmp_uint32 nth;
68 register UT trip_count;
69 register kmp_team_t *team;
Andrey Churbanov51aecb82015-05-06 19:22:36 +000070 register kmp_info_t *th = __kmp_threads[ gtid ];
Jim Cownie5e8470a2013-09-27 10:38:44 +000071
Andrey Churbanovd7d088f2015-04-29 16:42:24 +000072#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peyton61118492016-05-20 19:03:38 +000073 ompt_team_info_t *team_info = NULL;
74 ompt_task_info_t *task_info = NULL;
Jonathan Peytonf0344bb2015-10-09 17:42:52 +000075
76 if (ompt_enabled) {
77 // Only fully initialize variables needed by OMPT if OMPT is enabled.
78 team_info = __ompt_get_teaminfo(0, NULL);
79 task_info = __ompt_get_taskinfo(0);
80 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +000081#endif
82
Jim Cownie4cc4bb42014-10-07 16:25:50 +000083 KMP_DEBUG_ASSERT( plastiter && plower && pupper && pstride );
Jim Cownie5e8470a2013-09-27 10:38:44 +000084 KE_TRACE( 10, ("__kmpc_for_static_init called (%d)\n", global_tid));
85 #ifdef KMP_DEBUG
86 {
87 const char * buff;
88 // create format specifiers before the debug output
89 buff = __kmp_str_format(
90 "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," \
91 " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
92 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
93 traits_t< ST >::spec, traits_t< ST >::spec, traits_t< T >::spec );
94 KD_TRACE(100, ( buff, global_tid, schedtype, *plastiter,
95 *plower, *pupper, *pstride, incr, chunk ) );
96 __kmp_str_free( &buff );
97 }
98 #endif
99
100 if ( __kmp_env_consistency_check ) {
101 __kmp_push_workshare( global_tid, ct_pdo, loc );
102 if ( incr == 0 ) {
103 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000104 }
105 }
106 /* special handling for zero-trip loops */
107 if ( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000108 if( plastiter != NULL )
109 *plastiter = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000110 /* leave pupper and plower set to entire iteration space */
111 *pstride = incr; /* value should never be used */
112 // *plower = *pupper - incr; // let compiler bypass the illegal loop (like for(i=1;i<10;i--)) THIS LINE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE ON A ZERO-TRIP LOOP (lower=1,\
113 upper=0,stride=1) - JPH June 23, 2009.
114 #ifdef KMP_DEBUG
115 {
116 const char * buff;
117 // create format specifiers before the debug output
118 buff = __kmp_str_format(
119 "__kmpc_for_static_init:(ZERO TRIP) liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>, loc = %%s\n",
120 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
121 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride, loc->psource ) );
122 __kmp_str_free( &buff );
123 }
124 #endif
125 KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000126
127#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000128 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000129 ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
130 ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
131 team_info->parallel_id, task_info->task_id,
132 team_info->microtask);
133 }
134#endif
Jonathan Peyton45be4502015-08-11 21:36:41 +0000135 KMP_COUNT_VALUE (FOR_static_iterations, 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000136 return;
137 }
138
139 #if OMP_40_ENABLED
Jonathan Peytonea0fe1d2016-02-25 17:55:50 +0000140 // Although there are schedule enumerations above kmp_ord_upper which are not schedules for "distribute",
141 // the only ones which are useful are dynamic, so cannot be seen here, since this codepath is only executed
142 // for static schedules.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000143 if ( schedtype > kmp_ord_upper ) {
144 // we are in DISTRIBUTE construct
145 schedtype += kmp_sch_static - kmp_distribute_static; // AC: convert to usual schedule type
Andrey Churbanov51aecb82015-05-06 19:22:36 +0000146 tid = th->th.th_team->t.t_master_tid;
147 team = th->th.th_team->t.t_parent;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000148 } else
149 #endif
150 {
151 tid = __kmp_tid_from_gtid( global_tid );
Andrey Churbanov51aecb82015-05-06 19:22:36 +0000152 team = th->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000153 }
154
155 /* determine if "for" loop is an active worksharing construct */
156 if ( team -> t.t_serialized ) {
157 /* serialized parallel, each thread executes whole iteration space */
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000158 if( plastiter != NULL )
159 *plastiter = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000160 /* leave pupper and plower set to entire iteration space */
161 *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
162
163 #ifdef KMP_DEBUG
164 {
165 const char * buff;
166 // create format specifiers before the debug output
167 buff = __kmp_str_format(
168 "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
169 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
170 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
171 __kmp_str_free( &buff );
172 }
173 #endif
174 KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000175
176#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000177 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000178 ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
179 ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
180 team_info->parallel_id, task_info->task_id,
181 team_info->microtask);
182 }
183#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000184 return;
185 }
186 nth = team->t.t_nproc;
187 if ( nth == 1 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000188 if( plastiter != NULL )
189 *plastiter = TRUE;
190 *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000191 #ifdef KMP_DEBUG
192 {
193 const char * buff;
194 // create format specifiers before the debug output
195 buff = __kmp_str_format(
196 "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
197 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
198 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
199 __kmp_str_free( &buff );
200 }
201 #endif
202 KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000203
204#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000205 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000206 ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
207 ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
208 team_info->parallel_id, task_info->task_id,
209 team_info->microtask);
210 }
211#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000212 return;
213 }
214
215 /* compute trip count */
216 if ( incr == 1 ) {
217 trip_count = *pupper - *plower + 1;
218 } else if (incr == -1) {
219 trip_count = *plower - *pupper + 1;
Jonathan Peyton5235a1b2016-04-18 21:38:29 +0000220 } else if ( incr > 0 ) {
221 // upper-lower can exceed the limit of signed type
222 trip_count = (UT)(*pupper - *plower) / incr + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000223 } else {
Jonathan Peyton5235a1b2016-04-18 21:38:29 +0000224 trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000225 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000226
Jim Cownie5e8470a2013-09-27 10:38:44 +0000227 if ( __kmp_env_consistency_check ) {
228 /* tripcount overflow? */
229 if ( trip_count == 0 && *pupper != *plower ) {
230 __kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc );
231 }
232 }
Jonathan Peyton45be4502015-08-11 21:36:41 +0000233 KMP_COUNT_VALUE (FOR_static_iterations, trip_count);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000234
235 /* compute remaining parameters */
236 switch ( schedtype ) {
237 case kmp_sch_static:
238 {
239 if ( trip_count < nth ) {
240 KMP_DEBUG_ASSERT(
241 __kmp_static == kmp_sch_static_greedy || \
242 __kmp_static == kmp_sch_static_balanced
243 ); // Unknown static scheduling type.
244 if ( tid < trip_count ) {
245 *pupper = *plower = *plower + tid * incr;
246 } else {
247 *plower = *pupper + incr;
248 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000249 if( plastiter != NULL )
250 *plastiter = ( tid == trip_count - 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000251 } else {
252 if ( __kmp_static == kmp_sch_static_balanced ) {
253 register UT small_chunk = trip_count / nth;
254 register UT extras = trip_count % nth;
255 *plower += incr * ( tid * small_chunk + ( tid < extras ? tid : extras ) );
256 *pupper = *plower + small_chunk * incr - ( tid < extras ? 0 : incr );
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000257 if( plastiter != NULL )
258 *plastiter = ( tid == nth - 1 );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000259 } else {
260 register T big_chunk_inc_count = ( trip_count/nth +
261 ( ( trip_count % nth ) ? 1 : 0) ) * incr;
262 register T old_upper = *pupper;
263
264 KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
265 // Unknown static scheduling type.
266
267 *plower += tid * big_chunk_inc_count;
268 *pupper = *plower + big_chunk_inc_count - incr;
269 if ( incr > 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000270 if( *pupper < *plower )
Jonathan Peyton12313d42017-01-27 18:09:22 +0000271 *pupper = traits_t<T>::max_value;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000272 if( plastiter != NULL )
273 *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000274 if ( *pupper > old_upper ) *pupper = old_upper; // tracker C73258
275 } else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000276 if( *pupper > *plower )
Jonathan Peyton12313d42017-01-27 18:09:22 +0000277 *pupper = traits_t<T>::min_value;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000278 if( plastiter != NULL )
279 *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000280 if ( *pupper < old_upper ) *pupper = old_upper; // tracker C73258
281 }
282 }
283 }
Andrey Churbanov3b939d02017-03-21 12:17:22 +0000284 *pstride = trip_count;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000285 break;
286 }
287 case kmp_sch_static_chunked:
288 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000289 register ST span;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000290 if ( chunk < 1 ) {
291 chunk = 1;
292 }
293 span = chunk * incr;
294 *pstride = span * nth;
295 *plower = *plower + (span * tid);
296 *pupper = *plower + span - incr;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000297 if( plastiter != NULL )
298 *plastiter = (tid == ((trip_count - 1)/( UT )chunk) % nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000299 break;
300 }
Jonathan Peytondf6818b2016-06-14 17:57:47 +0000301#if OMP_45_ENABLED
Jonathan Peytonef734792016-05-31 19:12:18 +0000302 case kmp_sch_static_balanced_chunked:
303 {
304 register T old_upper = *pupper;
305 // round up to make sure the chunk is enough to cover all iterations
306 register UT span = (trip_count+nth-1) / nth;
307
308 // perform chunk adjustment
309 chunk = (span + chunk - 1) & ~(chunk-1);
310
311 span = chunk * incr;
312 *plower = *plower + (span * tid);
313 *pupper = *plower + span - incr;
314 if ( incr > 0 ) {
315 if ( *pupper > old_upper ) *pupper = old_upper;
316 } else
317 if ( *pupper < old_upper ) *pupper = old_upper;
318
319 if( plastiter != NULL )
320 *plastiter = ( tid == ((trip_count - 1)/( UT )chunk) );
321 break;
322 }
323#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000324 default:
325 KMP_ASSERT2( 0, "__kmpc_for_static_init: unknown scheduling type" );
326 break;
327 }
328
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000329#if USE_ITT_BUILD
330 // Report loop metadata
Andrey Churbanov51aecb82015-05-06 19:22:36 +0000331 if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
332#if OMP_40_ENABLED
333 th->th.th_teams_microtask == NULL &&
334#endif
335 team->t.t_active_level == 1 )
336 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000337 kmp_uint64 cur_chunk = chunk;
338 // Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked
339 if ( schedtype == kmp_sch_static ) {
340 cur_chunk = trip_count / nth + ( ( trip_count % nth ) ? 1 : 0);
341 }
342 // 0 - "static" schedule
343 __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
344 }
345#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000346 #ifdef KMP_DEBUG
347 {
348 const char * buff;
349 // create format specifiers before the debug output
350 buff = __kmp_str_format(
351 "__kmpc_for_static_init: liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>\n",
352 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
353 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
354 __kmp_str_free( &buff );
355 }
356 #endif
357 KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000358
359#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +0000360 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000361 ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
362 ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
363 team_info->parallel_id, task_info->task_id, team_info->microtask);
364 }
365#endif
366
Jim Cownie5e8470a2013-09-27 10:38:44 +0000367 return;
368}
369
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000370template< typename T >
371static void
372__kmp_dist_for_static_init(
373 ident_t *loc,
374 kmp_int32 gtid,
375 kmp_int32 schedule,
376 kmp_int32 *plastiter,
377 T *plower,
378 T *pupper,
379 T *pupperDist,
380 typename traits_t< T >::signed_t *pstride,
381 typename traits_t< T >::signed_t incr,
382 typename traits_t< T >::signed_t chunk
383) {
Jonathan Peyton45be4502015-08-11 21:36:41 +0000384 KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000385 typedef typename traits_t< T >::unsigned_t UT;
386 typedef typename traits_t< T >::signed_t ST;
387 register kmp_uint32 tid;
388 register kmp_uint32 nth;
389 register kmp_uint32 team_id;
390 register kmp_uint32 nteams;
391 register UT trip_count;
392 register kmp_team_t *team;
393 kmp_info_t * th;
394
395 KMP_DEBUG_ASSERT( plastiter && plower && pupper && pupperDist && pstride );
396 KE_TRACE( 10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
397 #ifdef KMP_DEBUG
398 {
399 const char * buff;
400 // create format specifiers before the debug output
401 buff = __kmp_str_format(
402 "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "\
403 "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
404 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
405 traits_t< ST >::spec, traits_t< T >::spec );
406 KD_TRACE(100, ( buff, gtid, schedule, *plastiter,
407 *plower, *pupper, incr, chunk ) );
408 __kmp_str_free( &buff );
409 }
410 #endif
411
412 if( __kmp_env_consistency_check ) {
413 __kmp_push_workshare( gtid, ct_pdo, loc );
414 if( incr == 0 ) {
415 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
416 }
417 if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
418 // The loop is illegal.
419 // Some zero-trip loops maintained by compiler, e.g.:
420 // for(i=10;i<0;++i) // lower >= upper - run-time check
421 // for(i=0;i>10;--i) // lower <= upper - run-time check
422 // for(i=0;i>10;++i) // incr > 0 - compile-time check
423 // for(i=10;i<0;--i) // incr < 0 - compile-time check
424 // Compiler does not check the following illegal loops:
425 // for(i=0;i<10;i+=incr) // where incr<0
426 // for(i=10;i>0;i-=incr) // where incr<0
427 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
428 }
429 }
430 tid = __kmp_tid_from_gtid( gtid );
431 th = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000432 nth = th->th.th_team_nproc;
433 team = th->th.th_team;
434 #if OMP_40_ENABLED
Jonathan Peyton441f3372015-09-21 17:24:46 +0000435 KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000436 nteams = th->th.th_teams_size.nteams;
437 #endif
438 team_id = team->t.t_master_tid;
439 KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);
440
441 // compute global trip count
442 if( incr == 1 ) {
443 trip_count = *pupper - *plower + 1;
444 } else if(incr == -1) {
445 trip_count = *plower - *pupper + 1;
Jonathan Peyton5235a1b2016-04-18 21:38:29 +0000446 } else if ( incr > 0 ) {
447 // upper-lower can exceed the limit of signed type
448 trip_count = (UT)(*pupper - *plower) / incr + 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000449 } else {
Jonathan Peyton5235a1b2016-04-18 21:38:29 +0000450 trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000451 }
Jonathan Peyton45be4502015-08-11 21:36:41 +0000452
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000453 *pstride = *pupper - *plower; // just in case (can be unused)
454 if( trip_count <= nteams ) {
455 KMP_DEBUG_ASSERT(
456 __kmp_static == kmp_sch_static_greedy || \
457 __kmp_static == kmp_sch_static_balanced
458 ); // Unknown static scheduling type.
459 // only masters of some teams get single iteration, other threads get nothing
460 if( team_id < trip_count && tid == 0 ) {
461 *pupper = *pupperDist = *plower = *plower + team_id * incr;
462 } else {
463 *pupperDist = *pupper;
464 *plower = *pupper + incr; // compiler should skip loop body
465 }
466 if( plastiter != NULL )
467 *plastiter = ( tid == 0 && team_id == trip_count - 1 );
468 } else {
469 // Get the team's chunk first (each team gets at most one chunk)
470 if( __kmp_static == kmp_sch_static_balanced ) {
471 register UT chunkD = trip_count / nteams;
472 register UT extras = trip_count % nteams;
473 *plower += incr * ( team_id * chunkD + ( team_id < extras ? team_id : extras ) );
474 *pupperDist = *plower + chunkD * incr - ( team_id < extras ? 0 : incr );
475 if( plastiter != NULL )
476 *plastiter = ( team_id == nteams - 1 );
477 } else {
478 register T chunk_inc_count =
479 ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
480 register T upper = *pupper;
481 KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
482 // Unknown static scheduling type.
483 *plower += team_id * chunk_inc_count;
484 *pupperDist = *plower + chunk_inc_count - incr;
485 // Check/correct bounds if needed
486 if( incr > 0 ) {
487 if( *pupperDist < *plower )
Jonathan Peyton12313d42017-01-27 18:09:22 +0000488 *pupperDist = traits_t<T>::max_value;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000489 if( plastiter != NULL )
490 *plastiter = *plower <= upper && *pupperDist > upper - incr;
491 if( *pupperDist > upper )
492 *pupperDist = upper; // tracker C73258
493 if( *plower > *pupperDist ) {
494 *pupper = *pupperDist; // no iterations available for the team
495 goto end;
496 }
497 } else {
498 if( *pupperDist > *plower )
Jonathan Peyton12313d42017-01-27 18:09:22 +0000499 *pupperDist = traits_t<T>::min_value;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000500 if( plastiter != NULL )
501 *plastiter = *plower >= upper && *pupperDist < upper - incr;
502 if( *pupperDist < upper )
503 *pupperDist = upper; // tracker C73258
504 if( *plower < *pupperDist ) {
505 *pupper = *pupperDist; // no iterations available for the team
506 goto end;
507 }
508 }
509 }
510 // Get the parallel loop chunk now (for thread)
511 // compute trip count for team's chunk
512 if( incr == 1 ) {
513 trip_count = *pupperDist - *plower + 1;
514 } else if(incr == -1) {
515 trip_count = *plower - *pupperDist + 1;
Jonathan Peyton5235a1b2016-04-18 21:38:29 +0000516 } else if ( incr > 1 ) {
517 // upper-lower can exceed the limit of signed type
518 trip_count = (UT)(*pupperDist - *plower) / incr + 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000519 } else {
Jonathan Peyton5235a1b2016-04-18 21:38:29 +0000520 trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000521 }
522 KMP_DEBUG_ASSERT( trip_count );
523 switch( schedule ) {
524 case kmp_sch_static:
525 {
526 if( trip_count <= nth ) {
527 KMP_DEBUG_ASSERT(
528 __kmp_static == kmp_sch_static_greedy || \
529 __kmp_static == kmp_sch_static_balanced
530 ); // Unknown static scheduling type.
531 if( tid < trip_count )
532 *pupper = *plower = *plower + tid * incr;
533 else
534 *plower = *pupper + incr; // no iterations available
535 if( plastiter != NULL )
536 if( *plastiter != 0 && !( tid == trip_count - 1 ) )
537 *plastiter = 0;
538 } else {
539 if( __kmp_static == kmp_sch_static_balanced ) {
540 register UT chunkL = trip_count / nth;
541 register UT extras = trip_count % nth;
542 *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
543 *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
544 if( plastiter != NULL )
545 if( *plastiter != 0 && !( tid == nth - 1 ) )
546 *plastiter = 0;
547 } else {
548 register T chunk_inc_count =
549 ( trip_count / nth + ( ( trip_count % nth ) ? 1 : 0) ) * incr;
550 register T upper = *pupperDist;
551 KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
552 // Unknown static scheduling type.
553 *plower += tid * chunk_inc_count;
554 *pupper = *plower + chunk_inc_count - incr;
555 if( incr > 0 ) {
556 if( *pupper < *plower )
Jonathan Peyton12313d42017-01-27 18:09:22 +0000557 *pupper = traits_t<T>::max_value;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000558 if( plastiter != NULL )
559 if( *plastiter != 0 && !(*plower <= upper && *pupper > upper - incr) )
560 *plastiter = 0;
561 if( *pupper > upper )
562 *pupper = upper;//tracker C73258
563 } else {
564 if( *pupper > *plower )
Jonathan Peyton12313d42017-01-27 18:09:22 +0000565 *pupper = traits_t<T>::min_value;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000566 if( plastiter != NULL )
567 if( *plastiter != 0 && !(*plower >= upper && *pupper < upper - incr) )
568 *plastiter = 0;
569 if( *pupper < upper )
570 *pupper = upper;//tracker C73258
571 }
572 }
573 }
574 break;
575 }
576 case kmp_sch_static_chunked:
577 {
578 register ST span;
579 if( chunk < 1 )
580 chunk = 1;
581 span = chunk * incr;
582 *pstride = span * nth;
583 *plower = *plower + (span * tid);
584 *pupper = *plower + span - incr;
585 if( plastiter != NULL )
586 if( *plastiter != 0 && !(tid == ((trip_count - 1) / ( UT )chunk) % nth) )
587 *plastiter = 0;
588 break;
589 }
590 default:
591 KMP_ASSERT2( 0, "__kmpc_dist_for_static_init: unknown loop scheduling type" );
592 break;
593 }
594 }
595 end:;
596 #ifdef KMP_DEBUG
597 {
598 const char * buff;
599 // create format specifiers before the debug output
600 buff = __kmp_str_format(
601 "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "\
602 "stride=%%%s signed?<%s>\n",
603 traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec,
604 traits_t< ST >::spec, traits_t< T >::spec );
605 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pupperDist, *pstride ) );
606 __kmp_str_free( &buff );
607 }
608 #endif
609 KE_TRACE( 10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid ) );
610 return;
611}
612
613template< typename T >
614static void
615__kmp_team_static_init(
616 ident_t *loc,
617 kmp_int32 gtid,
618 kmp_int32 *p_last,
619 T *p_lb,
620 T *p_ub,
621 typename traits_t< T >::signed_t *p_st,
622 typename traits_t< T >::signed_t incr,
623 typename traits_t< T >::signed_t chunk
624) {
625 // The routine returns the first chunk distributed to the team and
626 // stride for next chunks calculation.
627 // Last iteration flag set for the team that will execute
628 // the last iteration of the loop.
629 // The routine is called for dist_schedue(static,chunk) only.
630 typedef typename traits_t< T >::unsigned_t UT;
631 typedef typename traits_t< T >::signed_t ST;
632 kmp_uint32 team_id;
633 kmp_uint32 nteams;
634 UT trip_count;
635 T lower;
636 T upper;
637 ST span;
638 kmp_team_t *team;
639 kmp_info_t *th;
640
641 KMP_DEBUG_ASSERT( p_last && p_lb && p_ub && p_st );
642 KE_TRACE( 10, ("__kmp_team_static_init called (%d)\n", gtid));
643 #ifdef KMP_DEBUG
644 {
645 const char * buff;
646 // create format specifiers before the debug output
647 buff = __kmp_str_format( "__kmp_team_static_init enter: T#%%d liter=%%d "\
648 "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
649 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
650 traits_t< ST >::spec, traits_t< T >::spec );
651 KD_TRACE(100, ( buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
652 __kmp_str_free( &buff );
653 }
654 #endif
655
656 lower = *p_lb;
657 upper = *p_ub;
658 if( __kmp_env_consistency_check ) {
659 if( incr == 0 ) {
660 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
661 }
662 if( incr > 0 ? (upper < lower) : (lower < upper) ) {
663 // The loop is illegal.
664 // Some zero-trip loops maintained by compiler, e.g.:
665 // for(i=10;i<0;++i) // lower >= upper - run-time check
666 // for(i=0;i>10;--i) // lower <= upper - run-time check
667 // for(i=0;i>10;++i) // incr > 0 - compile-time check
668 // for(i=10;i<0;--i) // incr < 0 - compile-time check
669 // Compiler does not check the following illegal loops:
670 // for(i=0;i<10;i+=incr) // where incr<0
671 // for(i=10;i>0;i-=incr) // where incr<0
672 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
673 }
674 }
675 th = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000676 team = th->th.th_team;
677 #if OMP_40_ENABLED
Jonathan Peyton441f3372015-09-21 17:24:46 +0000678 KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000679 nteams = th->th.th_teams_size.nteams;
680 #endif
681 team_id = team->t.t_master_tid;
682 KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);
683
684 // compute trip count
685 if( incr == 1 ) {
686 trip_count = upper - lower + 1;
687 } else if(incr == -1) {
688 trip_count = lower - upper + 1;
Jonathan Peyton5235a1b2016-04-18 21:38:29 +0000689 } else if ( incr > 0 ) {
690 // upper-lower can exceed the limit of signed type
691 trip_count = (UT)(upper - lower) / incr + 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000692 } else {
Jonathan Peyton5235a1b2016-04-18 21:38:29 +0000693 trip_count = (UT)(lower - upper) / (-incr) + 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000694 }
695 if( chunk < 1 )
696 chunk = 1;
697 span = chunk * incr;
698 *p_st = span * nteams;
699 *p_lb = lower + (span * team_id);
700 *p_ub = *p_lb + span - incr;
701 if ( p_last != NULL )
702 *p_last = (team_id == ((trip_count - 1)/(UT)chunk) % nteams);
703 // Correct upper bound if needed
704 if( incr > 0 ) {
705 if( *p_ub < *p_lb ) // overflow?
Jonathan Peyton12313d42017-01-27 18:09:22 +0000706 *p_ub = traits_t<T>::max_value;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000707 if( *p_ub > upper )
708 *p_ub = upper; // tracker C73258
709 } else { // incr < 0
710 if( *p_ub > *p_lb )
Jonathan Peyton12313d42017-01-27 18:09:22 +0000711 *p_ub = traits_t<T>::min_value;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000712 if( *p_ub < upper )
713 *p_ub = upper; // tracker C73258
714 }
715 #ifdef KMP_DEBUG
716 {
717 const char * buff;
718 // create format specifiers before the debug output
719 buff = __kmp_str_format( "__kmp_team_static_init exit: T#%%d team%%u liter=%%d "\
720 "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
721 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
722 traits_t< ST >::spec );
723 KD_TRACE(100, ( buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
724 __kmp_str_free( &buff );
725 }
726 #endif
727}
728
Jim Cownie5e8470a2013-09-27 10:38:44 +0000729//--------------------------------------------------------------------------------------
730extern "C" {
731
/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedtype  Scheduling type
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound
@param pstride Pointer to the stride
@param incr Loop increment
@param chunk The chunk size

Each of the four functions here are identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the set of iterations
to be executed by the current thread from the statically scheduled loop that is described by the
initial values of the bounds, stride, increment and chunk size.

@{
*/
752void
753__kmpc_for_static_init_4( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
754 kmp_int32 *plower, kmp_int32 *pupper,
755 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
756{
757 __kmp_for_static_init< kmp_int32 >(
758 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
759}
760
761/*!
762 See @ref __kmpc_for_static_init_4
763 */
764void
765__kmpc_for_static_init_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
766 kmp_uint32 *plower, kmp_uint32 *pupper,
767 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
768{
769 __kmp_for_static_init< kmp_uint32 >(
770 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
771}
772
773/*!
774 See @ref __kmpc_for_static_init_4
775 */
776void
777__kmpc_for_static_init_8( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
778 kmp_int64 *plower, kmp_int64 *pupper,
779 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
780{
781 __kmp_for_static_init< kmp_int64 >(
782 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
783}
784
785/*!
786 See @ref __kmpc_for_static_init_4
787 */
788void
789__kmpc_for_static_init_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
790 kmp_uint64 *plower, kmp_uint64 *pupper,
791 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
792{
793 __kmp_for_static_init< kmp_uint64 >(
794 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
795}
796/*!
797@}
798*/
799
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000800/*!
801@ingroup WORK_SHARING
802@param loc Source code location
803@param gtid Global thread id of this thread
Jonathan Peyton81f9cd12015-05-22 22:37:22 +0000804@param schedule Scheduling type for the parallel loop
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000805@param plastiter Pointer to the "last iteration" flag
806@param plower Pointer to the lower bound
807@param pupper Pointer to the upper bound of loop chunk
808@param pupperD Pointer to the upper bound of dist_chunk
Jonathan Peyton81f9cd12015-05-22 22:37:22 +0000809@param pstride Pointer to the stride for parallel loop
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000810@param incr Loop increment
Jonathan Peyton81f9cd12015-05-22 22:37:22 +0000811@param chunk The chunk size for the parallel loop
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000812
The four functions here are identical apart from their argument types.
814
815The functions compute the upper and lower bounds and strides to be used for the set of iterations
816to be executed by the current thread from the statically scheduled loop that is described by the
817initial values of the bounds, strides, increment and chunks for parallel loop and distribute
818constructs.
819
820@{
821*/
822void
823__kmpc_dist_for_static_init_4(
824 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
825 kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD,
826 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
827{
828 __kmp_dist_for_static_init< kmp_int32 >(
829 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
830}
831
832/*!
833 See @ref __kmpc_dist_for_static_init_4
834 */
835void
836__kmpc_dist_for_static_init_4u(
837 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
838 kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD,
839 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
840{
841 __kmp_dist_for_static_init< kmp_uint32 >(
842 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
843}
844
845/*!
846 See @ref __kmpc_dist_for_static_init_4
847 */
848void
849__kmpc_dist_for_static_init_8(
850 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
851 kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD,
852 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
853{
854 __kmp_dist_for_static_init< kmp_int64 >(
855 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
856}
857
858/*!
859 See @ref __kmpc_dist_for_static_init_4
860 */
861void
862__kmpc_dist_for_static_init_8u(
863 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
864 kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD,
865 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
866{
867 __kmp_dist_for_static_init< kmp_uint64 >(
868 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
869}
870/*!
871@}
872*/
873
874//-----------------------------------------------------------------------------------------
875// Auxiliary routines for Distribute Parallel Loop construct implementation
876// Transfer call to template< type T >
877// __kmp_team_static_init( ident_t *loc, int gtid,
878// int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
879
880/*!
881@ingroup WORK_SHARING
882@{
883@param loc Source location
884@param gtid Global thread id
885@param p_last pointer to last iteration flag
886@param p_lb pointer to Lower bound
887@param p_ub pointer to Upper bound
888@param p_st Step (or increment if you prefer)
889@param incr Loop increment
890@param chunk The chunk size to block with
891
892The functions compute the upper and lower bounds and stride to be used for the set of iterations
893to be executed by the current team from the statically scheduled loop that is described by the
894initial values of the bounds, stride, increment and chunk for the distribute construct as part of
895composite distribute parallel loop construct.
896These functions are all identical apart from the types of the arguments.
897*/
898
899void
900__kmpc_team_static_init_4(
901 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
902 kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
903{
904 KMP_DEBUG_ASSERT( __kmp_init_serial );
905 __kmp_team_static_init< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
906}
907
908/*!
909 See @ref __kmpc_team_static_init_4
910 */
911void
912__kmpc_team_static_init_4u(
913 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
914 kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
915{
916 KMP_DEBUG_ASSERT( __kmp_init_serial );
917 __kmp_team_static_init< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
918}
919
920/*!
921 See @ref __kmpc_team_static_init_4
922 */
923void
924__kmpc_team_static_init_8(
925 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
926 kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
927{
928 KMP_DEBUG_ASSERT( __kmp_init_serial );
929 __kmp_team_static_init< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
930}
931
932/*!
933 See @ref __kmpc_team_static_init_4
934 */
935void
936__kmpc_team_static_init_8u(
937 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
938 kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
939{
940 KMP_DEBUG_ASSERT( __kmp_init_serial );
941 __kmp_team_static_init< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
942}
943/*!
944@}
945*/
946
Jim Cownie5e8470a2013-09-27 10:38:44 +0000947} // extern "C"
948