blob: c19fe442772a06a3361528f712b7dae6a443cbec [file] [log] [blame]
Jim Cownie5e8470a2013-09-27 10:38:44 +00001/*
Jonathan Peytonde4749b2016-12-14 23:01:24 +00002 * kmp_sched.cpp -- static scheduling -- iteration initialization
Jim Cownie5e8470a2013-09-27 10:38:44 +00003 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15
16/*
17 * Static scheduling initialization.
18 *
19 * NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
20 * it may change values between parallel regions. __kmp_max_nth
21 * is the largest value __kmp_nth may take, 1 is the smallest.
22 *
23 */
24
25#include "kmp.h"
26#include "kmp_i18n.h"
27#include "kmp_str.h"
28#include "kmp_error.h"
Jim Cownie4cc4bb42014-10-07 16:25:50 +000029#include "kmp_stats.h"
30#include "kmp_itt.h"
Jim Cownie5e8470a2013-09-27 10:38:44 +000031
Andrey Churbanovd7d088f2015-04-29 16:42:24 +000032#if OMPT_SUPPORT
33#include "ompt-specific.h"
34#endif
35
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// printf-style format specifiers used by the KD_TRACE debug output in this
// file, one per loop-index type instantiated here ( d, u, lld, llu ).
char const * traits_t< int >::spec = "d";
char const * traits_t< unsigned int >::spec = "u";
char const * traits_t< long long >::spec = "lld";
char const * traits_t< unsigned long long >::spec = "llu";
//-------------------------------------------------------------------------
#endif
45
// Compute the calling thread's portion of a statically scheduled loop.
//
// On entry *plower/*pupper describe the whole iteration space; on exit they
// hold the bounds of this thread's chunk and *pstride the stride to the
// thread's next chunk (meaningful for kmp_sch_static_chunked).  *plastiter
// is set iff this thread executes the sequentially last iteration.
//   loc        - source location for error reporting / ITT metadata
//   global_tid - global thread id of the caller
//   schedtype  - static schedule kind (may be a "distribute" enum, converted below)
//   incr       - loop increment (must be non-zero)
//   chunk      - chunk size for the chunked schedules
template< typename T >
static void
__kmp_for_static_init(
    ident_t *loc,
    kmp_int32 global_tid,
    kmp_int32 schedtype,
    kmp_int32 *plastiter,
    T *plower,
    T *pupper,
    typename traits_t< T >::signed_t *pstride,
    typename traits_t< T >::signed_t incr,
    typename traits_t< T >::signed_t chunk
) {
    KMP_COUNT_BLOCK(OMP_FOR_static);
    KMP_TIME_PARTITIONED_BLOCK(FOR_static_scheduling);

    typedef typename traits_t< T >::unsigned_t UT;
    typedef typename traits_t< T >::signed_t ST;
    /* this all has to be changed back to TID and such.. */
    register kmp_int32 gtid = global_tid;
    register kmp_uint32 tid;
    register kmp_uint32 nth;
    register UT trip_count;
    register kmp_team_t *team;
    register kmp_info_t *th = __kmp_threads[ gtid ];

#if OMPT_SUPPORT && OMPT_TRACE
    ompt_team_info_t *team_info = NULL;
    ompt_task_info_t *task_info = NULL;

    if (ompt_enabled) {
        // Only fully initialize variables needed by OMPT if OMPT is enabled.
        team_info = __ompt_get_teaminfo(0, NULL);
        task_info = __ompt_get_taskinfo(0);
    }
#endif

    KMP_DEBUG_ASSERT( plastiter && plower && pupper && pstride );
    KE_TRACE( 10, ("__kmpc_for_static_init called (%d)\n", global_tid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," \
            " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, global_tid, schedtype, *plastiter,
            *plower, *pupper, *pstride, incr, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    if ( __kmp_env_consistency_check ) {
        __kmp_push_workshare( global_tid, ct_pdo, loc );
        if ( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
    }
    /* special handling for zero-trip loops */
    if ( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
        if( plastiter != NULL )
            *plastiter = FALSE;
        /* leave pupper and plower set to entire iteration space */
        *pstride = incr;   /* value should never be used */
        // *plower = *pupper - incr; // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
        // THIS LINE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE ON A ZERO-TRIP LOOP
        // (lower=1, upper=0, stride=1) - JPH June 23, 2009.
        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init:(ZERO TRIP) liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>, loc = %%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride, loc->psource ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        KMP_COUNT_VALUE (FOR_static_iterations, 0);
        return;
    }

    #if OMP_40_ENABLED
    // Although there are schedule enumerations above kmp_ord_upper which are not schedules for "distribute",
    // the only ones which are useful are dynamic, so cannot be seen here, since this codepath is only executed
    // for static schedules.
    if ( schedtype > kmp_ord_upper ) {
        // we are in DISTRIBUTE construct
        schedtype += kmp_sch_static - kmp_distribute_static;   // AC: convert to usual schedule type
        tid  = th->th.th_team->t.t_master_tid;
        team = th->th.th_team->t.t_parent;
    } else
    #endif
    {
        tid  = __kmp_tid_from_gtid( global_tid );
        team = th->th.th_team;
    }

    /* determine if "for" loop is an active worksharing construct */
    if ( team -> t.t_serialized ) {
        /* serialized parallel, each thread executes whole iteration space */
        if( plastiter != NULL )
            *plastiter = TRUE;
        /* leave pupper and plower set to entire iteration space */
        *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        return;
    }
    nth = team->t.t_nproc;
    if ( nth == 1 ) {
        // Single-thread team: same as the serialized case above.
        if( plastiter != NULL )
            *plastiter = TRUE;
        *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        return;
    }

    /* compute trip count */
    if ( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if (incr == -1) {
        trip_count = *plower - *pupper + 1;
    } else if ( incr > 0 ) {
        // upper-lower can exceed the limit of signed type
        trip_count = (UT)(*pupper - *plower) / incr + 1;
    } else {
        trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
    }

    if ( __kmp_env_consistency_check ) {
        /* tripcount overflow? */
        if ( trip_count == 0 && *pupper != *plower ) {
            __kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc );
        }
    }
    KMP_COUNT_VALUE (FOR_static_iterations, trip_count);

    /* compute remaining parameters */
    switch ( schedtype ) {
    case kmp_sch_static:
        {
            if ( trip_count < nth ) {
                // Fewer iterations than threads: the first trip_count threads
                // get one iteration each, the rest get an empty range.
                KMP_DEBUG_ASSERT(
                    __kmp_static == kmp_sch_static_greedy || \
                    __kmp_static == kmp_sch_static_balanced
                ); // Unknown static scheduling type.
                if ( tid < trip_count ) {
                    *pupper = *plower = *plower + tid * incr;
                } else {
                    *plower = *pupper + incr;
                }
                if( plastiter != NULL )
                    *plastiter = ( tid == trip_count - 1 );
            } else {
                if ( __kmp_static == kmp_sch_static_balanced ) {
                    // Balanced: distribute the remainder one extra iteration
                    // at a time to the lowest-numbered threads.
                    register UT small_chunk = trip_count / nth;
                    register UT extras = trip_count % nth;
                    *plower += incr * ( tid * small_chunk + ( tid < extras ? tid : extras ) );
                    *pupper = *plower + small_chunk * incr - ( tid < extras ? 0 : incr );
                    if( plastiter != NULL )
                        *plastiter = ( tid == nth - 1 );
                } else {
                    // Greedy: every thread gets ceil(trip_count/nth) iterations;
                    // trailing threads may land past the end and are clipped below.
                    register T big_chunk_inc_count = ( trip_count/nth +
                        ( ( trip_count % nth ) ? 1 : 0) ) * incr;
                    register T old_upper = *pupper;

                    KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                        // Unknown static scheduling type.

                    *plower += tid * big_chunk_inc_count;
                    *pupper = *plower + big_chunk_inc_count - incr;
                    if ( incr > 0 ) {
                        // Saturate on arithmetic overflow before clipping.
                        if( *pupper < *plower )
                            *pupper = traits_t<T>::max_value;
                        if( plastiter != NULL )
                            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
                        if ( *pupper > old_upper ) *pupper = old_upper; // tracker C73258
                    } else {
                        if( *pupper > *plower )
                            *pupper = traits_t<T>::min_value;
                        if( plastiter != NULL )
                            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
                        if ( *pupper < old_upper ) *pupper = old_upper; // tracker C73258
                    }
                }
            }
            break;
        }
    case kmp_sch_static_chunked:
        {
            register ST span;
            if ( chunk < 1 ) {
                chunk = 1;
            }
            span = chunk * incr;
            *pstride = span * nth;
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if( plastiter != NULL )
                *plastiter = (tid == ((trip_count - 1)/( UT )chunk) % nth);
            break;
        }
#if OMP_45_ENABLED
    case kmp_sch_static_balanced_chunked:
        {
            register T old_upper = *pupper;
            // round up to make sure the chunk is enough to cover all iterations
            register UT span = (trip_count+nth-1) / nth;

            // perform chunk adjustment
            chunk = (span + chunk - 1) & ~(chunk-1);

            span = chunk * incr;
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if ( incr > 0 ) {
                if ( *pupper > old_upper ) *pupper = old_upper;
            } else
                if ( *pupper < old_upper ) *pupper = old_upper;

            if( plastiter != NULL )
                *plastiter = ( tid == ((trip_count - 1)/( UT )chunk) );
            break;
        }
#endif
    default:
        KMP_ASSERT2( 0, "__kmpc_for_static_init: unknown scheduling type" );
        break;
    }

#if USE_ITT_BUILD
    // Report loop metadata
    if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
#if OMP_40_ENABLED
        th->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level == 1 )
    {
        kmp_uint64 cur_chunk = chunk;
        // Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked
        if ( schedtype == kmp_sch_static ) {
            cur_chunk = trip_count / nth + ( ( trip_count % nth ) ? 1 : 0);
        }
        // 0 - "static" schedule
        __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
    }
#endif
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_for_static_init: liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
        __kmp_str_free( &buff );
    }
    #endif
    KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
            team_info->parallel_id, task_info->task_id, team_info->microtask);
    }
#endif

    return;
}
368
// Compute bounds for a combined "distribute parallel for" with static
// dist_schedule: first carve the iteration space into per-team chunks
// (*pupperDist is the team's upper bound), then carve the team's chunk into
// per-thread pieces (*plower/*pupper) according to `schedule`.
// *plastiter is cleared unless this thread runs the sequentially last
// iteration of the whole space.  Must be called inside a teams construct.
template< typename T >
static void
__kmp_dist_for_static_init(
    ident_t *loc,
    kmp_int32 gtid,
    kmp_int32 schedule,
    kmp_int32 *plastiter,
    T *plower,
    T *pupper,
    T *pupperDist,
    typename traits_t< T >::signed_t *pstride,
    typename traits_t< T >::signed_t incr,
    typename traits_t< T >::signed_t chunk
) {
    KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
    typedef typename traits_t< T >::unsigned_t UT;
    typedef typename traits_t< T >::signed_t ST;
    register kmp_uint32 tid;
    register kmp_uint32 nth;
    register kmp_uint32 team_id;
    register kmp_uint32 nteams;
    register UT trip_count;
    register kmp_team_t *team;
    kmp_info_t * th;

    KMP_DEBUG_ASSERT( plastiter && plower && pupper && pupperDist && pstride );
    KE_TRACE( 10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, schedule, *plastiter,
            *plower, *pupper, incr, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    if( __kmp_env_consistency_check ) {
        __kmp_push_workshare( gtid, ct_pdo, loc );
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
            // The loop is illegal.
            // Some zero-trip loops maintained by compiler, e.g.:
            //   for(i=10;i<0;++i) // lower >= upper - run-time check
            //   for(i=0;i>10;--i) // lower <= upper - run-time check
            //   for(i=0;i>10;++i) // incr > 0     - compile-time check
            //   for(i=10;i<0;--i) // incr < 0     - compile-time check
            // Compiler does not check the following illegal loops:
            //   for(i=0;i<10;i+=incr) // where incr<0
            //   for(i=10;i>0;i-=incr) // where incr<0
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    tid = __kmp_tid_from_gtid( gtid );
    th = __kmp_threads[gtid];
    nth = th->th.th_team_nproc;
    team = th->th.th_team;
    #if OMP_40_ENABLED
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
    nteams = th->th.th_teams_size.nteams;
    #endif
    // NOTE(review): nteams is only assigned under OMP_40_ENABLED; with that
    // macro off it is read uninitialized below — presumably this path is
    // never built without OMP 4.0 support. TODO confirm.
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute global trip count
    if( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if(incr == -1) {
        trip_count = *plower - *pupper + 1;
    } else if ( incr > 0 ) {
        // upper-lower can exceed the limit of signed type
        trip_count = (UT)(*pupper - *plower) / incr + 1;
    } else {
        trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
    }

    *pstride = *pupper - *plower;  // just in case (can be unused)
    if( trip_count <= nteams ) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy || \
            __kmp_static == kmp_sch_static_balanced
        ); // Unknown static scheduling type.
        // only masters of some teams get single iteration, other threads get nothing
        if( team_id < trip_count && tid == 0 ) {
            *pupper = *pupperDist = *plower = *plower + team_id * incr;
        } else {
            *pupperDist = *pupper;
            *plower = *pupper + incr; // compiler should skip loop body
        }
        if( plastiter != NULL )
            *plastiter = ( tid == 0 && team_id == trip_count - 1 );
    } else {
        // Get the team's chunk first (each team gets at most one chunk)
        if( __kmp_static == kmp_sch_static_balanced ) {
            register UT chunkD = trip_count / nteams;
            register UT extras = trip_count % nteams;
            *plower += incr * ( team_id * chunkD + ( team_id < extras ? team_id : extras ) );
            *pupperDist = *plower + chunkD * incr - ( team_id < extras ? 0 : incr );
            if( plastiter != NULL )
                *plastiter = ( team_id == nteams - 1 );
        } else {
            register T chunk_inc_count =
                ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
            register T upper = *pupper;
            KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                // Unknown static scheduling type.
            *plower += team_id * chunk_inc_count;
            *pupperDist = *plower + chunk_inc_count - incr;
            // Check/correct bounds if needed
            if( incr > 0 ) {
                // Saturate on arithmetic overflow before clipping.
                if( *pupperDist < *plower )
                    *pupperDist = traits_t<T>::max_value;
                if( plastiter != NULL )
                    *plastiter = *plower <= upper && *pupperDist > upper - incr;
                if( *pupperDist > upper )
                    *pupperDist = upper; // tracker C73258
                if( *plower > *pupperDist ) {
                    *pupper = *pupperDist;  // no iterations available for the team
                    goto end;
                }
            } else {
                if( *pupperDist > *plower )
                    *pupperDist = traits_t<T>::min_value;
                if( plastiter != NULL )
                    *plastiter = *plower >= upper && *pupperDist < upper - incr;
                if( *pupperDist < upper )
                    *pupperDist = upper; // tracker C73258
                if( *plower < *pupperDist ) {
                    *pupper = *pupperDist;  // no iterations available for the team
                    goto end;
                }
            }
        }
        // Get the parallel loop chunk now (for thread)
        // compute trip count for team's chunk
        if( incr == 1 ) {
            trip_count = *pupperDist - *plower + 1;
        } else if(incr == -1) {
            trip_count = *plower - *pupperDist + 1;
        } else if ( incr > 1 ) {
            // incr == 1 is handled above, so ( incr > 1 ) covers all
            // remaining positive increments here.
            // upper-lower can exceed the limit of signed type
            trip_count = (UT)(*pupperDist - *plower) / incr + 1;
        } else {
            trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
        }
        KMP_DEBUG_ASSERT( trip_count );
        switch( schedule ) {
        case kmp_sch_static:
        {
            if( trip_count <= nth ) {
                KMP_DEBUG_ASSERT(
                    __kmp_static == kmp_sch_static_greedy || \
                    __kmp_static == kmp_sch_static_balanced
                ); // Unknown static scheduling type.
                if( tid < trip_count )
                    *pupper = *plower = *plower + tid * incr;
                else
                    *plower = *pupper + incr; // no iterations available
                if( plastiter != NULL )
                    if( *plastiter != 0 && !( tid == trip_count - 1 ) )
                        *plastiter = 0;
            } else {
                if( __kmp_static == kmp_sch_static_balanced ) {
                    register UT chunkL = trip_count / nth;
                    register UT extras = trip_count % nth;
                    *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
                    *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
                    if( plastiter != NULL )
                        if( *plastiter != 0 && !( tid == nth - 1 ) )
                            *plastiter = 0;
                } else {
                    register T chunk_inc_count =
                        ( trip_count / nth + ( ( trip_count % nth ) ? 1 : 0) ) * incr;
                    register T upper = *pupperDist;
                    KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                        // Unknown static scheduling type.
                    *plower += tid * chunk_inc_count;
                    *pupper = *plower + chunk_inc_count - incr;
                    if( incr > 0 ) {
                        if( *pupper < *plower )
                            *pupper = traits_t<T>::max_value;
                        if( plastiter != NULL )
                            if( *plastiter != 0 && !(*plower <= upper && *pupper > upper - incr) )
                                *plastiter = 0;
                        if( *pupper > upper )
                            *pupper = upper;//tracker C73258
                    } else {
                        if( *pupper > *plower )
                            *pupper = traits_t<T>::min_value;
                        if( plastiter != NULL )
                            if( *plastiter != 0 && !(*plower >= upper && *pupper < upper - incr) )
                                *plastiter = 0;
                        if( *pupper < upper )
                            *pupper = upper;//tracker C73258
                    }
                }
            }
            break;
        }
        case kmp_sch_static_chunked:
        {
            register ST span;
            if( chunk < 1 )
                chunk = 1;
            span = chunk * incr;
            *pstride = span * nth;
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if( plastiter != NULL )
                if( *plastiter != 0 && !(tid == ((trip_count - 1) / ( UT )chunk) % nth) )
                    *plastiter = 0;
            break;
        }
        default:
            KMP_ASSERT2( 0, "__kmpc_dist_for_static_init: unknown loop scheduling type" );
            break;
        }
    }
    end:;
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "\
            "stride=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pupperDist, *pstride ) );
        __kmp_str_free( &buff );
    }
    #endif
    KE_TRACE( 10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid ) );
    return;
}
611
// Compute the first chunk assigned to the calling team for a
// dist_schedule(static, chunk) distribute loop, plus the stride to the
// team's subsequent chunks.
template< typename T >
static void
__kmp_team_static_init(
    ident_t *loc,
    kmp_int32 gtid,
    kmp_int32 *p_last,
    T *p_lb,
    T *p_ub,
    typename traits_t< T >::signed_t *p_st,
    typename traits_t< T >::signed_t incr,
    typename traits_t< T >::signed_t chunk
) {
    // The routine returns the first chunk distributed to the team and
    // stride for next chunks calculation.
    // Last iteration flag set for the team that will execute
    // the last iteration of the loop.
    // The routine is called for dist_schedule(static,chunk) only.
    typedef typename traits_t< T >::unsigned_t UT;
    typedef typename traits_t< T >::signed_t ST;
    kmp_uint32 team_id;
    kmp_uint32 nteams;
    UT trip_count;
    T lower;
    T upper;
    ST span;
    kmp_team_t *team;
    kmp_info_t *th;

    KMP_DEBUG_ASSERT( p_last && p_lb && p_ub && p_st );
    KE_TRACE( 10, ("__kmp_team_static_init called (%d)\n", gtid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmp_team_static_init enter: T#%%d liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    lower = *p_lb;
    upper = *p_ub;
    if( __kmp_env_consistency_check ) {
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (upper < lower) : (lower < upper) ) {
            // The loop is illegal.
            // Some zero-trip loops maintained by compiler, e.g.:
            //   for(i=10;i<0;++i) // lower >= upper - run-time check
            //   for(i=0;i>10;--i) // lower <= upper - run-time check
            //   for(i=0;i>10;++i) // incr > 0     - compile-time check
            //   for(i=10;i<0;--i) // incr < 0     - compile-time check
            // Compiler does not check the following illegal loops:
            //   for(i=0;i<10;i+=incr) // where incr<0
            //   for(i=10;i>0;i-=incr) // where incr<0
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    th = __kmp_threads[gtid];
    team = th->th.th_team;
    #if OMP_40_ENABLED
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
    nteams = th->th.th_teams_size.nteams;
    #endif
    // NOTE(review): nteams is only assigned under OMP_40_ENABLED; with that
    // macro off it is read uninitialized below — presumably this path is
    // never built without OMP 4.0 support. TODO confirm.
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute trip count
    if( incr == 1 ) {
        trip_count = upper - lower + 1;
    } else if(incr == -1) {
        trip_count = lower - upper + 1;
    } else if ( incr > 0 ) {
        // upper-lower can exceed the limit of signed type
        trip_count = (UT)(upper - lower) / incr + 1;
    } else {
        trip_count = (UT)(lower - upper) / (-incr) + 1;
    }
    if( chunk < 1 )
        chunk = 1;
    span = chunk * incr;
    *p_st = span * nteams;
    *p_lb = lower + (span * team_id);
    *p_ub = *p_lb + span - incr;
    if ( p_last != NULL )
        *p_last = (team_id == ((trip_count - 1)/(UT)chunk) % nteams);
    // Correct upper bound if needed
    if( incr > 0 ) {
        if( *p_ub < *p_lb ) // overflow?
            *p_ub = traits_t<T>::max_value;
        if( *p_ub > upper )
            *p_ub = upper; // tracker C73258
    } else { // incr < 0
        if( *p_ub > *p_lb )
            *p_ub = traits_t<T>::min_value;
        if( *p_ub < upper )
            *p_ub = upper; // tracker C73258
    }
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmp_team_static_init exit: T#%%d team%%u liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec );
        KD_TRACE(100, ( buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif
}
727
Jim Cownie5e8470a2013-09-27 10:38:44 +0000728//--------------------------------------------------------------------------------------
729extern "C" {
730
731/*!
732@ingroup WORK_SHARING
733@param loc Source code location
734@param gtid Global thread id of this thread
735@param schedtype Scheduling type
736@param plastiter Pointer to the "last iteration" flag
737@param plower Pointer to the lower bound
738@param pupper Pointer to the upper bound
739@param pstride Pointer to the stride
740@param incr Loop increment
741@param chunk The chunk size
742
743Each of the four functions here are identical apart from the argument types.
744
745The functions compute the upper and lower bounds and stride to be used for the set of iterations
746to be executed by the current thread from the statically scheduled loop that is described by the
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000747initial values of the bounds, stride, increment and chunk size.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000748
749@{
750*/
751void
752__kmpc_for_static_init_4( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
753 kmp_int32 *plower, kmp_int32 *pupper,
754 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
755{
756 __kmp_for_static_init< kmp_int32 >(
757 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
758}
759
760/*!
761 See @ref __kmpc_for_static_init_4
762 */
763void
764__kmpc_for_static_init_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
765 kmp_uint32 *plower, kmp_uint32 *pupper,
766 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
767{
768 __kmp_for_static_init< kmp_uint32 >(
769 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
770}
771
772/*!
773 See @ref __kmpc_for_static_init_4
774 */
775void
776__kmpc_for_static_init_8( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
777 kmp_int64 *plower, kmp_int64 *pupper,
778 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
779{
780 __kmp_for_static_init< kmp_int64 >(
781 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
782}
783
784/*!
785 See @ref __kmpc_for_static_init_4
786 */
787void
788__kmpc_for_static_init_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
789 kmp_uint64 *plower, kmp_uint64 *pupper,
790 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
791{
792 __kmp_for_static_init< kmp_uint64 >(
793 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
794}
795/*!
796@}
797*/
798
/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedule Scheduling type for the parallel loop
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound of loop chunk
@param pupperD Pointer to the upper bound of dist_chunk
@param pstride Pointer to the stride for parallel loop
@param incr Loop increment
@param chunk The chunk size for the parallel loop

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the set of iterations
to be executed by the current thread from the statically scheduled loop that is described by the
initial values of the bounds, strides, increment and chunks for parallel loop and distribute
constructs.

@{
*/
821void
822__kmpc_dist_for_static_init_4(
823 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
824 kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD,
825 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
826{
827 __kmp_dist_for_static_init< kmp_int32 >(
828 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
829}
830
831/*!
832 See @ref __kmpc_dist_for_static_init_4
833 */
834void
835__kmpc_dist_for_static_init_4u(
836 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
837 kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD,
838 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
839{
840 __kmp_dist_for_static_init< kmp_uint32 >(
841 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
842}
843
844/*!
845 See @ref __kmpc_dist_for_static_init_4
846 */
847void
848__kmpc_dist_for_static_init_8(
849 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
850 kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD,
851 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
852{
853 __kmp_dist_for_static_init< kmp_int64 >(
854 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
855}
856
857/*!
858 See @ref __kmpc_dist_for_static_init_4
859 */
860void
861__kmpc_dist_for_static_init_8u(
862 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
863 kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD,
864 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
865{
866 __kmp_dist_for_static_init< kmp_uint64 >(
867 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
868}
869/*!
870@}
871*/
872
//-----------------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
//    Transfer call to template< type T >
//    __kmp_team_static_init( ident_t *loc, int gtid,
//        int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param p_last pointer to last iteration flag
@param p_lb pointer to Lower bound
@param p_ub pointer to Upper bound
@param p_st Step (or increment if you prefer)
@param incr Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the set of iterations
to be executed by the current team from the statically scheduled loop that is described by the
initial values of the bounds, stride, increment and chunk for the distribute construct as part of
composite distribute parallel loop construct.
These functions are all identical apart from the types of the arguments.
*/
897
898void
899__kmpc_team_static_init_4(
900 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
901 kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
902{
903 KMP_DEBUG_ASSERT( __kmp_init_serial );
904 __kmp_team_static_init< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
905}
906
907/*!
908 See @ref __kmpc_team_static_init_4
909 */
910void
911__kmpc_team_static_init_4u(
912 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
913 kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
914{
915 KMP_DEBUG_ASSERT( __kmp_init_serial );
916 __kmp_team_static_init< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
917}
918
919/*!
920 See @ref __kmpc_team_static_init_4
921 */
922void
923__kmpc_team_static_init_8(
924 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
925 kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
926{
927 KMP_DEBUG_ASSERT( __kmp_init_serial );
928 __kmp_team_static_init< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
929}
930
931/*!
932 See @ref __kmpc_team_static_init_4
933 */
934void
935__kmpc_team_static_init_8u(
936 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
937 kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
938{
939 KMP_DEBUG_ASSERT( __kmp_init_serial );
940 __kmp_team_static_init< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
941}
942/*!
943@}
944*/
945
Jim Cownie5e8470a2013-09-27 10:38:44 +0000946} // extern "C"
947