/*
 * kmp_sched.c -- static scheduling -- iteration initialization
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

/*
 * Static scheduling initialization.
 *
 * NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
 *       it may change values between parallel regions.  __kmp_max_nth
 *       is the largest value __kmp_nth may take, 1 is the smallest.
 *
 */

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_str.h"
#include "kmp_error.h"
#include "kmp_stats.h"
#include "kmp_itt.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

// template for type limits
template< typename T >
struct i_maxmin {
    static const T mx;
    static const T mn;
};
template<>
struct i_maxmin< int > {
    static const int mx = 0x7fffffff;
    static const int mn = 0x80000000;
};
template<>
struct i_maxmin< unsigned int > {
    static const unsigned int mx = 0xffffffff;
    static const unsigned int mn = 0x00000000;
};
template<>
struct i_maxmin< long long > {
    static const long long mx = 0x7fffffffffffffffLL;
    static const long long mn = 0x8000000000000000LL;
};
template<>
struct i_maxmin< unsigned long long > {
    // ULL suffixes so the unsigned limits do not rely on signed-literal conversion
    static const unsigned long long mx = 0xffffffffffffffffULL;
    static const unsigned long long mn = 0x0000000000000000ULL;
};
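// i_maxmin< T >::mx / ::mn are used below to clamp a computed *pupper when the
// per-thread chunk arithmetic overflows the iteration type (see the
// kmp_sch_static_greedy paths).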
//-------------------------------------------------------------------------
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
    char const * traits_t< int >::spec = "d";
    char const * traits_t< unsigned int >::spec = "u";
    char const * traits_t< long long >::spec = "lld";
    char const * traits_t< unsigned long long >::spec = "llu";
//-------------------------------------------------------------------------
#endif

template< typename T >
static void
__kmp_for_static_init(
    ident_t                          *loc,
    kmp_int32                         global_tid,
    kmp_int32                         schedtype,
    kmp_int32                        *plastiter,
    T                                *plower,
    T                                *pupper,
    typename traits_t< T >::signed_t *pstride,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    KMP_COUNT_BLOCK(OMP_FOR_static);
    KMP_TIME_PARTITIONED_BLOCK(FOR_static_scheduling);

    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    /* this all has to be changed back to TID and such.. */
    register kmp_int32   gtid = global_tid;
    register kmp_uint32  tid;
    register kmp_uint32  nth;
    register UT          trip_count;
    register kmp_team_t *team;
    register kmp_info_t *th = __kmp_threads[ gtid ];

#if OMPT_SUPPORT && OMPT_TRACE
    ompt_team_info_t *team_info = NULL;
    ompt_task_info_t *task_info = NULL;

    if (ompt_enabled) {
        // Only fully initialize variables needed by OMPT if OMPT is enabled.
        team_info = __ompt_get_teaminfo(0, NULL);
        task_info = __ompt_get_taskinfo(0);
    }
#endif

    KMP_DEBUG_ASSERT( plastiter && plower && pupper && pstride );
    KE_TRACE( 10, ("__kmpc_for_static_init called (%d)\n", global_tid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
            " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, global_tid, schedtype, *plastiter,
                        *plower, *pupper, *pstride, incr, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    if ( __kmp_env_consistency_check ) {
        __kmp_push_workshare( global_tid, ct_pdo, loc );
        if ( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
    }
    /* special handling for zero-trip loops */
    if ( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
        if( plastiter != NULL )
            *plastiter = FALSE;
        /* leave pupper and plower set to entire iteration space */
        *pstride = incr;   /* value should never be used */
        // *plower = *pupper - incr;   // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
        // THIS LINE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE ON A ZERO-TRIP LOOP
        // (lower=1, upper=0, stride=1) - JPH June 23, 2009.
        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init:(ZERO TRIP) liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>, loc = %%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride, loc->psource ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        KMP_COUNT_VALUE (FOR_static_iterations, 0);
        return;
    }

    #if OMP_40_ENABLED
    // Although there are schedule enumerations above kmp_ord_upper that are not
    // "distribute" schedules, the only useful ones are dynamic and so cannot be
    // seen here, since this code path is executed only for static schedules.
    if ( schedtype > kmp_ord_upper ) {
        // we are in DISTRIBUTE construct
        schedtype += kmp_sch_static - kmp_distribute_static;      // AC: convert to usual schedule type
        tid  = th->th.th_team->t.t_master_tid;
        team = th->th.th_team->t.t_parent;
    } else
    #endif
    {
        tid  = __kmp_tid_from_gtid( global_tid );
        team = th->th.th_team;
    }

    /* determine if "for" loop is an active worksharing construct */
    if ( team -> t.t_serialized ) {
        /* serialized parallel, each thread executes whole iteration space */
        if( plastiter != NULL )
            *plastiter = TRUE;
        /* leave pupper and plower set to entire iteration space */
        *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        return;
    }
    nth = team->t.t_nproc;
    if ( nth == 1 ) {
        if( plastiter != NULL )
            *plastiter = TRUE;
        *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        return;
    }

    /* compute trip count */
    if ( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if (incr == -1) {
        trip_count = *plower - *pupper + 1;
    } else if ( incr > 0 ) {
        // upper-lower can exceed the limit of signed type
        trip_count = (UT)(*pupper - *plower) / incr + 1;
    } else {
        trip_count = (UT)(*plower - *pupper) / ( -incr ) + 1;
    }
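    // For example, lower=0, upper=9, incr=2 gives trip_count = (9-0)/2 + 1 = 5
    // (iterations 0,2,4,6,8); the cast to UT lets a difference larger than the
    // signed maximum still produce the intended count.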

    if ( __kmp_env_consistency_check ) {
        /* tripcount overflow? */
        if ( trip_count == 0 && *pupper != *plower ) {
            __kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc );
        }
    }
    KMP_COUNT_VALUE (FOR_static_iterations, trip_count);

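    // How the bounds are computed below:
    //  - kmp_sch_static (block scheduling): the iteration space is split into nth
    //    contiguous blocks.  With __kmp_static == kmp_sch_static_balanced each
    //    thread gets trip_count/nth iterations and the first trip_count%nth
    //    threads get one extra; with kmp_sch_static_greedy each thread gets
    //    ceil(trip_count/nth) iterations and trailing threads may get none.
    //  - kmp_sch_static_chunked: thread tid starts at *plower + tid*chunk*incr,
    //    and *pstride (nth*chunk*incr) advances it to its next chunk.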
    /* compute remaining parameters */
    switch ( schedtype ) {
    case kmp_sch_static:
        {
            if ( trip_count < nth ) {
                KMP_DEBUG_ASSERT(
                    __kmp_static == kmp_sch_static_greedy ||
                    __kmp_static == kmp_sch_static_balanced
                ); // Unknown static scheduling type.
                if ( tid < trip_count ) {
                    *pupper = *plower = *plower + tid * incr;
                } else {
                    *plower = *pupper + incr;
                }
                if( plastiter != NULL )
                    *plastiter = ( tid == trip_count - 1 );
            } else {
                if ( __kmp_static == kmp_sch_static_balanced ) {
                    register UT small_chunk = trip_count / nth;
                    register UT extras = trip_count % nth;
                    *plower += incr * ( tid * small_chunk + ( tid < extras ? tid : extras ) );
                    *pupper = *plower + small_chunk * incr - ( tid < extras ? 0 : incr );
                    if( plastiter != NULL )
                        *plastiter = ( tid == nth - 1 );
                } else {
                    register T big_chunk_inc_count = ( trip_count/nth +
                                                       ( ( trip_count % nth ) ? 1 : 0) ) * incr;
                    register T old_upper = *pupper;

                    KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                        // Unknown static scheduling type.

                    *plower += tid * big_chunk_inc_count;
                    *pupper = *plower + big_chunk_inc_count - incr;
                    if ( incr > 0 ) {
                        if( *pupper < *plower )
                            *pupper = i_maxmin< T >::mx;
                        if( plastiter != NULL )
                            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
                        if ( *pupper > old_upper ) *pupper = old_upper; // tracker C73258
                    } else {
                        if( *pupper > *plower )
                            *pupper = i_maxmin< T >::mn;
                        if( plastiter != NULL )
                            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
                        if ( *pupper < old_upper ) *pupper = old_upper; // tracker C73258
                    }
                }
            }
            break;
        }
    case kmp_sch_static_chunked:
        {
            register ST span;
            if ( chunk < 1 ) {
                chunk = 1;
            }
            span = chunk * incr;
            *pstride = span * nth;
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if( plastiter != NULL )
                *plastiter = (tid == ((trip_count - 1)/( UT )chunk) % nth);
            break;
        }
    default:
        KMP_ASSERT2( 0, "__kmpc_for_static_init: unknown scheduling type" );
        break;
    }

#if USE_ITT_BUILD
    // Report loop metadata
    if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
#if OMP_40_ENABLED
         th->th.th_teams_microtask == NULL &&
#endif
         team->t.t_active_level == 1 )
    {
        kmp_uint64 cur_chunk = chunk;
        // Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked
        if ( schedtype == kmp_sch_static ) {
            cur_chunk = trip_count / nth + ( ( trip_count % nth ) ? 1 : 0);
        }
        // 0 - "static" schedule
        __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
    }
#endif
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_for_static_init: liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
        __kmp_str_free( &buff );
    }
    #endif
    KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
            team_info->parallel_id, task_info->task_id, team_info->microtask);
    }
#endif

    return;
}

template< typename T >
static void
__kmp_dist_for_static_init(
    ident_t                          *loc,
    kmp_int32                         gtid,
    kmp_int32                         schedule,
    kmp_int32                        *plastiter,
    T                                *plower,
    T                                *pupper,
    T                                *pupperDist,
    typename traits_t< T >::signed_t *pstride,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    register kmp_uint32  tid;
    register kmp_uint32  nth;
    register kmp_uint32  team_id;
    register kmp_uint32  nteams;
    register UT          trip_count;
    register kmp_team_t *team;
    kmp_info_t          *th;

    KMP_DEBUG_ASSERT( plastiter && plower && pupper && pupperDist && pstride );
    KE_TRACE( 10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
            "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, schedule, *plastiter,
                        *plower, *pupper, incr, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    if( __kmp_env_consistency_check ) {
        __kmp_push_workshare( gtid, ct_pdo, loc );
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
            // The loop is illegal.
            // Some zero-trip loops are handled by the compiler, e.g.:
            //   for(i=10;i<0;++i) // lower >= upper - run-time check
            //   for(i=0;i>10;--i) // lower <= upper - run-time check
            //   for(i=0;i>10;++i) // incr > 0      - compile-time check
            //   for(i=10;i<0;--i) // incr < 0      - compile-time check
            // The compiler does not check the following illegal loops:
            //   for(i=0;i<10;i+=incr)  // where incr<0
            //   for(i=10;i>0;i-=incr)  // where incr<0
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    tid = __kmp_tid_from_gtid( gtid );
    th = __kmp_threads[gtid];
    nth = th->th.th_team_nproc;
    team = th->th.th_team;
    #if OMP_40_ENABLED
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
    nteams = th->th.th_teams_size.nteams;
    #endif
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute global trip count
    if( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if(incr == -1) {
        trip_count = *plower - *pupper + 1;
    } else if ( incr > 0 ) {
        // upper-lower can exceed the limit of signed type
        trip_count = (UT)(*pupper - *plower) / incr + 1;
    } else {
        trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
    }

    *pstride = *pupper - *plower;  // just in case (can be unused)
    if( trip_count <= nteams ) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static == kmp_sch_static_balanced
        ); // Unknown static scheduling type.
        // only masters of some teams get single iteration, other threads get nothing
        if( team_id < trip_count && tid == 0 ) {
            *pupper = *pupperDist = *plower = *plower + team_id * incr;
        } else {
            *pupperDist = *pupper;
            *plower = *pupper + incr; // compiler should skip loop body
        }
        if( plastiter != NULL )
            *plastiter = ( tid == 0 && team_id == trip_count - 1 );
    } else {
        // Get the team's chunk first (each team gets at most one chunk)
        if( __kmp_static == kmp_sch_static_balanced ) {
            register UT chunkD = trip_count / nteams;
            register UT extras = trip_count % nteams;
            *plower += incr * ( team_id * chunkD + ( team_id < extras ? team_id : extras ) );
            *pupperDist = *plower + chunkD * incr - ( team_id < extras ? 0 : incr );
            if( plastiter != NULL )
                *plastiter = ( team_id == nteams - 1 );
        } else {
            register T chunk_inc_count =
                ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
            register T upper = *pupper;
            KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                // Unknown static scheduling type.
            *plower += team_id * chunk_inc_count;
            *pupperDist = *plower + chunk_inc_count - incr;
            // Check/correct bounds if needed
            if( incr > 0 ) {
                if( *pupperDist < *plower )
                    *pupperDist = i_maxmin< T >::mx;
                if( plastiter != NULL )
                    *plastiter = *plower <= upper && *pupperDist > upper - incr;
                if( *pupperDist > upper )
                    *pupperDist = upper; // tracker C73258
                if( *plower > *pupperDist ) {
                    *pupper = *pupperDist;  // no iterations available for the team
                    goto end;
                }
            } else {
                if( *pupperDist > *plower )
                    *pupperDist = i_maxmin< T >::mn;
                if( plastiter != NULL )
                    *plastiter = *plower >= upper && *pupperDist < upper - incr;
                if( *pupperDist < upper )
                    *pupperDist = upper; // tracker C73258
                if( *plower < *pupperDist ) {
                    *pupper = *pupperDist; // no iterations available for the team
                    goto end;
                }
            }
        }
        // Get the parallel loop chunk now (for thread)
        // compute trip count for team's chunk
        if( incr == 1 ) {
            trip_count = *pupperDist - *plower + 1;
        } else if(incr == -1) {
            trip_count = *plower - *pupperDist + 1;
        } else if ( incr > 1 ) {
            // upper-lower can exceed the limit of signed type
            trip_count = (UT)(*pupperDist - *plower) / incr + 1;
        } else {
            trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
        }
        KMP_DEBUG_ASSERT( trip_count );
        switch( schedule ) {
        case kmp_sch_static:
        {
            if( trip_count <= nth ) {
                KMP_DEBUG_ASSERT(
                    __kmp_static == kmp_sch_static_greedy ||
                    __kmp_static == kmp_sch_static_balanced
                ); // Unknown static scheduling type.
                if( tid < trip_count )
                    *pupper = *plower = *plower + tid * incr;
                else
                    *plower = *pupper + incr; // no iterations available
                if( plastiter != NULL )
                    if( *plastiter != 0 && !( tid == trip_count - 1 ) )
                        *plastiter = 0;
            } else {
                if( __kmp_static == kmp_sch_static_balanced ) {
                    register UT chunkL = trip_count / nth;
                    register UT extras = trip_count % nth;
                    *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
                    *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
                    if( plastiter != NULL )
                        if( *plastiter != 0 && !( tid == nth - 1 ) )
                            *plastiter = 0;
                } else {
                    register T chunk_inc_count =
                        ( trip_count / nth + ( ( trip_count % nth ) ? 1 : 0) ) * incr;
                    register T upper = *pupperDist;
                    KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                        // Unknown static scheduling type.
                    *plower += tid * chunk_inc_count;
                    *pupper = *plower + chunk_inc_count - incr;
                    if( incr > 0 ) {
                        if( *pupper < *plower )
                            *pupper = i_maxmin< T >::mx;
                        if( plastiter != NULL )
                            if( *plastiter != 0 && !(*plower <= upper && *pupper > upper - incr) )
                                *plastiter = 0;
                        if( *pupper > upper )
                            *pupper = upper; // tracker C73258
                    } else {
                        if( *pupper > *plower )
                            *pupper = i_maxmin< T >::mn;
                        if( plastiter != NULL )
                            if( *plastiter != 0 && !(*plower >= upper && *pupper < upper - incr) )
                                *plastiter = 0;
                        if( *pupper < upper )
                            *pupper = upper; // tracker C73258
                    }
                }
            }
            break;
        }
        case kmp_sch_static_chunked:
        {
            register ST span;
            if( chunk < 1 )
                chunk = 1;
            span = chunk * incr;
            *pstride = span * nth;
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if( plastiter != NULL )
                if( *plastiter != 0 && !(tid == ((trip_count - 1) / ( UT )chunk) % nth) )
                    *plastiter = 0;
            break;
        }
        default:
            KMP_ASSERT2( 0, "__kmpc_dist_for_static_init: unknown loop scheduling type" );
            break;
        }
    }
    end:;
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
            "stride=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pupperDist, *pstride ) );
        __kmp_str_free( &buff );
    }
    #endif
    KE_TRACE( 10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid ) );
    return;
}

template< typename T >
static void
__kmp_team_static_init(
    ident_t                          *loc,
    kmp_int32                         gtid,
    kmp_int32                        *p_last,
    T                                *p_lb,
    T                                *p_ub,
    typename traits_t< T >::signed_t *p_st,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    // The routine returns the first chunk distributed to the team and the
    // stride for computing the subsequent chunks.  The last-iteration flag is
    // set for the team that will execute the last iteration of the loop.
    // The routine is called only for dist_schedule(static, chunk).
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    kmp_uint32   team_id;
    kmp_uint32   nteams;
    UT           trip_count;
    T            lower;
    T            upper;
    ST           span;
    kmp_team_t  *team;
    kmp_info_t  *th;

    KMP_DEBUG_ASSERT( p_last && p_lb && p_ub && p_st );
    KE_TRACE( 10, ("__kmp_team_static_init called (%d)\n", gtid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmp_team_static_init enter: T#%%d liter=%%d "
            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    lower = *p_lb;
    upper = *p_ub;
    if( __kmp_env_consistency_check ) {
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (upper < lower) : (lower < upper) ) {
            // The loop is illegal.
            // Some zero-trip loops are handled by the compiler, e.g.:
            //   for(i=10;i<0;++i) // lower >= upper - run-time check
            //   for(i=0;i>10;--i) // lower <= upper - run-time check
            //   for(i=0;i>10;++i) // incr > 0      - compile-time check
            //   for(i=10;i<0;--i) // incr < 0      - compile-time check
            // The compiler does not check the following illegal loops:
            //   for(i=0;i<10;i+=incr)  // where incr<0
            //   for(i=10;i>0;i-=incr)  // where incr<0
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    th = __kmp_threads[gtid];
    team = th->th.th_team;
    #if OMP_40_ENABLED
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
    nteams = th->th.th_teams_size.nteams;
    #endif
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute trip count
    if( incr == 1 ) {
        trip_count = upper - lower + 1;
    } else if(incr == -1) {
        trip_count = lower - upper + 1;
    } else if ( incr > 0 ) {
        // upper-lower can exceed the limit of signed type
        trip_count = (UT)(upper - lower) / incr + 1;
    } else {
        trip_count = (UT)(lower - upper) / (-incr) + 1;
    }
    if( chunk < 1 )
        chunk = 1;
    span = chunk * incr;
    *p_st = span * nteams;
    *p_lb = lower + (span * team_id);
    *p_ub = *p_lb + span - incr;
    if ( p_last != NULL )
        *p_last = (team_id == ((trip_count - 1)/(UT)chunk) % nteams);
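    // Example: lower=0, upper=17, incr=1, chunk=2, nteams=3 gives span=2 and
    // *p_st=6, so team 0 starts at [0,1], team 1 at [2,3], team 2 at [4,5], and
    // each team's next chunk begins 6 iterations later.  trip_count=18, so the
    // team owning the last chunk (((18-1)/2) % 3 == 2) gets *p_last set.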
    // Correct upper bound if needed
    if( incr > 0 ) {
        if( *p_ub < *p_lb ) // overflow?
            *p_ub = i_maxmin< T >::mx;
        if( *p_ub > upper )
            *p_ub = upper; // tracker C73258
    } else {   // incr < 0
        if( *p_ub > *p_lb )
            *p_ub = i_maxmin< T >::mn;
        if( *p_ub < upper )
            *p_ub = upper; // tracker C73258
    }
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
            "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec );
        KD_TRACE(100, ( buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif
}

//--------------------------------------------------------------------------------------
extern "C" {

/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedtype  Scheduling type
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound
@param pstride Pointer to the stride
@param incr Loop increment
@param chunk The chunk size

The four functions here are identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the set of iterations
to be executed by the current thread from the statically scheduled loop that is described by the
initial values of the bounds, stride, increment and chunk size.

@{
*/
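/*
   Illustrative sketch only (not part of the library): a compiler typically
   lowers

       #pragma omp for schedule(static)
       for (int i = lb; i <= ub; ++i) body(i);

   into something like the following (clang-style codegen is assumed here):

       kmp_int32 last = 0, lower = lb, upper = ub, stride = 1;
       __kmpc_for_static_init_4(loc, gtid, kmp_sch_static, &last,
                                &lower, &upper, &stride, 1, 1);
       for (kmp_int32 i = lower; i <= upper; ++i)
           body(i);
       __kmpc_for_static_fini(loc, gtid);

   On return, [lower, upper] is the subrange assigned to the calling thread.
*/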
void
__kmpc_for_static_init_4( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
                          kmp_int32 *plower, kmp_int32 *pupper,
                          kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
{
    __kmp_for_static_init< kmp_int32 >(
        loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void
__kmpc_for_static_init_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
                           kmp_uint32 *plower, kmp_uint32 *pupper,
                           kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
{
    __kmp_for_static_init< kmp_uint32 >(
        loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void
__kmpc_for_static_init_8( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
                          kmp_int64 *plower, kmp_int64 *pupper,
                          kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
{
    __kmp_for_static_init< kmp_int64 >(
        loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void
__kmpc_for_static_init_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
                           kmp_uint64 *plower, kmp_uint64 *pupper,
                           kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
{
    __kmp_for_static_init< kmp_uint64 >(
        loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
}
/*!
@}
*/

/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedule Scheduling type for the parallel loop
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound of loop chunk
@param pupperD Pointer to the upper bound of dist_chunk
@param pstride Pointer to the stride for parallel loop
@param incr Loop increment
@param chunk The chunk size for the parallel loop

The four functions here are identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the set of iterations
to be executed by the current thread from the statically scheduled loop that is described by the
initial values of the bounds, strides, increment and chunks for the parallel loop and distribute
constructs.

@{
*/
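/*
   Note (informal): these entry points serve the composite "distribute parallel
   for" construct with a static schedule.  A single call returns both the chunk
   of the distribute loop assigned to this team (via pupperD) and the subrange
   of that chunk assigned to the calling thread (via plower/pupper), so the
   generated code needs only one runtime call per loop.
*/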
void
__kmpc_dist_for_static_init_4(
    ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
    kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD,
    kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
{
    __kmp_dist_for_static_init< kmp_int32 >(
        loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void
__kmpc_dist_for_static_init_4u(
    ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
    kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD,
    kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
{
    __kmp_dist_for_static_init< kmp_uint32 >(
        loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void
__kmpc_dist_for_static_init_8(
    ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
    kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD,
    kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
{
    __kmp_dist_for_static_init< kmp_int64 >(
        loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void
__kmpc_dist_for_static_init_8u(
    ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
    kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD,
    kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
{
    __kmp_dist_for_static_init< kmp_uint64 >(
        loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
}
/*!
@}
*/

//-----------------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
//    Transfer call to template< type T >
//    __kmp_team_static_init( ident_t *loc, int gtid,
//        int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param p_last pointer to last iteration flag
@param p_lb pointer to Lower bound
@param p_ub pointer to Upper bound
@param p_st Step (or increment if you prefer)
@param incr Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the set of iterations
to be executed by the current team from the statically scheduled loop that is described by the
initial values of the bounds, stride, increment and chunk for the distribute construct as part of
a composite distribute parallel loop construct.
These functions are all identical apart from the types of the arguments.
*/
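/*
   Note (informal): these entry points implement the dist_schedule(static,chunk)
   case of "distribute".  The team's first chunk is returned in [*p_lb, *p_ub]
   and *p_st is the stride (nteams * chunk * incr) that advances a team to its
   next chunk, mirroring what kmp_sch_static_chunked does for threads.
*/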

void
__kmpc_team_static_init_4(
    ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
    kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_team_static_init< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void
__kmpc_team_static_init_4u(
    ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
    kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_team_static_init< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void
__kmpc_team_static_init_8(
    ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
    kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_team_static_init< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void
__kmpc_team_static_init_8u(
    ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
    kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_team_static_init< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
}
/*!
@}
*/

} // extern "C"