blob: b6f30812b1b018feca157e258c332e4e2c48e86f [file] [log] [blame]
Jim Cownie5e8470a2013-09-27 10:38:44 +00001/*
2 * kmp_sched.c -- static scheduling -- iteration initialization
Jim Cownie5e8470a2013-09-27 10:38:44 +00003 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15
16/*
17 * Static scheduling initialization.
18 *
19 * NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
20 * it may change values between parallel regions. __kmp_max_nth
21 * is the largest value __kmp_nth may take, 1 is the smallest.
22 *
23 */
24
25#include "kmp.h"
26#include "kmp_i18n.h"
27#include "kmp_str.h"
28#include "kmp_error.h"
Jim Cownie4cc4bb42014-10-07 16:25:50 +000029#include "kmp_stats.h"
30#include "kmp_itt.h"
Jim Cownie5e8470a2013-09-27 10:38:44 +000031
Andrey Churbanovd7d088f2015-04-29 16:42:24 +000032#if OMPT_SUPPORT
33#include "ompt-specific.h"
34#endif
35
// template for type limits
//
// Provides the maximum (mx) and minimum (mn) representable value of each
// loop-index type used by the static scheduling routines below.
//
// NOTE: the signed minima are written as "-max - 1" on purpose.  A literal
// such as 0x80000000 does not fit in 'int', so it has unsigned type and its
// conversion back to the signed type is implementation-defined (before
// C++20); 0x8000000000000000LL similarly exceeds LLONG_MAX.  The "-max - 1"
// form is exact and portable.  Unsigned constants carry explicit U/ULL
// suffixes so the literal's type matches the member's type.
template< typename T >
struct i_maxmin {
    static const T mx;
    static const T mn;
};
template<>
struct i_maxmin< int > {
    static const int mx = 0x7fffffff;
    static const int mn = -0x7fffffff - 1;                   // INT_MIN
};
template<>
struct i_maxmin< unsigned int > {
    static const unsigned int mx = 0xffffffffU;
    static const unsigned int mn = 0x00000000U;
};
template<>
struct i_maxmin< long long > {
    static const long long mx = 0x7fffffffffffffffLL;
    static const long long mn = -0x7fffffffffffffffLL - 1LL; // LLONG_MIN
};
template<>
struct i_maxmin< unsigned long long > {
    static const unsigned long long mx = 0xffffffffffffffffULL;
    static const unsigned long long mn = 0x0000000000000000ULL;
};
62//-------------------------------------------------------------------------
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
// Out-of-class definitions of the printf format specifier declared by the
// traits_t<> specializations (see kmp.h); used only by the KD_TRACE debug
// output in the routines below, hence guarded by KMP_DEBUG.
    char const * traits_t< int >::spec = "d";
    char const * traits_t< unsigned int >::spec = "u";
    char const * traits_t< long long >::spec = "lld";
    char const * traits_t< unsigned long long >::spec = "llu";
//-------------------------------------------------------------------------
#endif
72
// Compute this thread's portion of a statically scheduled worksharing loop.
// Common implementation behind the __kmpc_for_static_init_{4,4u,8,8u} entry
// points declared below.
//
//   T          loop index type (signed/unsigned, 32/64 bit)
//   loc        source location (error reporting, ITT metadata)
//   global_tid global thread id of the calling thread
//   schedtype  static schedule kind; "distribute" kinds (> kmp_ord_upper)
//              are converted to the matching ordinary static kind
//   plastiter  out: nonzero iff this thread runs the last iteration
//   plower     in: loop lower bound; out: this thread's lower bound
//   pupper     in: loop upper bound; out: this thread's upper bound
//   pstride    out: stride between chunks (meaningful for chunked schedules)
//   incr       loop increment, must be nonzero
//   chunk      chunk size for kmp_sch_static_chunked
template< typename T >
static void
__kmp_for_static_init(
    ident_t                          *loc,
    kmp_int32                         global_tid,
    kmp_int32                         schedtype,
    kmp_int32                        *plastiter,
    T                                *plower,
    T                                *pupper,
    typename traits_t< T >::signed_t *pstride,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    KMP_COUNT_BLOCK(OMP_FOR_static);
    KMP_TIME_PARTITIONED_BLOCK(FOR_static_scheduling);

    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    /* this all has to be changed back to TID and such.. */
    register kmp_int32   gtid = global_tid;
    register kmp_uint32  tid;
    register kmp_uint32  nth;
    register UT          trip_count;
    register kmp_team_t *team;
    register kmp_info_t *th = __kmp_threads[ gtid ];

#if OMPT_SUPPORT && OMPT_TRACE
    ompt_team_info_t *team_info = NULL;
    ompt_task_info_t *task_info = NULL;

    if (ompt_enabled) {
        // Only fully initialize variables needed by OMPT if OMPT is enabled.
        team_info = __ompt_get_teaminfo(0, NULL);
        task_info = __ompt_get_taskinfo(0);
    }
#endif

    KMP_DEBUG_ASSERT( plastiter && plower && pupper && pstride );
    KE_TRACE( 10, ("__kmpc_for_static_init called (%d)\n", global_tid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," \
            " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, global_tid, schedtype, *plastiter,
            *plower, *pupper, *pstride, incr, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    if ( __kmp_env_consistency_check ) {
        __kmp_push_workshare( global_tid, ct_pdo, loc );
        if ( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
    }
    /* special handling for zero-trip loops */
    if ( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
        if( plastiter != NULL )
            *plastiter = FALSE;
        /* leave pupper and plower set to entire iteration space */
        *pstride = incr;   /* value should never be used */
        // *plower = *pupper - incr;   // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
        //     THIS LINE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE ON A ZERO-TRIP LOOP
        //     (lower=1, upper=0, stride=1) - JPH June 23, 2009.
        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init:(ZERO TRIP) liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>, loc = %%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride, loc->psource ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        KMP_COUNT_VALUE (FOR_static_iterations, 0);
        return;
    }

    #if OMP_40_ENABLED
    // Although there are schedule enumerations above kmp_ord_upper which are not schedules for "distribute",
    // the only ones which are useful are dynamic, so cannot be seen here, since this codepath is only executed
    // for static schedules.
    if ( schedtype > kmp_ord_upper ) {
        // we are in DISTRIBUTE construct; schedule applies to the team's
        // masters, so use the parent team and this thread's rank within it
        schedtype += kmp_sch_static - kmp_distribute_static;      // AC: convert to usual schedule type
        tid  = th->th.th_team->t.t_master_tid;
        team = th->th.th_team->t.t_parent;
    } else
    #endif
    {
        tid  = __kmp_tid_from_gtid( global_tid );
        team = th->th.th_team;
    }

    /* determine if "for" loop is an active worksharing construct */
    if ( team -> t.t_serialized ) {
        /* serialized parallel, each thread executes whole iteration space */
        if( plastiter != NULL )
            *plastiter = TRUE;
        /* leave pupper and plower set to entire iteration space */
        *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        return;
    }
    nth = team->t.t_nproc;
    if ( nth == 1 ) {
        // single-thread team: same as the serialized case above
        if( plastiter != NULL )
            *plastiter = TRUE;
        *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
        #ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
            __kmp_str_free( &buff );
        }
        #endif
        KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
                team_info->parallel_id, task_info->task_id,
                team_info->microtask);
        }
#endif
        return;
    }

    /* compute trip count */
    if ( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if (incr == -1) {
        trip_count = *plower - *pupper + 1;
    } else if ( incr > 0 ) {
        // upper-lower can exceed the limit of signed type
        trip_count = (UT)(*pupper - *plower) / incr + 1;
    } else {
        trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
    }

    if ( __kmp_env_consistency_check ) {
        /* tripcount overflow? */
        if ( trip_count == 0 && *pupper != *plower ) {
            __kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc );
        }
    }
    KMP_COUNT_VALUE (FOR_static_iterations, trip_count);

    /* compute remaining parameters */
    switch ( schedtype ) {
    case kmp_sch_static:
        {
            if ( trip_count < nth ) {
                // fewer iterations than threads: one iteration each for the
                // first trip_count threads, nothing for the rest
                KMP_DEBUG_ASSERT(
                    __kmp_static == kmp_sch_static_greedy || \
                    __kmp_static == kmp_sch_static_balanced
                    ); // Unknown static scheduling type.
                if ( tid < trip_count ) {
                    *pupper = *plower = *plower + tid * incr;
                } else {
                    *plower = *pupper + incr;
                }
                if( plastiter != NULL )
                    *plastiter = ( tid == trip_count - 1 );
            } else {
                if ( __kmp_static == kmp_sch_static_balanced ) {
                    // balanced: chunk sizes differ by at most one iteration
                    register UT small_chunk = trip_count / nth;
                    register UT extras = trip_count % nth;
                    *plower += incr * ( tid * small_chunk + ( tid < extras ? tid : extras ) );
                    *pupper = *plower + small_chunk * incr - ( tid < extras ? 0 : incr );
                    if( plastiter != NULL )
                        *plastiter = ( tid == nth - 1 );
                } else {
                    // greedy: ceil(trip_count/nth) iterations per thread,
                    // last thread may get fewer (or none)
                    register T big_chunk_inc_count = ( trip_count/nth +
                                                     ( ( trip_count % nth ) ? 1 : 0) ) * incr;
                    register T old_upper = *pupper;

                    KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                        // Unknown static scheduling type.

                    *plower += tid * big_chunk_inc_count;
                    *pupper = *plower + big_chunk_inc_count - incr;
                    if ( incr > 0 ) {
                        // clamp on overflow, then back to the loop's real bound
                        if( *pupper < *plower )
                            *pupper = i_maxmin< T >::mx;
                        if( plastiter != NULL )
                            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
                        if ( *pupper > old_upper ) *pupper = old_upper; // tracker C73258
                    } else {
                        if( *pupper > *plower )
                            *pupper = i_maxmin< T >::mn;
                        if( plastiter != NULL )
                            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
                        if ( *pupper < old_upper ) *pupper = old_upper; // tracker C73258
                    }
                }
            }
            break;
        }
    case kmp_sch_static_chunked:
        {
            register ST span;
            if ( chunk < 1 ) {
                chunk = 1;
            }
            span = chunk * incr;
            *pstride = span * nth;   // distance between this thread's chunks
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if( plastiter != NULL )
                *plastiter = (tid == ((trip_count - 1)/( UT )chunk) % nth);
            break;
        }
#if OMP_45_ENABLED
    case kmp_sch_static_balanced_chunked:
        {
            register T old_upper = *pupper;
            // round up to make sure the chunk is enough to cover all iterations
            register UT span = (trip_count+nth-1) / nth;

            // perform chunk adjustment
            chunk = (span + chunk - 1) & ~(chunk-1);

            span = chunk * incr;
            *plower = *plower + (span * tid);
            *pupper = *plower + span - incr;
            if ( incr > 0 ) {
              if ( *pupper > old_upper ) *pupper = old_upper;
            } else
              if ( *pupper < old_upper ) *pupper = old_upper;

            if( plastiter != NULL )
                *plastiter = ( tid == ((trip_count - 1)/( UT )chunk) );
            break;
        }
#endif
    default:
        KMP_ASSERT2( 0, "__kmpc_for_static_init: unknown scheduling type" );
        break;
    }

#if USE_ITT_BUILD
    // Report loop metadata
    if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
#if OMP_40_ENABLED
        th->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level == 1 )
    {
        kmp_uint64 cur_chunk = chunk;
        // Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked
        if ( schedtype == kmp_sch_static ) {
            cur_chunk = trip_count / nth + ( ( trip_count % nth ) ? 1 : 0);
        }
        // 0 - "static" schedule
        __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
    }
#endif
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmpc_for_static_init: liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) );
        __kmp_str_free( &buff );
    }
    #endif
    KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
            team_info->parallel_id, task_info->task_id, team_info->microtask);
    }
#endif

    return;
}
395
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000396template< typename T >
397static void
398__kmp_dist_for_static_init(
399 ident_t *loc,
400 kmp_int32 gtid,
401 kmp_int32 schedule,
402 kmp_int32 *plastiter,
403 T *plower,
404 T *pupper,
405 T *pupperDist,
406 typename traits_t< T >::signed_t *pstride,
407 typename traits_t< T >::signed_t incr,
408 typename traits_t< T >::signed_t chunk
409) {
Jonathan Peyton45be4502015-08-11 21:36:41 +0000410 KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000411 typedef typename traits_t< T >::unsigned_t UT;
412 typedef typename traits_t< T >::signed_t ST;
413 register kmp_uint32 tid;
414 register kmp_uint32 nth;
415 register kmp_uint32 team_id;
416 register kmp_uint32 nteams;
417 register UT trip_count;
418 register kmp_team_t *team;
419 kmp_info_t * th;
420
421 KMP_DEBUG_ASSERT( plastiter && plower && pupper && pupperDist && pstride );
422 KE_TRACE( 10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
423 #ifdef KMP_DEBUG
424 {
425 const char * buff;
426 // create format specifiers before the debug output
427 buff = __kmp_str_format(
428 "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "\
429 "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
430 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
431 traits_t< ST >::spec, traits_t< T >::spec );
432 KD_TRACE(100, ( buff, gtid, schedule, *plastiter,
433 *plower, *pupper, incr, chunk ) );
434 __kmp_str_free( &buff );
435 }
436 #endif
437
438 if( __kmp_env_consistency_check ) {
439 __kmp_push_workshare( gtid, ct_pdo, loc );
440 if( incr == 0 ) {
441 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
442 }
443 if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
444 // The loop is illegal.
445 // Some zero-trip loops maintained by compiler, e.g.:
446 // for(i=10;i<0;++i) // lower >= upper - run-time check
447 // for(i=0;i>10;--i) // lower <= upper - run-time check
448 // for(i=0;i>10;++i) // incr > 0 - compile-time check
449 // for(i=10;i<0;--i) // incr < 0 - compile-time check
450 // Compiler does not check the following illegal loops:
451 // for(i=0;i<10;i+=incr) // where incr<0
452 // for(i=10;i>0;i-=incr) // where incr<0
453 __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
454 }
455 }
456 tid = __kmp_tid_from_gtid( gtid );
457 th = __kmp_threads[gtid];
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000458 nth = th->th.th_team_nproc;
459 team = th->th.th_team;
460 #if OMP_40_ENABLED
Jonathan Peyton441f3372015-09-21 17:24:46 +0000461 KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000462 nteams = th->th.th_teams_size.nteams;
463 #endif
464 team_id = team->t.t_master_tid;
465 KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);
466
467 // compute global trip count
468 if( incr == 1 ) {
469 trip_count = *pupper - *plower + 1;
470 } else if(incr == -1) {
471 trip_count = *plower - *pupper + 1;
Jonathan Peyton5235a1b2016-04-18 21:38:29 +0000472 } else if ( incr > 0 ) {
473 // upper-lower can exceed the limit of signed type
474 trip_count = (UT)(*pupper - *plower) / incr + 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000475 } else {
Jonathan Peyton5235a1b2016-04-18 21:38:29 +0000476 trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000477 }
Jonathan Peyton45be4502015-08-11 21:36:41 +0000478
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000479 *pstride = *pupper - *plower; // just in case (can be unused)
480 if( trip_count <= nteams ) {
481 KMP_DEBUG_ASSERT(
482 __kmp_static == kmp_sch_static_greedy || \
483 __kmp_static == kmp_sch_static_balanced
484 ); // Unknown static scheduling type.
485 // only masters of some teams get single iteration, other threads get nothing
486 if( team_id < trip_count && tid == 0 ) {
487 *pupper = *pupperDist = *plower = *plower + team_id * incr;
488 } else {
489 *pupperDist = *pupper;
490 *plower = *pupper + incr; // compiler should skip loop body
491 }
492 if( plastiter != NULL )
493 *plastiter = ( tid == 0 && team_id == trip_count - 1 );
494 } else {
495 // Get the team's chunk first (each team gets at most one chunk)
496 if( __kmp_static == kmp_sch_static_balanced ) {
497 register UT chunkD = trip_count / nteams;
498 register UT extras = trip_count % nteams;
499 *plower += incr * ( team_id * chunkD + ( team_id < extras ? team_id : extras ) );
500 *pupperDist = *plower + chunkD * incr - ( team_id < extras ? 0 : incr );
501 if( plastiter != NULL )
502 *plastiter = ( team_id == nteams - 1 );
503 } else {
504 register T chunk_inc_count =
505 ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
506 register T upper = *pupper;
507 KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
508 // Unknown static scheduling type.
509 *plower += team_id * chunk_inc_count;
510 *pupperDist = *plower + chunk_inc_count - incr;
511 // Check/correct bounds if needed
512 if( incr > 0 ) {
513 if( *pupperDist < *plower )
514 *pupperDist = i_maxmin< T >::mx;
515 if( plastiter != NULL )
516 *plastiter = *plower <= upper && *pupperDist > upper - incr;
517 if( *pupperDist > upper )
518 *pupperDist = upper; // tracker C73258
519 if( *plower > *pupperDist ) {
520 *pupper = *pupperDist; // no iterations available for the team
521 goto end;
522 }
523 } else {
524 if( *pupperDist > *plower )
525 *pupperDist = i_maxmin< T >::mn;
526 if( plastiter != NULL )
527 *plastiter = *plower >= upper && *pupperDist < upper - incr;
528 if( *pupperDist < upper )
529 *pupperDist = upper; // tracker C73258
530 if( *plower < *pupperDist ) {
531 *pupper = *pupperDist; // no iterations available for the team
532 goto end;
533 }
534 }
535 }
536 // Get the parallel loop chunk now (for thread)
537 // compute trip count for team's chunk
538 if( incr == 1 ) {
539 trip_count = *pupperDist - *plower + 1;
540 } else if(incr == -1) {
541 trip_count = *plower - *pupperDist + 1;
Jonathan Peyton5235a1b2016-04-18 21:38:29 +0000542 } else if ( incr > 1 ) {
543 // upper-lower can exceed the limit of signed type
544 trip_count = (UT)(*pupperDist - *plower) / incr + 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000545 } else {
Jonathan Peyton5235a1b2016-04-18 21:38:29 +0000546 trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000547 }
548 KMP_DEBUG_ASSERT( trip_count );
549 switch( schedule ) {
550 case kmp_sch_static:
551 {
552 if( trip_count <= nth ) {
553 KMP_DEBUG_ASSERT(
554 __kmp_static == kmp_sch_static_greedy || \
555 __kmp_static == kmp_sch_static_balanced
556 ); // Unknown static scheduling type.
557 if( tid < trip_count )
558 *pupper = *plower = *plower + tid * incr;
559 else
560 *plower = *pupper + incr; // no iterations available
561 if( plastiter != NULL )
562 if( *plastiter != 0 && !( tid == trip_count - 1 ) )
563 *plastiter = 0;
564 } else {
565 if( __kmp_static == kmp_sch_static_balanced ) {
566 register UT chunkL = trip_count / nth;
567 register UT extras = trip_count % nth;
568 *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
569 *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
570 if( plastiter != NULL )
571 if( *plastiter != 0 && !( tid == nth - 1 ) )
572 *plastiter = 0;
573 } else {
574 register T chunk_inc_count =
575 ( trip_count / nth + ( ( trip_count % nth ) ? 1 : 0) ) * incr;
576 register T upper = *pupperDist;
577 KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
578 // Unknown static scheduling type.
579 *plower += tid * chunk_inc_count;
580 *pupper = *plower + chunk_inc_count - incr;
581 if( incr > 0 ) {
582 if( *pupper < *plower )
583 *pupper = i_maxmin< T >::mx;
584 if( plastiter != NULL )
585 if( *plastiter != 0 && !(*plower <= upper && *pupper > upper - incr) )
586 *plastiter = 0;
587 if( *pupper > upper )
588 *pupper = upper;//tracker C73258
589 } else {
590 if( *pupper > *plower )
591 *pupper = i_maxmin< T >::mn;
592 if( plastiter != NULL )
593 if( *plastiter != 0 && !(*plower >= upper && *pupper < upper - incr) )
594 *plastiter = 0;
595 if( *pupper < upper )
596 *pupper = upper;//tracker C73258
597 }
598 }
599 }
600 break;
601 }
602 case kmp_sch_static_chunked:
603 {
604 register ST span;
605 if( chunk < 1 )
606 chunk = 1;
607 span = chunk * incr;
608 *pstride = span * nth;
609 *plower = *plower + (span * tid);
610 *pupper = *plower + span - incr;
611 if( plastiter != NULL )
612 if( *plastiter != 0 && !(tid == ((trip_count - 1) / ( UT )chunk) % nth) )
613 *plastiter = 0;
614 break;
615 }
616 default:
617 KMP_ASSERT2( 0, "__kmpc_dist_for_static_init: unknown loop scheduling type" );
618 break;
619 }
620 }
621 end:;
622 #ifdef KMP_DEBUG
623 {
624 const char * buff;
625 // create format specifiers before the debug output
626 buff = __kmp_str_format(
627 "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "\
628 "stride=%%%s signed?<%s>\n",
629 traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec,
630 traits_t< ST >::spec, traits_t< T >::spec );
631 KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pupperDist, *pstride ) );
632 __kmp_str_free( &buff );
633 }
634 #endif
635 KE_TRACE( 10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid ) );
636 return;
637}
638
// Compute the first chunk of a "distribute" loop assigned to this team and
// the stride to the team's next chunk.  Used for dist_schedule(static,chunk).
//
//   T       loop index type (signed/unsigned, 32/64 bit)
//   loc     source location (error reporting)
//   gtid    global thread id of the calling thread
//   p_last  out: nonzero iff this team executes the last iteration
//   p_lb    in: loop lower bound; out: team's first-chunk lower bound
//   p_ub    in: loop upper bound; out: team's first-chunk upper bound
//   p_st    out: stride between this team's chunks
//   incr    loop increment, must be nonzero
//   chunk   dist_schedule chunk size (values < 1 are treated as 1)
template< typename T >
static void
__kmp_team_static_init(
    ident_t                          *loc,
    kmp_int32                         gtid,
    kmp_int32                        *p_last,
    T                                *p_lb,
    T                                *p_ub,
    typename traits_t< T >::signed_t *p_st,
    typename traits_t< T >::signed_t  incr,
    typename traits_t< T >::signed_t  chunk
) {
    // The routine returns the first chunk distributed to the team and
    // stride for next chunks calculation.
    // Last iteration flag set for the team that will execute
    // the last iteration of the loop.
    // The routine is called for dist_schedule(static,chunk) only.
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    kmp_uint32  team_id;
    kmp_uint32  nteams;
    UT          trip_count;
    T           lower;
    T           upper;
    ST          span;
    kmp_team_t *team;
    kmp_info_t *th;

    KMP_DEBUG_ASSERT( p_last && p_lb && p_ub && p_st );
    KE_TRACE( 10, ("__kmp_team_static_init called (%d)\n", gtid));
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmp_team_static_init enter: T#%%d liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec, traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif

    lower = *p_lb;
    upper = *p_ub;
    if( __kmp_env_consistency_check ) {
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (upper < lower) : (lower < upper) ) {
            // The loop is illegal.
            // Some zero-trip loops maintained by compiler, e.g.:
            //   for(i=10;i<0;++i) // lower >= upper - run-time check
            //   for(i=0;i>10;--i) // lower <= upper - run-time check
            //   for(i=0;i>10;++i) // incr > 0     - compile-time check
            //   for(i=10;i<0;--i) // incr < 0     - compile-time check
            // Compiler does not check the following illegal loops:
            //   for(i=0;i<10;i+=incr) // where incr<0
            //   for(i=10;i>0;i-=incr) // where incr<0
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    th = __kmp_threads[gtid];
    team = th->th.th_team;
    #if OMP_40_ENABLED
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
    nteams = th->th.th_teams_size.nteams;
    #endif
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute trip count
    if( incr == 1 ) {
        trip_count = upper - lower + 1;
    } else if(incr == -1) {
        trip_count = lower - upper + 1;
    } else if ( incr > 0 ) {
        // upper-lower can exceed the limit of signed type
        trip_count = (UT)(upper - lower) / incr + 1;
    } else {
        trip_count = (UT)(lower - upper) / (-incr) + 1;
    }
    if( chunk < 1 )
        chunk = 1;
    span = chunk * incr;
    *p_st = span * nteams;        // stride between this team's chunks
    *p_lb = lower + (span * team_id);
    *p_ub = *p_lb + span - incr;
    if ( p_last != NULL )
        *p_last = (team_id == ((trip_count - 1)/(UT)chunk) % nteams);
    // Correct upper bound if needed
    if( incr > 0 ) {
        if( *p_ub < *p_lb ) // overflow?
            *p_ub = i_maxmin< T >::mx;
        if( *p_ub > upper )
            *p_ub = upper; // tracker C73258
    } else { // incr < 0
        if( *p_ub > *p_lb ) // underflow?
            *p_ub = i_maxmin< T >::mn;
        if( *p_ub < upper )
            *p_ub = upper; // tracker C73258
    }
    #ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmp_team_static_init exit: T#%%d team%%u liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< ST >::spec );
        KD_TRACE(100, ( buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk ) );
        __kmp_str_free( &buff );
    }
    #endif
}
754
Jim Cownie5e8470a2013-09-27 10:38:44 +0000755//--------------------------------------------------------------------------------------
756extern "C" {
757
758/*!
759@ingroup WORK_SHARING
760@param loc Source code location
761@param gtid Global thread id of this thread
762@param schedtype Scheduling type
763@param plastiter Pointer to the "last iteration" flag
764@param plower Pointer to the lower bound
765@param pupper Pointer to the upper bound
766@param pstride Pointer to the stride
767@param incr Loop increment
768@param chunk The chunk size
769
770Each of the four functions here are identical apart from the argument types.
771
772The functions compute the upper and lower bounds and stride to be used for the set of iterations
773to be executed by the current thread from the statically scheduled loop that is described by the
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000774initial values of the bounds, stride, increment and chunk size.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000775
776@{
777*/
void
__kmpc_for_static_init_4( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
                      kmp_int32 *plower, kmp_int32 *pupper,
                      kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
{
    // Thin entry point: dispatch to the common template for signed 32-bit indices.
    __kmp_for_static_init< kmp_int32 >(
                          loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
}
786
787/*!
788 See @ref __kmpc_for_static_init_4
789 */
void
__kmpc_for_static_init_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
                      kmp_uint32 *plower, kmp_uint32 *pupper,
                      kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
{
    // Thin entry point: dispatch to the common template for unsigned 32-bit indices.
    __kmp_for_static_init< kmp_uint32 >(
                          loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
}
798
799/*!
800 See @ref __kmpc_for_static_init_4
801 */
802void
803__kmpc_for_static_init_8( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
804 kmp_int64 *plower, kmp_int64 *pupper,
805 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
806{
807 __kmp_for_static_init< kmp_int64 >(
808 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
809}
810
811/*!
812 See @ref __kmpc_for_static_init_4
813 */
814void
815__kmpc_for_static_init_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter,
816 kmp_uint64 *plower, kmp_uint64 *pupper,
817 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
818{
819 __kmp_for_static_init< kmp_uint64 >(
820 loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk );
821}
822/*!
823@}
824*/
825
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000826/*!
827@ingroup WORK_SHARING
828@param loc Source code location
829@param gtid Global thread id of this thread
Jonathan Peyton81f9cd12015-05-22 22:37:22 +0000830@param schedule Scheduling type for the parallel loop
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000831@param plastiter Pointer to the "last iteration" flag
832@param plower Pointer to the lower bound
833@param pupper Pointer to the upper bound of loop chunk
834@param pupperD Pointer to the upper bound of dist_chunk
Jonathan Peyton81f9cd12015-05-22 22:37:22 +0000835@param pstride Pointer to the stride for parallel loop
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000836@param incr Loop increment
Jonathan Peyton81f9cd12015-05-22 22:37:22 +0000837@param chunk The chunk size for the parallel loop
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000838
839Each of the four functions here are identical apart from the argument types.
840
841The functions compute the upper and lower bounds and strides to be used for the set of iterations
842to be executed by the current thread from the statically scheduled loop that is described by the
843initial values of the bounds, strides, increment and chunks for parallel loop and distribute
844constructs.
845
846@{
847*/
848void
849__kmpc_dist_for_static_init_4(
850 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
851 kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD,
852 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
853{
854 __kmp_dist_for_static_init< kmp_int32 >(
855 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
856}
857
858/*!
859 See @ref __kmpc_dist_for_static_init_4
860 */
861void
862__kmpc_dist_for_static_init_4u(
863 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
864 kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD,
865 kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk )
866{
867 __kmp_dist_for_static_init< kmp_uint32 >(
868 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
869}
870
871/*!
872 See @ref __kmpc_dist_for_static_init_4
873 */
874void
875__kmpc_dist_for_static_init_8(
876 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
877 kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD,
878 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
879{
880 __kmp_dist_for_static_init< kmp_int64 >(
881 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
882}
883
884/*!
885 See @ref __kmpc_dist_for_static_init_4
886 */
887void
888__kmpc_dist_for_static_init_8u(
889 ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter,
890 kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD,
891 kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk )
892{
893 __kmp_dist_for_static_init< kmp_uint64 >(
894 loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk );
895}
896/*!
897@}
898*/
899
900//-----------------------------------------------------------------------------------------
901// Auxiliary routines for Distribute Parallel Loop construct implementation
902// Transfer call to template< type T >
903// __kmp_team_static_init( ident_t *loc, int gtid,
904// int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
905
906/*!
907@ingroup WORK_SHARING
908@{
909@param loc Source location
910@param gtid Global thread id
911@param p_last pointer to last iteration flag
912@param p_lb pointer to Lower bound
913@param p_ub pointer to Upper bound
914@param p_st Step (or increment if you prefer)
915@param incr Loop increment
916@param chunk The chunk size to block with
917
918The functions compute the upper and lower bounds and stride to be used for the set of iterations
919to be executed by the current team from the statically scheduled loop that is described by the
920initial values of the bounds, stride, increment and chunk for the distribute construct as part of
921composite distribute parallel loop construct.
922These functions are all identical apart from the types of the arguments.
923*/
924
925void
926__kmpc_team_static_init_4(
927 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
928 kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
929{
930 KMP_DEBUG_ASSERT( __kmp_init_serial );
931 __kmp_team_static_init< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
932}
933
934/*!
935 See @ref __kmpc_team_static_init_4
936 */
937void
938__kmpc_team_static_init_4u(
939 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
940 kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk )
941{
942 KMP_DEBUG_ASSERT( __kmp_init_serial );
943 __kmp_team_static_init< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
944}
945
946/*!
947 See @ref __kmpc_team_static_init_4
948 */
949void
950__kmpc_team_static_init_8(
951 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
952 kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
953{
954 KMP_DEBUG_ASSERT( __kmp_init_serial );
955 __kmp_team_static_init< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
956}
957
958/*!
959 See @ref __kmpc_team_static_init_4
960 */
961void
962__kmpc_team_static_init_8u(
963 ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
964 kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk )
965{
966 KMP_DEBUG_ASSERT( __kmp_init_serial );
967 __kmp_team_static_init< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk );
968}
969/*!
970@}
971*/
972
Jim Cownie5e8470a2013-09-27 10:38:44 +0000973} // extern "C"
974