Jim Cownie | 5e8470a | 2013-09-27 10:38:44 +0000 | [diff] [blame] | 1 | /* |
| 2 | * kmp_sched.c -- static scheduling -- iteration initialization |
| 3 | * $Revision: 42358 $ |
| 4 | * $Date: 2013-05-07 13:43:26 -0500 (Tue, 07 May 2013) $ |
| 5 | */ |
| 6 | |
| 7 | |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // The LLVM Compiler Infrastructure |
| 11 | // |
| 12 | // This file is dual licensed under the MIT and the University of Illinois Open |
| 13 | // Source Licenses. See LICENSE.txt for details. |
| 14 | // |
| 15 | //===----------------------------------------------------------------------===// |
| 16 | |
| 17 | |
| 18 | /* |
| 19 | * Static scheduling initialization. |
| 20 | * |
| 21 | * NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however |
| 22 | * it may change values between parallel regions. __kmp_max_nth |
| 23 | * is the largest value __kmp_nth may take, 1 is the smallest. |
| 24 | * |
| 25 | */ |
| 26 | |
| 27 | #include "kmp.h" |
| 28 | #include "kmp_i18n.h" |
| 29 | #include "kmp_str.h" |
| 30 | #include "kmp_error.h" |
| 31 | |
// Largest (mx) and smallest (mn) representable value for each loop index
// type handled by the static-scheduling code below.  Only the four explicit
// specializations provide values; the primary template merely declares the
// two members.
template< typename T >
struct i_maxmin {
    static const T mx;
    static const T mn;
};

// 32-bit signed
template<>
struct i_maxmin< int > {
    static const int mx =  0x7fffffff;
    static const int mn = -0x7fffffff - 1;
};

// 32-bit unsigned
template<>
struct i_maxmin< unsigned int > {
    static const unsigned int mx = 0xffffffffU;
    static const unsigned int mn = 0U;
};

// 64-bit signed
template<>
struct i_maxmin< long long > {
    static const long long mx =  0x7fffffffffffffffLL;
    static const long long mn = -0x7fffffffffffffffLL - 1;
};

// 64-bit unsigned
template<>
struct i_maxmin< unsigned long long > {
    static const unsigned long long mx = 0xffffffffffffffffULL;
    static const unsigned long long mn = 0ULL;
};
| 58 | //------------------------------------------------------------------------- |
| 59 | #ifdef KMP_DEBUG |
| 60 | //------------------------------------------------------------------------- |
| 61 | // template for debug prints specification ( d, u, lld, llu ) |
| 62 | char const * traits_t< int >::spec = "d"; |
| 63 | char const * traits_t< unsigned int >::spec = "u"; |
| 64 | char const * traits_t< long long >::spec = "lld"; |
| 65 | char const * traits_t< unsigned long long >::spec = "llu"; |
| 66 | //------------------------------------------------------------------------- |
| 67 | #endif |
| 68 | |
| 69 | template< typename T > |
| 70 | static void |
| 71 | __kmp_for_static_init( |
| 72 | ident_t *loc, |
| 73 | kmp_int32 global_tid, |
| 74 | kmp_int32 schedtype, |
| 75 | kmp_int32 *plastiter, |
| 76 | T *plower, |
| 77 | T *pupper, |
| 78 | typename traits_t< T >::signed_t *pstride, |
| 79 | typename traits_t< T >::signed_t incr, |
| 80 | typename traits_t< T >::signed_t chunk |
| 81 | ) { |
| 82 | typedef typename traits_t< T >::unsigned_t UT; |
| 83 | typedef typename traits_t< T >::signed_t ST; |
| 84 | /* this all has to be changed back to TID and such.. */ |
| 85 | register kmp_int32 gtid = global_tid; |
| 86 | register kmp_uint32 tid; |
| 87 | register kmp_uint32 nth; |
| 88 | register UT trip_count; |
| 89 | register kmp_team_t *team; |
| 90 | |
| 91 | KE_TRACE( 10, ("__kmpc_for_static_init called (%d)\n", global_tid)); |
| 92 | #ifdef KMP_DEBUG |
| 93 | { |
| 94 | const char * buff; |
| 95 | // create format specifiers before the debug output |
| 96 | buff = __kmp_str_format( |
| 97 | "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," \ |
| 98 | " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n", |
| 99 | traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, |
| 100 | traits_t< ST >::spec, traits_t< ST >::spec, traits_t< T >::spec ); |
| 101 | KD_TRACE(100, ( buff, global_tid, schedtype, *plastiter, |
| 102 | *plower, *pupper, *pstride, incr, chunk ) ); |
| 103 | __kmp_str_free( &buff ); |
| 104 | } |
| 105 | #endif |
| 106 | |
| 107 | if ( __kmp_env_consistency_check ) { |
| 108 | __kmp_push_workshare( global_tid, ct_pdo, loc ); |
| 109 | if ( incr == 0 ) { |
| 110 | __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc ); |
| 111 | |
| 112 | } |
| 113 | } |
| 114 | /* special handling for zero-trip loops */ |
| 115 | if ( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) { |
| 116 | *plastiter = FALSE; |
| 117 | /* leave pupper and plower set to entire iteration space */ |
| 118 | *pstride = incr; /* value should never be used */ |
| 119 | // *plower = *pupper - incr; // let compiler bypass the illegal loop (like for(i=1;i<10;i--)) THIS LINE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE ON A ZERO-TRIP LOOP (lower=1,\ |
| 120 | upper=0,stride=1) - JPH June 23, 2009. |
| 121 | #ifdef KMP_DEBUG |
| 122 | { |
| 123 | const char * buff; |
| 124 | // create format specifiers before the debug output |
| 125 | buff = __kmp_str_format( |
| 126 | "__kmpc_for_static_init:(ZERO TRIP) liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>, loc = %%s\n", |
| 127 | traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec ); |
| 128 | KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride, loc->psource ) ); |
| 129 | __kmp_str_free( &buff ); |
| 130 | } |
| 131 | #endif |
| 132 | KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) ); |
| 133 | return; |
| 134 | } |
| 135 | |
| 136 | #if OMP_40_ENABLED |
| 137 | if ( schedtype > kmp_ord_upper ) { |
| 138 | // we are in DISTRIBUTE construct |
| 139 | schedtype += kmp_sch_static - kmp_distribute_static; // AC: convert to usual schedule type |
| 140 | tid = __kmp_threads[ gtid ]->th.th_team->t.t_master_tid; |
| 141 | team = __kmp_threads[ gtid ]->th.th_team->t.t_parent; |
| 142 | } else |
| 143 | #endif |
| 144 | { |
| 145 | tid = __kmp_tid_from_gtid( global_tid ); |
| 146 | team = __kmp_threads[ gtid ]->th.th_team; |
| 147 | } |
| 148 | |
| 149 | /* determine if "for" loop is an active worksharing construct */ |
| 150 | if ( team -> t.t_serialized ) { |
| 151 | /* serialized parallel, each thread executes whole iteration space */ |
| 152 | *plastiter = TRUE; |
| 153 | /* leave pupper and plower set to entire iteration space */ |
| 154 | *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1)); |
| 155 | |
| 156 | #ifdef KMP_DEBUG |
| 157 | { |
| 158 | const char * buff; |
| 159 | // create format specifiers before the debug output |
| 160 | buff = __kmp_str_format( |
| 161 | "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n", |
| 162 | traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec ); |
| 163 | KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) ); |
| 164 | __kmp_str_free( &buff ); |
| 165 | } |
| 166 | #endif |
| 167 | KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) ); |
| 168 | return; |
| 169 | } |
| 170 | nth = team->t.t_nproc; |
| 171 | if ( nth == 1 ) { |
| 172 | *plastiter = TRUE; |
| 173 | |
| 174 | #ifdef KMP_DEBUG |
| 175 | { |
| 176 | const char * buff; |
| 177 | // create format specifiers before the debug output |
| 178 | buff = __kmp_str_format( |
| 179 | "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n", |
| 180 | traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec ); |
| 181 | KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) ); |
| 182 | __kmp_str_free( &buff ); |
| 183 | } |
| 184 | #endif |
| 185 | KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) ); |
| 186 | return; |
| 187 | } |
| 188 | |
| 189 | /* compute trip count */ |
| 190 | if ( incr == 1 ) { |
| 191 | trip_count = *pupper - *plower + 1; |
| 192 | } else if (incr == -1) { |
| 193 | trip_count = *plower - *pupper + 1; |
| 194 | } else { |
| 195 | if ( incr > 1 ) { |
| 196 | trip_count = (*pupper - *plower) / incr + 1; |
| 197 | } else { |
| 198 | trip_count = (*plower - *pupper) / ( -incr ) + 1; |
| 199 | } |
| 200 | } |
| 201 | if ( __kmp_env_consistency_check ) { |
| 202 | /* tripcount overflow? */ |
| 203 | if ( trip_count == 0 && *pupper != *plower ) { |
| 204 | __kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc ); |
| 205 | } |
| 206 | } |
| 207 | |
| 208 | /* compute remaining parameters */ |
| 209 | switch ( schedtype ) { |
| 210 | case kmp_sch_static: |
| 211 | { |
| 212 | if ( trip_count < nth ) { |
| 213 | KMP_DEBUG_ASSERT( |
| 214 | __kmp_static == kmp_sch_static_greedy || \ |
| 215 | __kmp_static == kmp_sch_static_balanced |
| 216 | ); // Unknown static scheduling type. |
| 217 | if ( tid < trip_count ) { |
| 218 | *pupper = *plower = *plower + tid * incr; |
| 219 | } else { |
| 220 | *plower = *pupper + incr; |
| 221 | } |
| 222 | *plastiter = ( tid == trip_count - 1 ); |
| 223 | } else { |
| 224 | if ( __kmp_static == kmp_sch_static_balanced ) { |
| 225 | register UT small_chunk = trip_count / nth; |
| 226 | register UT extras = trip_count % nth; |
| 227 | *plower += incr * ( tid * small_chunk + ( tid < extras ? tid : extras ) ); |
| 228 | *pupper = *plower + small_chunk * incr - ( tid < extras ? 0 : incr ); |
| 229 | *plastiter = ( tid == nth - 1 ); |
| 230 | } else { |
| 231 | register T big_chunk_inc_count = ( trip_count/nth + |
| 232 | ( ( trip_count % nth ) ? 1 : 0) ) * incr; |
| 233 | register T old_upper = *pupper; |
| 234 | |
| 235 | KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy ); |
| 236 | // Unknown static scheduling type. |
| 237 | |
| 238 | *plower += tid * big_chunk_inc_count; |
| 239 | *pupper = *plower + big_chunk_inc_count - incr; |
| 240 | if ( incr > 0 ) { |
| 241 | if ( *pupper < *plower ) { |
| 242 | *pupper = i_maxmin< T >::mx; |
| 243 | } |
| 244 | *plastiter = *plower <= old_upper && *pupper > old_upper - incr; |
| 245 | if ( *pupper > old_upper ) *pupper = old_upper; // tracker C73258 |
| 246 | } else { |
| 247 | if ( *pupper > *plower ) { |
| 248 | *pupper = i_maxmin< T >::mn; |
| 249 | } |
| 250 | *plastiter = *plower >= old_upper && *pupper < old_upper - incr; |
| 251 | if ( *pupper < old_upper ) *pupper = old_upper; // tracker C73258 |
| 252 | } |
| 253 | } |
| 254 | } |
| 255 | break; |
| 256 | } |
| 257 | case kmp_sch_static_chunked: |
| 258 | { |
| 259 | register T span; |
| 260 | if ( chunk < 1 ) { |
| 261 | chunk = 1; |
| 262 | } |
| 263 | span = chunk * incr; |
| 264 | *pstride = span * nth; |
| 265 | *plower = *plower + (span * tid); |
| 266 | *pupper = *plower + span - incr; |
| 267 | /* TODO: is the following line a bug? Shouldn't it be plastiter instead of *plastiter ? */ |
| 268 | if (*plastiter) { /* only calculate this if it was requested */ |
| 269 | kmp_int32 lasttid = ((trip_count - 1) / ( UT )chunk) % nth; |
| 270 | *plastiter = (tid == lasttid); |
| 271 | } |
| 272 | break; |
| 273 | } |
| 274 | default: |
| 275 | KMP_ASSERT2( 0, "__kmpc_for_static_init: unknown scheduling type" ); |
| 276 | break; |
| 277 | } |
| 278 | |
| 279 | #ifdef KMP_DEBUG |
| 280 | { |
| 281 | const char * buff; |
| 282 | // create format specifiers before the debug output |
| 283 | buff = __kmp_str_format( |
| 284 | "__kmpc_for_static_init: liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>\n", |
| 285 | traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec ); |
| 286 | KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) ); |
| 287 | __kmp_str_free( &buff ); |
| 288 | } |
| 289 | #endif |
| 290 | KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) ); |
| 291 | return; |
| 292 | } |
| 293 | |
| 294 | //-------------------------------------------------------------------------------------- |
| 295 | extern "C" { |
| 296 | |
| 297 | /*! |
| 298 | @ingroup WORK_SHARING |
| 299 | @param loc Source code location |
| 300 | @param gtid Global thread id of this thread |
| 301 | @param schedtype Scheduling type |
| 302 | @param plastiter Pointer to the "last iteration" flag |
| 303 | @param plower Pointer to the lower bound |
| 304 | @param pupper Pointer to the upper bound |
| 305 | @param pstride Pointer to the stride |
| 306 | @param incr Loop increment |
| 307 | @param chunk The chunk size |
| 308 | |
| 309 | Each of the four functions here are identical apart from the argument types. |
| 310 | |
| 311 | The functions compute the upper and lower bounds and stride to be used for the set of iterations |
| 312 | to be executed by the current thread from the statically scheduled loop that is described by the |
| 313 | initial values of the bround, stride, increment and chunk size. |
| 314 | |
| 315 | @{ |
| 316 | */ |
| 317 | void |
| 318 | __kmpc_for_static_init_4( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, |
| 319 | kmp_int32 *plower, kmp_int32 *pupper, |
| 320 | kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk ) |
| 321 | { |
| 322 | __kmp_for_static_init< kmp_int32 >( |
| 323 | loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk ); |
| 324 | } |
| 325 | |
| 326 | /*! |
| 327 | See @ref __kmpc_for_static_init_4 |
| 328 | */ |
| 329 | void |
| 330 | __kmpc_for_static_init_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, |
| 331 | kmp_uint32 *plower, kmp_uint32 *pupper, |
| 332 | kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk ) |
| 333 | { |
| 334 | __kmp_for_static_init< kmp_uint32 >( |
| 335 | loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk ); |
| 336 | } |
| 337 | |
| 338 | /*! |
| 339 | See @ref __kmpc_for_static_init_4 |
| 340 | */ |
| 341 | void |
| 342 | __kmpc_for_static_init_8( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, |
| 343 | kmp_int64 *plower, kmp_int64 *pupper, |
| 344 | kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk ) |
| 345 | { |
| 346 | __kmp_for_static_init< kmp_int64 >( |
| 347 | loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk ); |
| 348 | } |
| 349 | |
| 350 | /*! |
| 351 | See @ref __kmpc_for_static_init_4 |
| 352 | */ |
| 353 | void |
| 354 | __kmpc_for_static_init_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, |
| 355 | kmp_uint64 *plower, kmp_uint64 *pupper, |
| 356 | kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk ) |
| 357 | { |
| 358 | __kmp_for_static_init< kmp_uint64 >( |
| 359 | loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk ); |
| 360 | } |
| 361 | /*! |
| 362 | @} |
| 363 | */ |
| 364 | |
| 365 | } // extern "C" |
| 366 | |