/*
 * kmp_affinity.h -- header for affinity management
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp_os.h"
#include "kmp.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity: public KMPAffinity {
public:
    class Mask : public KMPAffinity::Mask {
        hwloc_cpuset_t mask;
    public:
        Mask() { mask = hwloc_bitmap_alloc(); this->zero(); }
        ~Mask() { hwloc_bitmap_free(mask); }
        void set(int i) override { hwloc_bitmap_set(mask, i); }
        bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
        void clear(int i) override { hwloc_bitmap_clr(mask, i); }
        void zero() override { hwloc_bitmap_zero(mask); }
        void copy(const KMPAffinity::Mask* src) override {
            const Mask* convert = static_cast<const Mask*>(src);
            hwloc_bitmap_copy(mask, convert->mask);
        }
        void bitwise_and(const KMPAffinity::Mask* rhs) override {
            const Mask* convert = static_cast<const Mask*>(rhs);
            hwloc_bitmap_and(mask, mask, convert->mask);
        }
        void bitwise_or(const KMPAffinity::Mask* rhs) override {
            const Mask* convert = static_cast<const Mask*>(rhs);
            hwloc_bitmap_or(mask, mask, convert->mask);
        }
        void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
        int begin() const override { return hwloc_bitmap_first(mask); }
        int end() const override { return -1; }
        int next(int previous) const override { return hwloc_bitmap_next(mask, previous); }
        int get_system_affinity(bool abort_on_error) override {
            KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                        "Illegal get affinity operation when not capable");
            int retval = hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
            if (retval >= 0) {
                return 0;
            }
            int error = errno;
            if (abort_on_error) {
                __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
            }
            return error;
        }
        int set_system_affinity(bool abort_on_error) const override {
            KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                        "Illegal set affinity operation when not capable");
            int retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
            if (retval >= 0) {
                return 0;
            }
            int error = errno;
            if (abort_on_error) {
                __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
            }
            return error;
        }
        int get_proc_group() const override {
            int i;
            int group = -1;
# if KMP_OS_WINDOWS
            if (__kmp_num_proc_groups == 1) {
                return 1;
            }
            for (i = 0; i < __kmp_num_proc_groups; i++) {
                // On Windows, the 'unsigned long' type is always 32 bits
                unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2);
                unsigned long second_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2+1);
                if (first_32_bits == 0 && second_32_bits == 0) {
                    continue;
                }
                if (group >= 0) {
                    return -1;
                }
                group = i;
            }
# endif /* KMP_OS_WINDOWS */
            return group;
        }
    };
    void determine_capable(const char* var) override {
        const hwloc_topology_support* topology_support;
        if (__kmp_hwloc_topology == NULL) {
            if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
                __kmp_hwloc_error = TRUE;
                if (__kmp_affinity_verbose)
                    KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
            }
            if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
                __kmp_hwloc_error = TRUE;
                if (__kmp_affinity_verbose)
                    KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
            }
        }
        topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
        // Is the system capable of setting/getting this thread's affinity?
        // Also, is topology discovery possible? ('pu' indicates the ability to
        // discover processing units.)  And finally, were there no errors when
        // calling any hwloc_* API functions?
        if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
            topology_support->cpubind->get_thisthread_cpubind &&
            topology_support->discovery->pu &&
            !__kmp_hwloc_error)
        {
            // enables affinity according to KMP_AFFINITY_CAPABLE() macro
            KMP_AFFINITY_ENABLE(TRUE);
        } else {
            // indicate that hwloc didn't work and disable affinity
            __kmp_hwloc_error = TRUE;
            KMP_AFFINITY_DISABLE();
        }
    }
    void bind_thread(int which) override {
        KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                    "Illegal set affinity operation when not capable");
        KMPAffinity::Mask *mask;
        KMP_CPU_ALLOC_ON_STACK(mask);
        KMP_CPU_ZERO(mask);
        KMP_CPU_SET(which, mask);
        __kmp_set_system_affinity(mask, TRUE);
        KMP_CPU_FREE_FROM_STACK(mask);
    }
    KMPAffinity::Mask* allocate_mask() override { return new Mask(); }
    void deallocate_mask(KMPAffinity::Mask* m) override { delete m; }
    KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
    void deallocate_mask_array(KMPAffinity::Mask* array) override {
        Mask* hwloc_array = static_cast<Mask*>(array);
        delete[] hwloc_array;
    }
    KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
        Mask* hwloc_array = static_cast<Mask*>(array);
        return &(hwloc_array[index]);
    }
    api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */
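
// Usage sketch (illustrative only, not part of this header).  Assuming one of
// the KMPAffinity implementations in this file has been selected, a
// hypothetical caller could pin the current thread to processor 3 roughly as
// follows; the variable names below are made up for the example.
//
//   KMPAffinity *api = new KMPHwlocAffinity();        // or KMPNativeAffinity()
//   api->determine_capable("KMP_AFFINITY");           // probe OS/hwloc support
//   KMPAffinity::Mask *m = api->allocate_mask();
//   m->zero();
//   m->set(3);                                        // request processor 3
//   m->set_system_affinity(/*abort_on_error=*/false);
//   api->deallocate_mask(m);
//   delete api;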

#if KMP_OS_LINUX
/*
 * On some of the older OSes that we build on, these constants aren't present
 * in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
 * all systems of the same arch where they are defined, and they cannot
 * change: they are set in stone forever.
 */
#include <sys/syscall.h>
# if KMP_ARCH_X86 || KMP_ARCH_ARM
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity  241
#  elif __NR_sched_setaffinity != 241
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity  242
#  elif __NR_sched_getaffinity != 242
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# elif KMP_ARCH_AARCH64
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity  122
#  elif __NR_sched_setaffinity != 122
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity  123
#  elif __NR_sched_getaffinity != 123
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# elif KMP_ARCH_X86_64
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity  203
#  elif __NR_sched_setaffinity != 203
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity  204
#  elif __NR_sched_getaffinity != 204
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# elif KMP_ARCH_PPC64
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity  222
#  elif __NR_sched_setaffinity != 222
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity  223
#  elif __NR_sched_getaffinity != 223
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# else
#  error Unknown or unsupported architecture
# endif /* KMP_ARCH_* */
class KMPNativeAffinity : public KMPAffinity {
    class Mask : public KMPAffinity::Mask {
        typedef unsigned char mask_t;
        static const int BITS_PER_MASK_T = sizeof(mask_t)*CHAR_BIT;
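        // The mask is a flat array of __kmp_affin_mask_size bytes with one bit
        // per logical processor: bit i lives in byte i/BITS_PER_MASK_T at bit
        // position i%BITS_PER_MASK_T.  For example (assuming CHAR_BIT == 8),
        // set(11) turns on bit 3 of mask[1].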
    public:
        mask_t* mask;
        Mask() { mask = (mask_t*)__kmp_allocate(__kmp_affin_mask_size); }
        ~Mask() { if (mask) __kmp_free(mask); }
        void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); }
        bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); }
        void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); }
        void zero() override {
            for (size_t i=0; i<__kmp_affin_mask_size; ++i)
                mask[i] = 0;
        }
        void copy(const KMPAffinity::Mask* src) override {
            const Mask * convert = static_cast<const Mask*>(src);
            for (size_t i=0; i<__kmp_affin_mask_size; ++i)
                mask[i] = convert->mask[i];
        }
        void bitwise_and(const KMPAffinity::Mask* rhs) override {
            const Mask * convert = static_cast<const Mask*>(rhs);
            for (size_t i=0; i<__kmp_affin_mask_size; ++i)
                mask[i] &= convert->mask[i];
        }
        void bitwise_or(const KMPAffinity::Mask* rhs) override {
            const Mask * convert = static_cast<const Mask*>(rhs);
            for (size_t i=0; i<__kmp_affin_mask_size; ++i)
                mask[i] |= convert->mask[i];
        }
        void bitwise_not() override {
            for (size_t i=0; i<__kmp_affin_mask_size; ++i)
                mask[i] = ~(mask[i]);
        }
        int begin() const override {
            int retval = 0;
            while (retval < end() && !is_set(retval))
                ++retval;
            return retval;
        }
        int end() const override { return __kmp_affin_mask_size*BITS_PER_MASK_T; }
        int next(int previous) const override {
            int retval = previous+1;
            while (retval < end() && !is_set(retval))
                ++retval;
            return retval;
        }
        int get_system_affinity(bool abort_on_error) override {
            KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                        "Illegal get affinity operation when not capable");
            int retval = syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
            if (retval >= 0) {
                return 0;
            }
            int error = errno;
            if (abort_on_error) {
                __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
            }
            return error;
        }
        int set_system_affinity(bool abort_on_error) const override {
            KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                        "Illegal set affinity operation when not capable");
            int retval = syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
            if (retval >= 0) {
                return 0;
            }
            int error = errno;
            if (abort_on_error) {
                __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
            }
            return error;
        }
    };
    void determine_capable(const char* env_var) override {
        __kmp_affinity_determine_capable(env_var);
    }
    void bind_thread(int which) override {
        __kmp_affinity_bind_thread(which);
    }
    KMPAffinity::Mask* allocate_mask() override {
        KMPNativeAffinity::Mask* retval = new Mask();
        return retval;
    }
    void deallocate_mask(KMPAffinity::Mask* m) override {
        KMPNativeAffinity::Mask* native_mask = static_cast<KMPNativeAffinity::Mask*>(m);
        delete native_mask;
    }
    KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
    void deallocate_mask_array(KMPAffinity::Mask* array) override {
        Mask* linux_array = static_cast<Mask*>(array);
        delete[] linux_array;
    }
    KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
        Mask* linux_array = static_cast<Mask*>(array);
        return &(linux_array[index]);
    }
    api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX */

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
    class Mask : public KMPAffinity::Mask {
        typedef ULONG_PTR mask_t;
        static const int BITS_PER_MASK_T = sizeof(mask_t)*CHAR_BIT;
        mask_t* mask;
    public:
        Mask() { mask = (mask_t*)__kmp_allocate(sizeof(mask_t)*__kmp_num_proc_groups); }
        ~Mask() { if (mask) __kmp_free(mask); }
        void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); }
        bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); }
        void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); }
        void zero() override {
            for (size_t i=0; i<__kmp_num_proc_groups; ++i)
                mask[i] = 0;
        }
        void copy(const KMPAffinity::Mask* src) override {
            const Mask * convert = static_cast<const Mask*>(src);
            for (size_t i=0; i<__kmp_num_proc_groups; ++i)
                mask[i] = convert->mask[i];
        }
        void bitwise_and(const KMPAffinity::Mask* rhs) override {
            const Mask * convert = static_cast<const Mask*>(rhs);
            for (size_t i=0; i<__kmp_num_proc_groups; ++i)
                mask[i] &= convert->mask[i];
        }
        void bitwise_or(const KMPAffinity::Mask* rhs) override {
            const Mask * convert = static_cast<const Mask*>(rhs);
            for (size_t i=0; i<__kmp_num_proc_groups; ++i)
                mask[i] |= convert->mask[i];
        }
        void bitwise_not() override {
            for (size_t i=0; i<__kmp_num_proc_groups; ++i)
                mask[i] = ~(mask[i]);
        }
        int begin() const override {
            int retval = 0;
            while (retval < end() && !is_set(retval))
                ++retval;
            return retval;
        }
        int end() const override { return __kmp_num_proc_groups*BITS_PER_MASK_T; }
        int next(int previous) const override {
            int retval = previous+1;
            while (retval < end() && !is_set(retval))
                ++retval;
            return retval;
        }
        int set_system_affinity(bool abort_on_error) const override {
            if (__kmp_num_proc_groups > 1) {
                // Check for a valid mask.
                GROUP_AFFINITY ga;
                int group = get_proc_group();
                if (group < 0) {
                    if (abort_on_error) {
                        KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
                    }
                    return -1;
                }
                // Transform the bit vector into a GROUP_AFFINITY struct
                // and make the system call to set affinity.
                ga.Group = group;
                ga.Mask = mask[group];
                ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

                KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
                if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetThreadAffMask),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
            } else {
                if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetThreadAffMask),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
            }
            return 0;
        }
        int get_system_affinity(bool abort_on_error) override {
            if (__kmp_num_proc_groups > 1) {
                this->zero();
                GROUP_AFFINITY ga;
                KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
                if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
                if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) || (ga.Mask == 0)) {
                    return -1;
                }
                mask[ga.Group] = ga.Mask;
            } else {
                mask_t newMask, sysMask, retval;
                if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
                retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
                if (!retval) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
                newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
                if (!newMask) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                }
                *mask = retval;
            }
            return 0;
        }
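        // Report which Windows processor group this mask uses.  Returns the
        // group index when exactly one group has bits set, 1 when processor
        // groups are not in use, and -1 when the mask is empty or spans more
        // than one group (it then cannot be expressed as a single
        // GROUP_AFFINITY).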
        int get_proc_group() const override {
            int group = -1;
            if (__kmp_num_proc_groups == 1) {
                return 1;
            }
            for (int i = 0; i < __kmp_num_proc_groups; i++) {
                if (mask[i] == 0)
                    continue;
                if (group >= 0)
                    return -1;
                group = i;
            }
            return group;
        }
    };
    void determine_capable(const char* env_var) override {
        __kmp_affinity_determine_capable(env_var);
    }
    void bind_thread(int which) override {
        __kmp_affinity_bind_thread(which);
    }
    KMPAffinity::Mask* allocate_mask() override { return new Mask(); }
    void deallocate_mask(KMPAffinity::Mask* m) override { delete m; }
    KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
    void deallocate_mask_array(KMPAffinity::Mask* array) override {
        Mask* windows_array = static_cast<Mask*>(array);
        delete[] windows_array;
    }
    KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
        Mask* windows_array = static_cast<Mask*>(array);
        return &(windows_array[index]);
    }
    api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */

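// An Address records the position of one OS processor within the machine
// topology as a path of labels from the root of the hierarchy down to the
// leaf (e.g. {package, core, hardware thread}).  As an illustration (the
// concrete numbers are hypothetical), on a machine with 2 packages, 4 cores
// per package and 2 threads per core, the second thread of core 3 on
// package 1 could be described as labels = {1, 3, 1} with depth = 3.
// childNums[i] is roughly the index of the node at level i among its
// siblings, and 'leader' is a scratch flag (initialized to FALSE) used by the
// topology-mapping code.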
class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth)
      : depth(_depth), leader(FALSE) {
    }
    Address &operator=(const Address &b) {
        depth = b.depth;
        for (unsigned i = 0; i < depth; i++) {
            labels[i] = b.labels[i];
            childNums[i] = b.childNums[i];
        }
        leader = FALSE;
        return *this;
    }
    bool operator==(const Address &b) const {
        if (depth != b.depth)
            return false;
        for (unsigned i = 0; i < depth; i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool isClose(const Address &b, int level) const {
        if (depth != b.depth)
            return false;
        if ((unsigned)level >= depth)
            return true;
        for (unsigned i = 0; i < (depth - level); i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool operator!=(const Address &b) const {
        return !operator==(b);
    }
    void print() const {
        unsigned i;
        printf("Depth: %u --- ", depth);
        for (i = 0; i < depth; i++) {
            printf("%u ", labels[i]);
        }
    }
};

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {
    }
    AddrUnsPair &operator=(const AddrUnsPair &b)
    {
        first = b.first;
        second = b.second;
        return *this;
    }
    void print() const {
        printf("first = "); first.print();
        printf(" --- second = %u", second);
    }
    bool operator==(const AddrUnsPair &b) const {
        if (first != b.first) return false;
        if (second != b.second) return false;
        return true;
    }
    bool operator!=(const AddrUnsPair &b) const {
        return !operator==(b);
    }
};


static int
__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    for (i = 0; i < depth; i++) {
        if (aa->labels[i] < bb->labels[i]) return -1;
        if (aa->labels[i] > bb->labels[i]) return 1;
    }
    return 0;
}


/** A structure for holding machine-specific hierarchy info to be computed once at init.
    This structure represents a mapping of threads to the actual machine hierarchy, or to
    our best guess at what the hierarchy might be, for the purpose of performing an
    efficient barrier. In the worst case, when there is no machine hierarchy information,
    it produces a tree suitable for a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
    /** Good default values for number of leaves and branching factor, given no affinity information.
        Behaves a bit like hyper barrier. */
    static const kmp_uint32 maxLeaves=4;
    static const kmp_uint32 minBranch=4;
    /** Number of levels in the hierarchy. Typical levels are threads/core, cores/package
        or socket, packages/node, nodes/machine, etc. We don't want to get specific with
        nomenclature. When the machine is oversubscribed we add levels to duplicate the
        hierarchy, doubling the thread capacity of the hierarchy each time we add a level. */
    kmp_uint32 maxLevels;

    /** This is specifically the depth of the machine configuration hierarchy, in terms of the
        number of levels along the longest path from root to any leaf. It corresponds to the
        number of entries in numPerLevel if we exclude all but one trailing 1. */
    kmp_uint32 depth;
    kmp_uint32 base_num_threads;
    enum init_status { initialized=0, not_initialized=1, initializing=2 };
    volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 2=initialization in progress
    volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

    /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
        node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
        and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
    kmp_uint32 *numPerLevel;
    kmp_uint32 *skipPerLevel;
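    // skipPerLevel[i] is the number of leaf threads spanned by one subtree rooted
    // at level i: skipPerLevel[0] = 1 and skipPerLevel[i] =
    // numPerLevel[i-1] * skipPerLevel[i-1] (see init() below).  For the example
    // above, numPerLevel = {2, 4, 4, 1, ...} gives skipPerLevel = {1, 2, 8, 32, ...};
    // entries past the natural depth are doubled to absorb oversubscription.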

    void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
        int hier_depth = adr2os[0].first.depth;
        int level = 0;
        for (int i=hier_depth-1; i>=0; --i) {
            int max = -1;
            for (int j=0; j<num_addrs; ++j) {
                int next = adr2os[j].first.childNums[i];
                if (next > max) max = next;
            }
            numPerLevel[level] = max+1;
            ++level;
        }
    }

    hierarchy_info() : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

    void fini() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); }

    void init(AddrUnsPair *adr2os, int num_addrs)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, not_initialized, initializing);
        if (bool_result == 0) { // Wait for initialization
            while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE();
            return;
        }
        KMP_DEBUG_ASSERT(bool_result==1);

        /* Added explicit initialization of the data fields here to prevent usage of dirty value
           observed when static library is re-initialized multiple times (e.g. when
           non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
        depth = 1;
        resizing = 0;
        maxLevels = 7;
        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
        skipPerLevel = &(numPerLevel[maxLevels]);
        for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        // Sort table by physical ID
        if (adr2os) {
            qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
            deriveLevels(adr2os, num_addrs);
        }
        else {
            numPerLevel[0] = maxLeaves;
            numPerLevel[1] = num_addrs/maxLeaves;
            if (num_addrs%maxLeaves) numPerLevel[1]++;
        }

        base_num_threads = num_addrs;
        for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
            if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
                depth++;

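        // Optimize the width of the hierarchy: repeatedly halve any level that
        // is wider than 'branch' (or wider than maxLeaves at level 0), pushing
        // the factor of two up into the next level so the total thread
        // capacity of the tree is preserved.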
        kmp_uint32 branch = minBranch;
        if (numPerLevel[0] == 1) branch = num_addrs/maxLeaves;
        if (branch < minBranch) branch = minBranch;
        for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
            while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>maxLeaves)) { // max 4 on level 0!
                if (numPerLevel[d] & 1) numPerLevel[d]++;
                numPerLevel[d] = numPerLevel[d] >> 1;
                if (numPerLevel[d+1] == 1) depth++;
                numPerLevel[d+1] = numPerLevel[d+1] << 1;
            }
            if (numPerLevel[0] == 1) {
                branch = branch >> 1;
                if (branch < 4) branch = minBranch;
            }
        }

        for (kmp_uint32 i=1; i<depth; ++i)
            skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
        // Fill in hierarchy in the case of oversubscription
        for (kmp_uint32 i=depth; i<maxLevels; ++i)
            skipPerLevel[i] = 2*skipPerLevel[i-1];

        uninitialized = initialized; // One writer

    }

    // Resize the hierarchy if nproc changes to something larger than before
    void resize(kmp_uint32 nproc)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
        while (bool_result == 0) { // someone else is trying to resize
            KMP_CPU_PAUSE();
            if (nproc <= base_num_threads) // happy with other thread's resize
                return;
            else // try to resize
                bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
        }
        KMP_DEBUG_ASSERT(bool_result!=0);
        if (nproc <= base_num_threads) return; // happy with other thread's resize

        // Calculate new maxLevels
        kmp_uint32 old_sz = skipPerLevel[depth-1];
        kmp_uint32 incs = 0, old_maxLevels = maxLevels;
        // First see if old maxLevels is enough to contain new size
        for (kmp_uint32 i=depth; i<maxLevels && nproc>old_sz; ++i) {
            skipPerLevel[i] = 2*skipPerLevel[i-1];
            numPerLevel[i-1] *= 2;
            old_sz *= 2;
            depth++;
        }
        if (nproc > old_sz) { // Not enough space, need to expand hierarchy
            while (nproc > old_sz) {
                old_sz *= 2;
                incs++;
                depth++;
            }
            maxLevels += incs;

            // Resize arrays
            kmp_uint32 *old_numPerLevel = numPerLevel;
            kmp_uint32 *old_skipPerLevel = skipPerLevel;
            numPerLevel = skipPerLevel = NULL;
            numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
            skipPerLevel = &(numPerLevel[maxLevels]);

            // Copy old elements from old arrays
            for (kmp_uint32 i=0; i<old_maxLevels; ++i) {
                numPerLevel[i] = old_numPerLevel[i];
                skipPerLevel[i] = old_skipPerLevel[i];
            }

            // Init new elements in arrays to 1
            for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) {
                numPerLevel[i] = 1;
                skipPerLevel[i] = 1;
            }

            // Free old arrays
            __kmp_free(old_numPerLevel);
        }

        // Fill in oversubscription levels of hierarchy
        for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
            skipPerLevel[i] = 2*skipPerLevel[i-1];

        base_num_threads = nproc;
        resizing = 0; // One writer

    }
};
#endif // KMP_AFFINITY_H