/*
 * kmp_affinity.h -- header for affinity management
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp_os.h"
#include "kmp.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
    class Mask : public KMPAffinity::Mask {
        hwloc_cpuset_t mask;
    public:
        Mask() { mask = hwloc_bitmap_alloc(); this->zero(); }
        ~Mask() { hwloc_bitmap_free(mask); }
        void set(int i) override { hwloc_bitmap_set(mask, i); }
        bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
        void clear(int i) override { hwloc_bitmap_clr(mask, i); }
        void zero() override { hwloc_bitmap_zero(mask); }
        void copy(const KMPAffinity::Mask* src) override {
            const Mask* convert = static_cast<const Mask*>(src);
            hwloc_bitmap_copy(mask, convert->mask);
        }
        void bitwise_and(const KMPAffinity::Mask* rhs) override {
            const Mask* convert = static_cast<const Mask*>(rhs);
            hwloc_bitmap_and(mask, mask, convert->mask);
        }
        void bitwise_or(const KMPAffinity::Mask* rhs) override {
            const Mask* convert = static_cast<const Mask*>(rhs);
            hwloc_bitmap_or(mask, mask, convert->mask);
        }
        void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
        int begin() const override { return hwloc_bitmap_first(mask); }
        int end() const override { return -1; }
        int next(int previous) const override { return hwloc_bitmap_next(mask, previous); }
        int get_system_affinity(bool abort_on_error) override {
            KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                        "Illegal get affinity operation when not capable");
            int retval = hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
            if (retval >= 0) {
                return 0;
            }
            int error = errno;
            if (abort_on_error) {
                __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
            }
            return error;
        }
        int set_system_affinity(bool abort_on_error) const override {
            KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                        "Illegal set affinity operation when not capable");
            int retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
            if (retval >= 0) {
                return 0;
            }
            int error = errno;
            if (abort_on_error) {
                __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
            }
            return error;
        }
        int get_proc_group() const override {
            int i;
            int group = -1;
# if KMP_OS_WINDOWS
            if (__kmp_num_proc_groups == 1) {
                return 1;
            }
            for (i = 0; i < __kmp_num_proc_groups; i++) {
                // On windows, the long type is always 32 bits
                unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2);
                unsigned long second_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2+1);
                if (first_32_bits == 0 && second_32_bits == 0) {
                    continue;
                }
                if (group >= 0) {
                    return -1;
                }
                group = i;
            }
# endif /* KMP_OS_WINDOWS */
            return group;
        }
    };
    void determine_capable(const char* var) override {
        const hwloc_topology_support* topology_support;
        if (__kmp_hwloc_topology == NULL) {
            if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
                __kmp_hwloc_error = TRUE;
                if (__kmp_affinity_verbose)
                    KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
            }
            if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
                __kmp_hwloc_error = TRUE;
                if (__kmp_affinity_verbose)
                    KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
            }
        }
        topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
        // Is the system capable of setting/getting this thread's affinity?
        // also, is topology discovery possible? (pu indicates ability to discover processing units)
        // and finally, were there no errors when calling any hwloc_* API functions?
        if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
            topology_support->cpubind->get_thisthread_cpubind &&
            topology_support->discovery->pu &&
            !__kmp_hwloc_error)
        {
            // enables affinity according to KMP_AFFINITY_CAPABLE() macro
            KMP_AFFINITY_ENABLE(TRUE);
        } else {
            // indicate that hwloc didn't work and disable affinity
            __kmp_hwloc_error = TRUE;
            KMP_AFFINITY_DISABLE();
        }
    }
    void bind_thread(int which) override {
        KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                    "Illegal set affinity operation when not capable");
        KMPAffinity::Mask *mask;
        KMP_CPU_ALLOC_ON_STACK(mask);
        KMP_CPU_ZERO(mask);
        KMP_CPU_SET(which, mask);
        __kmp_set_system_affinity(mask, TRUE);
        KMP_CPU_FREE_FROM_STACK(mask);
    }
    KMPAffinity::Mask* allocate_mask() override { return new Mask(); }
    void deallocate_mask(KMPAffinity::Mask* m) override { delete m; }
    KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
    void deallocate_mask_array(KMPAffinity::Mask* array) override {
        Mask* hwloc_array = static_cast<Mask*>(array);
        delete[] hwloc_array;
    }
    KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
        Mask* hwloc_array = static_cast<Mask*>(array);
        return &(hwloc_array[index]);
    }
    api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */
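// Usage sketch (illustrative only, not part of the runtime API): every Mask
// implementation in this header exposes the same begin()/end()/next()
// traversal, so code that walks the set bits of an affinity mask can be
// written generically. For example, assuming `m` points at a
// KMPAffinity::Mask owned elsewhere:
//
//   for (int proc = m->begin(); proc != m->end(); proc = m->next(proc)) {
//     // `proc` is an OS processor id currently present in the mask.
//   }
//
// This works both for the hwloc mask above (where end() is -1 and next()
// returns -1 past the last set bit) and for the native masks below (where
// end() is the bit capacity of the mask).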

#if KMP_OS_LINUX
/*
 * On some of the older OSes that we build on, these constants aren't present
 * in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
 * all systems of the same arch where they are defined, and they cannot change:
 * they are set in stone forever.
 */
#include <sys/syscall.h>
# if KMP_ARCH_X86 || KMP_ARCH_ARM
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity  241
#  elif __NR_sched_setaffinity != 241
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity  242
#  elif __NR_sched_getaffinity != 242
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# elif KMP_ARCH_AARCH64
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity  122
#  elif __NR_sched_setaffinity != 122
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity  123
#  elif __NR_sched_getaffinity != 123
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# elif KMP_ARCH_X86_64
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity  203
#  elif __NR_sched_setaffinity != 203
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity  204
#  elif __NR_sched_getaffinity != 204
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# elif KMP_ARCH_PPC64
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity  222
#  elif __NR_sched_setaffinity != 222
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity  223
#  elif __NR_sched_getaffinity != 223
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# elif KMP_ARCH_MIPS
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity  4239
#  elif __NR_sched_setaffinity != 4239
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity  4240
#  elif __NR_sched_getaffinity != 4240
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# elif KMP_ARCH_MIPS64
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity  5195
#  elif __NR_sched_setaffinity != 5195
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity  5196
#  elif __NR_sched_getaffinity != 5196
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# else
#  error Unknown or unsupported architecture
# endif /* KMP_ARCH_* */
class KMPNativeAffinity : public KMPAffinity {
    class Mask : public KMPAffinity::Mask {
        typedef unsigned char mask_t;
        static const int BITS_PER_MASK_T = sizeof(mask_t)*CHAR_BIT;
    public:
        mask_t* mask;
        Mask() { mask = (mask_t*)__kmp_allocate(__kmp_affin_mask_size); }
        ~Mask() { if (mask) __kmp_free(mask); }
        void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); }
        bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); }
        void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); }
        void zero() override {
            for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
                mask[i] = 0;
        }
        void copy(const KMPAffinity::Mask* src) override {
            const Mask* convert = static_cast<const Mask*>(src);
            for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
                mask[i] = convert->mask[i];
        }
        void bitwise_and(const KMPAffinity::Mask* rhs) override {
            const Mask* convert = static_cast<const Mask*>(rhs);
            for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
                mask[i] &= convert->mask[i];
        }
        void bitwise_or(const KMPAffinity::Mask* rhs) override {
            const Mask* convert = static_cast<const Mask*>(rhs);
            for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
                mask[i] |= convert->mask[i];
        }
        void bitwise_not() override {
            for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
                mask[i] = ~(mask[i]);
        }
        int begin() const override {
            int retval = 0;
            while (retval < end() && !is_set(retval))
                ++retval;
            return retval;
        }
        int end() const override { return __kmp_affin_mask_size*BITS_PER_MASK_T; }
        int next(int previous) const override {
            int retval = previous + 1;
            while (retval < end() && !is_set(retval))
                ++retval;
            return retval;
        }
        int get_system_affinity(bool abort_on_error) override {
            KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                        "Illegal get affinity operation when not capable");
            int retval = syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
            if (retval >= 0) {
                return 0;
            }
            int error = errno;
            if (abort_on_error) {
                __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
            }
            return error;
        }
        int set_system_affinity(bool abort_on_error) const override {
            KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                        "Illegal set affinity operation when not capable");
            int retval = syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
            if (retval >= 0) {
                return 0;
            }
            int error = errno;
            if (abort_on_error) {
                __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
            }
            return error;
        }
    };
    void determine_capable(const char* env_var) override {
        __kmp_affinity_determine_capable(env_var);
    }
    void bind_thread(int which) override {
        __kmp_affinity_bind_thread(which);
    }
    KMPAffinity::Mask* allocate_mask() override {
        KMPNativeAffinity::Mask* retval = new Mask();
        return retval;
    }
    void deallocate_mask(KMPAffinity::Mask* m) override {
        KMPNativeAffinity::Mask* native_mask = static_cast<KMPNativeAffinity::Mask*>(m);
        delete native_mask;
    }
    KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
    void deallocate_mask_array(KMPAffinity::Mask* array) override {
        Mask* linux_array = static_cast<Mask*>(array);
        delete[] linux_array;
    }
    KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
        Mask* linux_array = static_cast<Mask*>(array);
        return &(linux_array[index]);
    }
    api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX */

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
    class Mask : public KMPAffinity::Mask {
        typedef ULONG_PTR mask_t;
        static const int BITS_PER_MASK_T = sizeof(mask_t)*CHAR_BIT;
        mask_t* mask;
    public:
        Mask() { mask = (mask_t*)__kmp_allocate(sizeof(mask_t)*__kmp_num_proc_groups); }
        ~Mask() { if (mask) __kmp_free(mask); }
        void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); }
        bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); }
        void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); }
        void zero() override {
            for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
                mask[i] = 0;
        }
        void copy(const KMPAffinity::Mask* src) override {
            const Mask* convert = static_cast<const Mask*>(src);
            for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
                mask[i] = convert->mask[i];
        }
        void bitwise_and(const KMPAffinity::Mask* rhs) override {
            const Mask* convert = static_cast<const Mask*>(rhs);
            for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
                mask[i] &= convert->mask[i];
        }
        void bitwise_or(const KMPAffinity::Mask* rhs) override {
            const Mask* convert = static_cast<const Mask*>(rhs);
            for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
                mask[i] |= convert->mask[i];
        }
        void bitwise_not() override {
            for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
                mask[i] = ~(mask[i]);
        }
        int begin() const override {
            int retval = 0;
            while (retval < end() && !is_set(retval))
                ++retval;
            return retval;
        }
        int end() const override { return __kmp_num_proc_groups*BITS_PER_MASK_T; }
        int next(int previous) const override {
            int retval = previous + 1;
            while (retval < end() && !is_set(retval))
                ++retval;
            return retval;
        }
        int set_system_affinity(bool abort_on_error) const override {
            if (__kmp_num_proc_groups > 1) {
                // Check for a valid mask.
                GROUP_AFFINITY ga;
                int group = get_proc_group();
                if (group < 0) {
                    if (abort_on_error) {
                        KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
                    }
                    return -1;
                }
                // Transform the bit vector into a GROUP_AFFINITY struct
                // and make the system call to set affinity.
                ga.Group = group;
                ga.Mask = mask[group];
                ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

                KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
                if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetThreadAffMask),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
            } else {
                if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetThreadAffMask),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
            }
            return 0;
        }
        int get_system_affinity(bool abort_on_error) override {
            if (__kmp_num_proc_groups > 1) {
                this->zero();
                GROUP_AFFINITY ga;
                KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
                if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
                if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) || (ga.Mask == 0)) {
                    return -1;
                }
                mask[ga.Group] = ga.Mask;
            } else {
                mask_t newMask, sysMask, retval;
                if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
                retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
                if (!retval) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
                newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
                if (!newMask) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                }
                *mask = retval;
            }
            return 0;
        }
        int get_proc_group() const override {
            int group = -1;
            if (__kmp_num_proc_groups == 1) {
                return 1;
            }
            for (int i = 0; i < __kmp_num_proc_groups; i++) {
                if (mask[i] == 0)
                    continue;
                if (group >= 0)
                    return -1;
                group = i;
            }
            return group;
        }
    };
    void determine_capable(const char* env_var) override {
        __kmp_affinity_determine_capable(env_var);
    }
    void bind_thread(int which) override {
        __kmp_affinity_bind_thread(which);
    }
    KMPAffinity::Mask* allocate_mask() override { return new Mask(); }
    void deallocate_mask(KMPAffinity::Mask* m) override { delete m; }
    KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
    void deallocate_mask_array(KMPAffinity::Mask* array) override {
        Mask* windows_array = static_cast<Mask*>(array);
        delete[] windows_array;
    }
    KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
        Mask* windows_array = static_cast<Mask*>(array);
        return &(windows_array[index]);
    }
    api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */

class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth)
        : depth(_depth), leader(FALSE) {
    }
    Address &operator=(const Address &b) {
        depth = b.depth;
        for (unsigned i = 0; i < depth; i++) {
            labels[i] = b.labels[i];
            childNums[i] = b.childNums[i];
        }
        leader = FALSE;
        return *this;
    }
    bool operator==(const Address &b) const {
        if (depth != b.depth)
            return false;
        for (unsigned i = 0; i < depth; i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool isClose(const Address &b, int level) const {
        if (depth != b.depth)
            return false;
        if ((unsigned)level >= depth)
            return true;
        for (unsigned i = 0; i < (depth - level); i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool operator!=(const Address &b) const {
        return !operator==(b);
    }
    void print() const {
        unsigned i;
        printf("Depth: %u --- ", depth);
        for (i = 0; i < depth; i++) {
            printf("%u ", labels[i]);
        }
    }
};
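// Illustrative example (not part of the header): on a hypothetical machine
// described by a 3-level hierarchy (package / core / hardware thread), the
// second hardware thread of core 1 on package 0 could be encoded as an
// Address with depth = 3 and labels = {0, 1, 1}. With that encoding,
// isClose(b, 1) compares only the package and core labels and ignores the
// innermost (hardware-thread) level.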

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second)
        : first(_first), second(_second) {
    }
    AddrUnsPair &operator=(const AddrUnsPair &b)
    {
        first = b.first;
        second = b.second;
        return *this;
    }
    void print() const {
        printf("first = "); first.print();
        printf(" --- second = %u", second);
    }
    bool operator==(const AddrUnsPair &b) const {
        if (first != b.first) return false;
        if (second != b.second) return false;
        return true;
    }
    bool operator!=(const AddrUnsPair &b) const {
        return !operator==(b);
    }
};


static int
__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    for (i = 0; i < depth; i++) {
        if (aa->labels[i] < bb->labels[i]) return -1;
        if (aa->labels[i] > bb->labels[i]) return 1;
    }
    return 0;
}


/** A structure for holding machine-specific hierarchy info to be computed once at init.
    This structure represents a mapping of threads to the actual machine hierarchy, or to
    our best guess at what the hierarchy might be, for the purpose of performing an
    efficient barrier. In the worst case, when there is no machine hierarchy information,
    it produces a tree suitable for a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
    /** Good default values for number of leaves and branching factor, given no affinity information.
        Behaves a bit like hyper barrier. */
    static const kmp_uint32 maxLeaves = 4;
    static const kmp_uint32 minBranch = 4;
    /** Number of levels in the hierarchy. Typical levels are threads/core, cores/package
        or socket, packages/node, nodes/machine, etc. We don't want to get specific with
        nomenclature. When the machine is oversubscribed we add levels to duplicate the
        hierarchy, doubling the thread capacity of the hierarchy each time we add a level. */
    kmp_uint32 maxLevels;

    /** This is specifically the depth of the machine configuration hierarchy, in terms of the
        number of levels along the longest path from root to any leaf. It corresponds to the
        number of entries in numPerLevel if we exclude all but one trailing 1. */
    kmp_uint32 depth;
    kmp_uint32 base_num_threads;
    enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
    volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 2=initialization in progress
    volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

    /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
        node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
        and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
    kmp_uint32 *numPerLevel;
    kmp_uint32 *skipPerLevel;
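    // Worked example (illustrative, derived from init() below): for the
    // 4-package / 4-core / 2-HT machine described above, numPerLevel starts
    // as {2, 4, 4, 1, ...} and init() computes skipPerLevel = {1, 2, 8, 32,
    // 64, ...}. That is, skipPerLevel[i] is the number of leaves (threads)
    // spanned by a subtree rooted at level i; entries past the machine depth
    // keep doubling so the tree can absorb oversubscription.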

    void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
        int hier_depth = adr2os[0].first.depth;
        int level = 0;
        for (int i = hier_depth - 1; i >= 0; --i) {
            int max = -1;
            for (int j = 0; j < num_addrs; ++j) {
                int next = adr2os[j].first.childNums[i];
                if (next > max) max = next;
            }
            numPerLevel[level] = max + 1;
            ++level;
        }
    }

    hierarchy_info() : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

    void fini() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); }

    void init(AddrUnsPair *adr2os, int num_addrs)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, not_initialized, initializing);
        if (bool_result == 0) { // Wait for initialization
            while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE();
            return;
        }
        KMP_DEBUG_ASSERT(bool_result == 1);

        /* Added explicit initialization of the data fields here to prevent usage of dirty value
           observed when static library is re-initialized multiple times (e.g. when
           non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
        depth = 1;
        resizing = 0;
        maxLevels = 7;
        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
        skipPerLevel = &(numPerLevel[maxLevels]);
        for (kmp_uint32 i = 0; i < maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        // Sort table by physical ID
        if (adr2os) {
            qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
            deriveLevels(adr2os, num_addrs);
        }
        else {
            numPerLevel[0] = maxLeaves;
            numPerLevel[1] = num_addrs / maxLeaves;
            if (num_addrs % maxLeaves) numPerLevel[1]++;
        }

        base_num_threads = num_addrs;
        for (int i = maxLevels - 1; i >= 0; --i) // count non-empty levels to get depth
            if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
                depth++;

        kmp_uint32 branch = minBranch;
        if (numPerLevel[0] == 1) branch = num_addrs / maxLeaves;
        if (branch < minBranch) branch = minBranch;
        for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
            while (numPerLevel[d] > branch || (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
                if (numPerLevel[d] & 1) numPerLevel[d]++;
                numPerLevel[d] = numPerLevel[d] >> 1;
                if (numPerLevel[d+1] == 1) depth++;
                numPerLevel[d+1] = numPerLevel[d+1] << 1;
            }
            if (numPerLevel[0] == 1) {
                branch = branch >> 1;
                if (branch < 4) branch = minBranch;
            }
        }

        for (kmp_uint32 i = 1; i < depth; ++i)
            skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
        // Fill in hierarchy in the case of oversubscription
        for (kmp_uint32 i = depth; i < maxLevels; ++i)
            skipPerLevel[i] = 2*skipPerLevel[i-1];

        uninitialized = initialized; // One writer

    }
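    // Worked example (illustrative only): calling init(NULL, 16) -- no
    // topology information, 16 threads -- leaves numPerLevel = {4, 4, 1, ...}
    // (maxLeaves children per leaf-level parent), giving depth = 3 and
    // skipPerLevel = {1, 4, 16, 32, 64, ...}. The resulting tree is the
    // hyper-barrier-like fallback described in the class comment above.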

    // Resize the hierarchy if nproc changes to something larger than before
    void resize(kmp_uint32 nproc)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
        while (bool_result == 0) { // someone else is trying to resize
            KMP_CPU_PAUSE();
            if (nproc <= base_num_threads) // happy with other thread's resize
                return;
            else // try to resize
                bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
        }
        KMP_DEBUG_ASSERT(bool_result != 0);
        if (nproc <= base_num_threads) return; // happy with other thread's resize

        // Calculate new maxLevels
        kmp_uint32 old_sz = skipPerLevel[depth-1];
        kmp_uint32 incs = 0, old_maxLevels = maxLevels;
        // First see if old maxLevels is enough to contain new size
        for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
            skipPerLevel[i] = 2*skipPerLevel[i-1];
            numPerLevel[i-1] *= 2;
            old_sz *= 2;
            depth++;
        }
        if (nproc > old_sz) { // Not enough space, need to expand hierarchy
            while (nproc > old_sz) {
                old_sz *= 2;
                incs++;
                depth++;
            }
            maxLevels += incs;

            // Resize arrays
            kmp_uint32 *old_numPerLevel = numPerLevel;
            kmp_uint32 *old_skipPerLevel = skipPerLevel;
            numPerLevel = skipPerLevel = NULL;
            numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
            skipPerLevel = &(numPerLevel[maxLevels]);

            // Copy old elements from old arrays
            for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
                numPerLevel[i] = old_numPerLevel[i];
                skipPerLevel[i] = old_skipPerLevel[i];
            }

            // Init new elements in arrays to 1
            for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
                numPerLevel[i] = 1;
                skipPerLevel[i] = 1;
            }

            // Free old arrays
            __kmp_free(old_numPerLevel);
        }

        // Fill in oversubscription levels of hierarchy
        for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
            skipPerLevel[i] = 2*skipPerLevel[i-1];

        base_num_threads = nproc;
        resizing = 0; // One writer

    }
};
#endif // KMP_AFFINITY_H