/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error),
                  __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
74 "Illegal get affinity operation when not capable");
      int retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error),
                  __kmp_msg_null);
      }
      return error;
    }
    int get_proc_group() const override {
      int i;
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (i = 0; i < __kmp_num_proc_groups; i++) {
        // On Windows, the long type is always 32 bits
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */

#if KMP_OS_LINUX
/* On some of the older OSes that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot
   change: they are set in stone forever. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
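
// Illustrative sketch, not part of the runtime: the class below drives the
// affinity system calls directly with the constants pinned above, following
// essentially this pattern (the 128-byte buffer is an arbitrary example):
//   unsigned char buf[128];
//   long err = syscall(__NR_sched_getaffinity, 0 /* calling thread */,
//                      sizeof(buf), buf);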
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned char mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error),
                  __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
317 "Illegal get affinity operation when not capable");
      int retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error),
                  __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX */

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetThreadAffMask),
                      KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetThreadAffMask),
                      KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_msg(kmp_ms_fatal,
                      KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                      KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_msg(kmp_ms_fatal,
                      KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                      KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_msg(kmp_ms_fatal,
                      KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                      KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_msg(kmp_ms_fatal,
                      KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                      KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */
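
// A minimal usage sketch of the mask API above (illustrative only; assumes
// the runtime has installed one of these implementations as
// __kmp_affinity_dispatch, declared in kmp.h):
//   KMPAffinity::Mask *m = __kmp_affinity_dispatch->allocate_mask();
//   m->zero();
//   m->set(3); // allow only logical CPU 3 (arbitrary example)
//   m->set_system_affinity(/*abort_on_error=*/true);
//   __kmp_affinity_dispatch->deallocate_mask(m);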

class Address {
public:
  static const unsigned maxDepth = 32;
  unsigned labels[maxDepth];
  unsigned childNums[maxDepth];
  unsigned depth;
  unsigned leader;
  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
  Address &operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
      labels[i] = b.labels[i];
      childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
  }
  bool operator==(const Address &b) const {
    if (depth != b.depth)
      return false;
    for (unsigned i = 0; i < depth; i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool isClose(const Address &b, int level) const {
    if (depth != b.depth)
      return false;
    if ((unsigned)level >= depth)
      return true;
    for (unsigned i = 0; i < (depth - level); i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool operator!=(const Address &b) const { return !operator==(b); }
  void print() const {
    unsigned i;
    printf("Depth: %u --- ", depth);
    for (i = 0; i < depth; i++) {
      printf("%u ", labels[i]);
    }
  }
};
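
// Illustrative, hypothetical construction of an address for the thread on
// package 2, core 1, hardware thread 0 of a package/core/thread topology:
//   Address addr(/*_depth=*/3);
//   addr.labels[0] = 2; // package (outermost level)
//   addr.labels[1] = 1; // core within the package
//   addr.labels[2] = 0; // hw thread within the core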

class AddrUnsPair {
public:
  Address first;
  unsigned second;
  AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {}
  AddrUnsPair &operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
  }
  void print() const {
    printf("first = ");
    first.print();
    printf(" --- second = %u", second);
  }
  bool operator==(const AddrUnsPair &b) const {
    if (first != b.first)
      return false;
    if (second != b.second)
      return false;
    return true;
  }
  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};

static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}

/* A structure for holding machine-specific hierarchy info to be computed
   once at init. This structure represents a mapping of threads to the actual
   machine hierarchy, or to our best guess at what the hierarchy might be,
   for the purpose of performing an efficient barrier. In the worst case,
   when there is no machine hierarchy information, it produces a tree
   suitable for a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  /** Number of levels in the hierarchy. Typical levels are threads/core,
      cores/package or socket, packages/node, nodes/machine, etc. We don't
      want to get specific with nomenclature. When the machine is
      oversubscribed we add levels to duplicate the hierarchy, doubling the
      thread capacity of the hierarchy each time we add a level. */
  kmp_uint32 maxLevels;

  /** This is specifically the depth of the machine configuration hierarchy,
      in terms of the number of levels along the longest path from root to
      any leaf. It corresponds to the number of entries in numPerLevel if we
      exclude all but one trailing 1. */
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
                                   // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
      the parent of a node at level i has. For example, if we have a machine
      with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
      {2, 4, 4, 1, 1}. All empty levels are set to 1. */
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;
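  // skipPerLevel[i] is the stride, in leaf threads, between consecutive
  // nodes at level i: skipPerLevel[0] = 1, and skipPerLevel[i] =
  // numPerLevel[i-1] * skipPerLevel[i-1] (computed in init() below). Worked
  // example for the 4-package, 4-core, 2-HT machine above:
  //   numPerLevel  = {2, 4, 4, 1, 1}
  //   skipPerLevel = {1, 2, 8, 32, 32}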

  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }
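  // Worked example: on the 4-package/4-core/2-HT machine above, addresses
  // have depth 3, so the loop scans childNums innermost-first and fills in
  // numPerLevel[0] = 2 (HT/core), numPerLevel[1] = 4 (cores/package), and
  // numPerLevel[2] = 4 (packages); the remaining levels stay at 1.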

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Added explicit initialization of the data fields here to prevent usage
       of dirty value observed when static library is re-initialized multiple
       times (e.g. when non-OpenMP thread repeatedly launches/joins thread
       that uses OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }

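  // Worked example for resize() below (hypothetical numbers): with depth ==
  // 3, maxLevels == 7, and skipPerLevel[2] == 8, resize(100) walks the
  // pre-filled oversubscription levels, doubling capacity (16, 32, 64, 128)
  // until nproc is covered; depth grows to 7 and no reallocation is needed
  // because 100 <= 128, the capacity of the existing maxLevels.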
  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
#endif // KMP_AFFINITY_H