blob: e89016591ba5972a22c86bcbc950223b8a11ba7b [file] [log] [blame]
Jonathan Peyton17078362015-09-10 19:22:07 +00001/*
2 * kmp_affinity.h -- header for affinity management
3 */
4
Jonathan Peyton17078362015-09-10 19:22:07 +00005//===----------------------------------------------------------------------===//
6//
7// The LLVM Compiler Infrastructure
8//
9// This file is dual licensed under the MIT and the University of Illinois Open
10// Source Licenses. See LICENSE.txt for details.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef KMP_AFFINITY_H
15#define KMP_AFFINITY_H
16
Jonathan Peyton1cdd87a2016-11-14 21:08:35 +000017#include "kmp.h"
Jonathan Peyton30419822017-05-12 18:01:32 +000018#include "kmp_os.h"
Jonathan Peyton1cdd87a2016-11-14 21:08:35 +000019
20#if KMP_AFFINITY_SUPPORTED
21#if KMP_USE_HWLOC
// Affinity implementation backed by the hwloc library. A Mask wraps an
// hwloc bitmap; thread binding goes through hwloc_get/set_cpubind with
// HWLOC_CPUBIND_THREAD (i.e. it always targets the calling thread).
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask; // hwloc bitmap; allocated in ctor, freed in dtor

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    // Iteration protocol: hwloc_bitmap_first/next return -1 once the set
    // bits are exhausted, so -1 serves as the end() sentinel here.
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
    // Read the calling thread's current CPU binding into this mask.
    // Returns 0 on success, otherwise errno (after optionally aborting).
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    // Bind the calling thread to the CPUs in this mask.
    // Returns 0 on success, otherwise errno (after optionally aborting).
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    // Windows only: return the index of the single processor group this mask
    // occupies, 1 when the machine has only one group, or -1 when the mask
    // spans multiple groups. On non-Windows builds this always returns -1.
    int get_proc_group() const override {
      int i;
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (i = 0; i < __kmp_num_proc_groups; i++) {
        // On windows, the long type is always 32 bits; a 64-bit group mask
        // therefore occupies two consecutive ulongs of the hwloc bitmap.
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1; // bits set in more than one group
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
  // Probe hwloc (initializing/loading the shared topology on first use) and
  // enable or disable affinity support accordingly. 'var' is the name of the
  // environment variable being processed, used only for warning messages.
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  // Bind the calling thread to the single processor 'which' by building a
  // one-bit temporary mask on the stack.
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
163#endif /* KMP_USE_HWLOC */
164
165#if KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +0000166/* On some of the older OS's that we build on, these constants aren't present
167 in <asm/unistd.h> #included from <sys.syscall.h>. They must be the same on
168 all systems of the same arch where they are defined, and they cannot change.
169 stone forever. */
Jonathan Peyton1cdd87a2016-11-14 21:08:35 +0000170#include <sys/syscall.h>
Jonathan Peyton30419822017-05-12 18:01:32 +0000171#if KMP_ARCH_X86 || KMP_ARCH_ARM
172#ifndef __NR_sched_setaffinity
173#define __NR_sched_setaffinity 241
174#elif __NR_sched_setaffinity != 241
175#error Wrong code for setaffinity system call.
176#endif /* __NR_sched_setaffinity */
177#ifndef __NR_sched_getaffinity
178#define __NR_sched_getaffinity 242
179#elif __NR_sched_getaffinity != 242
180#error Wrong code for getaffinity system call.
181#endif /* __NR_sched_getaffinity */
182#elif KMP_ARCH_AARCH64
183#ifndef __NR_sched_setaffinity
184#define __NR_sched_setaffinity 122
185#elif __NR_sched_setaffinity != 122
186#error Wrong code for setaffinity system call.
187#endif /* __NR_sched_setaffinity */
188#ifndef __NR_sched_getaffinity
189#define __NR_sched_getaffinity 123
190#elif __NR_sched_getaffinity != 123
191#error Wrong code for getaffinity system call.
192#endif /* __NR_sched_getaffinity */
193#elif KMP_ARCH_X86_64
194#ifndef __NR_sched_setaffinity
195#define __NR_sched_setaffinity 203
196#elif __NR_sched_setaffinity != 203
197#error Wrong code for setaffinity system call.
198#endif /* __NR_sched_setaffinity */
199#ifndef __NR_sched_getaffinity
200#define __NR_sched_getaffinity 204
201#elif __NR_sched_getaffinity != 204
202#error Wrong code for getaffinity system call.
203#endif /* __NR_sched_getaffinity */
204#elif KMP_ARCH_PPC64
205#ifndef __NR_sched_setaffinity
206#define __NR_sched_setaffinity 222
207#elif __NR_sched_setaffinity != 222
208#error Wrong code for setaffinity system call.
209#endif /* __NR_sched_setaffinity */
210#ifndef __NR_sched_getaffinity
211#define __NR_sched_getaffinity 223
212#elif __NR_sched_getaffinity != 223
213#error Wrong code for getaffinity system call.
214#endif /* __NR_sched_getaffinity */
215#elif KMP_ARCH_MIPS
216#ifndef __NR_sched_setaffinity
217#define __NR_sched_setaffinity 4239
218#elif __NR_sched_setaffinity != 4239
219#error Wrong code for setaffinity system call.
220#endif /* __NR_sched_setaffinity */
221#ifndef __NR_sched_getaffinity
222#define __NR_sched_getaffinity 4240
223#elif __NR_sched_getaffinity != 4240
224#error Wrong code for getaffinity system call.
225#endif /* __NR_sched_getaffinity */
226#elif KMP_ARCH_MIPS64
227#ifndef __NR_sched_setaffinity
228#define __NR_sched_setaffinity 5195
229#elif __NR_sched_setaffinity != 5195
230#error Wrong code for setaffinity system call.
231#endif /* __NR_sched_setaffinity */
232#ifndef __NR_sched_getaffinity
233#define __NR_sched_getaffinity 5196
234#elif __NR_sched_getaffinity != 5196
235#error Wrong code for getaffinity system call.
236#endif /* __NR_sched_getaffinity */
237#error Unknown or unsupported architecture
238#endif /* KMP_ARCH_* */
Jonathan Peyton1cdd87a2016-11-14 21:08:35 +0000239class KMPNativeAffinity : public KMPAffinity {
Jonathan Peyton30419822017-05-12 18:01:32 +0000240 class Mask : public KMPAffinity::Mask {
241 typedef unsigned char mask_t;
242 static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
243
244 public:
245 mask_t *mask;
246 Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
247 ~Mask() {
248 if (mask)
249 __kmp_free(mask);
Jonathan Peyton1cdd87a2016-11-14 21:08:35 +0000250 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000251 void set(int i) override {
252 mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
Jonathan Peyton1cdd87a2016-11-14 21:08:35 +0000253 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000254 bool is_set(int i) const override {
255 return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
Jonathan Peyton1cdd87a2016-11-14 21:08:35 +0000256 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000257 void clear(int i) override {
258 mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
Jonathan Peyton1cdd87a2016-11-14 21:08:35 +0000259 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000260 void zero() override {
261 for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
262 mask[i] = 0;
Jonathan Peyton1cdd87a2016-11-14 21:08:35 +0000263 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000264 void copy(const KMPAffinity::Mask *src) override {
265 const Mask *convert = static_cast<const Mask *>(src);
266 for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
267 mask[i] = convert->mask[i];
Jonathan Peyton1cdd87a2016-11-14 21:08:35 +0000268 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000269 void bitwise_and(const KMPAffinity::Mask *rhs) override {
270 const Mask *convert = static_cast<const Mask *>(rhs);
271 for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
272 mask[i] &= convert->mask[i];
273 }
274 void bitwise_or(const KMPAffinity::Mask *rhs) override {
275 const Mask *convert = static_cast<const Mask *>(rhs);
276 for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
277 mask[i] |= convert->mask[i];
278 }
279 void bitwise_not() override {
280 for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
281 mask[i] = ~(mask[i]);
282 }
283 int begin() const override {
284 int retval = 0;
285 while (retval < end() && !is_set(retval))
286 ++retval;
287 return retval;
288 }
289 int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
290 int next(int previous) const override {
291 int retval = previous + 1;
292 while (retval < end() && !is_set(retval))
293 ++retval;
294 return retval;
295 }
296 int get_system_affinity(bool abort_on_error) override {
297 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
298 "Illegal get affinity operation when not capable");
299 int retval =
300 syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
301 if (retval >= 0) {
302 return 0;
303 }
304 int error = errno;
305 if (abort_on_error) {
Jonathan Peyton6a393f72017-09-05 15:43:58 +0000306 __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
Jonathan Peyton30419822017-05-12 18:01:32 +0000307 }
308 return error;
309 }
310 int set_system_affinity(bool abort_on_error) const override {
311 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
312 "Illegal get affinity operation when not capable");
313 int retval =
314 syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
315 if (retval >= 0) {
316 return 0;
317 }
318 int error = errno;
319 if (abort_on_error) {
Jonathan Peyton6a393f72017-09-05 15:43:58 +0000320 __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
Jonathan Peyton30419822017-05-12 18:01:32 +0000321 }
322 return error;
323 }
324 };
325 void determine_capable(const char *env_var) override {
326 __kmp_affinity_determine_capable(env_var);
327 }
328 void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
329 KMPAffinity::Mask *allocate_mask() override {
330 KMPNativeAffinity::Mask *retval = new Mask();
331 return retval;
332 }
333 void deallocate_mask(KMPAffinity::Mask *m) override {
334 KMPNativeAffinity::Mask *native_mask =
335 static_cast<KMPNativeAffinity::Mask *>(m);
336 delete m;
337 }
338 KMPAffinity::Mask *allocate_mask_array(int num) override {
339 return new Mask[num];
340 }
341 void deallocate_mask_array(KMPAffinity::Mask *array) override {
342 Mask *linux_array = static_cast<Mask *>(array);
343 delete[] linux_array;
344 }
345 KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
346 int index) override {
347 Mask *linux_array = static_cast<Mask *>(array);
348 return &(linux_array[index]);
349 }
350 api_type get_api_type() const override { return NATIVE_OS; }
Jonathan Peyton1cdd87a2016-11-14 21:08:35 +0000351};
352#endif /* KMP_OS_LINUX */
353
354#if KMP_OS_WINDOWS
// Affinity implementation using the native Windows API, with support for
// machines that have more than one processor group. A Mask is an array of
// ULONG_PTR bit vectors, one element per processor group.
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t; // one bit vector per processor group
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask; // array of __kmp_num_proc_groups elements

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    // Iteration: linear scan for set bits across all groups' vectors.
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    // Bind the calling thread to this mask. With multiple processor groups
    // the mask must lie entirely within one group (a Windows restriction on
    // thread affinity); a mask spanning groups is rejected with -1. Returns
    // 0 on success, otherwise a Windows error code (optionally fatal).
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        // Single-group machine: the plain affinity-mask API suffices.
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    // Read the calling thread's affinity into this mask. In the single-group
    // case Windows has no direct "get thread affinity" call, so
    // SetThreadAffinityMask() is used twice: once to learn the previous mask
    // (its return value) and once to restore it. Returns 0 on success,
    // otherwise a Windows error code (optionally fatal) or -1.
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        // NOTE(review): valid groups are 0..__kmp_num_proc_groups-1, so this
        // bound looks like it should be '>=' rather than '>' — confirm.
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        // Temporarily set the process mask; the call returns the thread's
        // previous affinity mask, which is what we actually want.
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        // Restore the thread's original affinity.
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    // Return the index of the single group this mask covers, 1 when the
    // machine has only one group, or -1 when the mask spans several groups.
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1; // bits set in more than one group
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
536#endif /* KMP_OS_WINDOWS */
537#endif /* KMP_AFFINITY_SUPPORTED */
538
Jonathan Peyton17078362015-09-10 19:22:07 +0000539class Address {
540public:
Jonathan Peyton30419822017-05-12 18:01:32 +0000541 static const unsigned maxDepth = 32;
542 unsigned labels[maxDepth];
543 unsigned childNums[maxDepth];
544 unsigned depth;
545 unsigned leader;
546 Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
547 Address &operator=(const Address &b) {
548 depth = b.depth;
549 for (unsigned i = 0; i < depth; i++) {
550 labels[i] = b.labels[i];
551 childNums[i] = b.childNums[i];
Jonathan Peyton17078362015-09-10 19:22:07 +0000552 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000553 leader = FALSE;
554 return *this;
555 }
556 bool operator==(const Address &b) const {
557 if (depth != b.depth)
558 return false;
559 for (unsigned i = 0; i < depth; i++)
560 if (labels[i] != b.labels[i])
561 return false;
562 return true;
563 }
564 bool isClose(const Address &b, int level) const {
565 if (depth != b.depth)
566 return false;
567 if ((unsigned)level >= depth)
568 return true;
569 for (unsigned i = 0; i < (depth - level); i++)
570 if (labels[i] != b.labels[i])
571 return false;
572 return true;
573 }
574 bool operator!=(const Address &b) const { return !operator==(b); }
575 void print() const {
576 unsigned i;
577 printf("Depth: %u --- ", depth);
578 for (i = 0; i < depth; i++) {
579 printf("%u ", labels[i]);
Jonathan Peyton17078362015-09-10 19:22:07 +0000580 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000581 }
Jonathan Peyton17078362015-09-10 19:22:07 +0000582};
583
584class AddrUnsPair {
585public:
Jonathan Peyton30419822017-05-12 18:01:32 +0000586 Address first;
587 unsigned second;
588 AddrUnsPair(Address _first, unsigned _second)
589 : first(_first), second(_second) {}
590 AddrUnsPair &operator=(const AddrUnsPair &b) {
591 first = b.first;
592 second = b.second;
593 return *this;
594 }
595 void print() const {
596 printf("first = ");
597 first.print();
598 printf(" --- second = %u", second);
599 }
600 bool operator==(const AddrUnsPair &b) const {
601 if (first != b.first)
602 return false;
603 if (second != b.second)
604 return false;
605 return true;
606 }
607 bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
Jonathan Peyton17078362015-09-10 19:22:07 +0000608};
609
Jonathan Peyton30419822017-05-12 18:01:32 +0000610static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
Andrey Churbanov5ba90c72017-07-17 09:03:14 +0000611 const Address *aa = &(((const AddrUnsPair *)a)->first);
612 const Address *bb = &(((const AddrUnsPair *)b)->first);
Jonathan Peyton30419822017-05-12 18:01:32 +0000613 unsigned depth = aa->depth;
614 unsigned i;
615 KMP_DEBUG_ASSERT(depth == bb->depth);
616 for (i = 0; i < depth; i++) {
617 if (aa->labels[i] < bb->labels[i])
618 return -1;
619 if (aa->labels[i] > bb->labels[i])
620 return 1;
621 }
622 return 0;
Jonathan Peyton17078362015-09-10 19:22:07 +0000623}
624
Jonathan Peyton30419822017-05-12 18:01:32 +0000625/* A structure for holding machine-specific hierarchy info to be computed once
626 at init. This structure represents a mapping of threads to the actual machine
627 hierarchy, or to our best guess at what the hierarchy might be, for the
628 purpose of performing an efficient barrier. In the worst case, when there is
629 no machine hierarchy information, it produces a tree suitable for a barrier,
630 similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  /** Number of levels in the hierarchy. Typical levels are threads/core,
      cores/package or socket, packages/node, nodes/machine, etc. We don't want
      to get specific with nomenclature. When the machine is oversubscribed we
      add levels to duplicate the hierarchy, doubling the thread capacity of the
      hierarchy each time we add a level. */
  kmp_uint32 maxLevels;

  /** This is specifically the depth of the machine configuration hierarchy, in
      terms of the number of levels along the longest path from root to any
      leaf. It corresponds to the number of entries in numPerLevel if we exclude
      all but one trailing 1. */
  kmp_uint32 depth;
  kmp_uint32 base_num_threads; // thread count the hierarchy was built/resized for
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
  // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
      the parent of a node at level i has. For example, if we have a machine
      with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
      {2, 4, 4, 1, 1}. All empty levels are set to 1. */
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel; // stride between subtrees at each level

  // Fill numPerLevel from the machine addresses: for each hierarchy level
  // (deepest first), the level's width is the largest childNum seen plus one.
  // Assumes adr2os has already been sorted by Address labels.
  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  // Release the level arrays and return to the not-initialized state so the
  // hierarchy can be rebuilt (e.g. on library re-initialization). Only acts
  // once the hierarchy has been fully initialized.
  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  // Build the hierarchy for num_addrs threads. Thread-safe one-time init:
  // exactly one caller wins the CAS and performs the construction; every
  // other concurrent caller spins until it completes. If adr2os is NULL a
  // default hyper-barrier-like shape is used instead of real topology info.
  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Added explicit initialization of the data fields here to prevent usage of
       dirty value observed when static library is re-initialized multiple times
       (e.g. when non-OpenMP thread repeatedly launches/joins thread that uses
       OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    // numPerLevel and skipPerLevel share a single allocation of 2*maxLevels.
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      // No topology info: default shape of maxLeaves threads per leaf node.
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    // Balance the tree: repeatedly halve any level wider than the branching
    // factor, doubling the next level up to preserve total capacity.
    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++; // round odd widths up before halving
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    // skipPerLevel[i] = number of leaves under one subtree at level i.
    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }

  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    // Single-resizer protocol: spin until we win the CAS or another thread's
    // resize already covers nproc.
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels;
           ++i) { // init numPerLevel[*] to 1 item per level
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels;
           ++i) { // init numPerLevel[*] to 1 item per level
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
829#endif // KMP_AFFINITY_H