/*
 * kmp_affinity.cpp -- affinity management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_affinity.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_str.h"
#include "kmp_wrapper_getpid.h"
#if KMP_USE_HIER_SCHED
#include "kmp_dispatch_hier.h"
#endif

// Store the real or imagined machine hierarchy here
static hierarchy_info machine_hierarchy;

void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
  kmp_uint32 depth;
  // The test below is true if affinity is available, but set to "none". Need to
  // init on first use of hierarchical barrier.
  if (TCR_1(machine_hierarchy.uninitialized))
    machine_hierarchy.init(NULL, nproc);

  // Adjust the hierarchy in case num threads exceeds original
  if (nproc > machine_hierarchy.base_num_threads)
    machine_hierarchy.resize(nproc);

  depth = machine_hierarchy.depth;
  KMP_DEBUG_ASSERT(depth > 0);

  thr_bar->depth = depth;
  thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1;
  thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}

#if KMP_AFFINITY_SUPPORTED

bool KMPAffinity::picked_api = false;

void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
void *KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); }
void KMPAffinity::Mask::operator delete(void *p) { __kmp_free(p); }
void KMPAffinity::Mask::operator delete[](void *p) { __kmp_free(p); }
void *KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); }
void KMPAffinity::operator delete(void *p) { __kmp_free(p); }

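// Choose the concrete affinity implementation exactly once: the hwloc-based
// one if the user asked for it and affinity is not disabled, otherwise the
// native OS affinity API.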
void KMPAffinity::pick_api() {
  KMPAffinity *affinity_dispatch;
  if (picked_api)
    return;
#if KMP_USE_HWLOC
  // Only use Hwloc if affinity isn't explicitly disabled and
  // user requests Hwloc topology method
  if (__kmp_affinity_top_method == affinity_top_method_hwloc &&
      __kmp_affinity_type != affinity_disabled) {
    affinity_dispatch = new KMPHwlocAffinity();
  } else
#endif
  {
    affinity_dispatch = new KMPNativeAffinity();
  }
  __kmp_affinity_dispatch = affinity_dispatch;
  picked_api = true;
}

void KMPAffinity::destroy_api() {
  if (__kmp_affinity_dispatch != NULL) {
    delete __kmp_affinity_dispatch;
    __kmp_affinity_dispatch = NULL;
    picked_api = false;
  }
}

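// Advance "scan" to the terminating NUL of the string it points at, so the
// next KMP_SNPRINTF appends rather than overwrites.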
#define KMP_ADVANCE_SCAN(scan) \
  while (*scan != '\0') { \
    scan++; \
  }

// Print the affinity mask to the character array in a pretty format.
// The format is a comma separated list of non-negative integers or integer
// ranges: e.g., 1,2,3-5,7,9-15
// The format can also be the string "{<empty>}" if no bits are set in mask
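// For example, a mask with bits {0,1,2,5,9,10} prints as "0-2,5,9,10". A
// typical call site (as used elsewhere in this file):
//   char buf[KMP_AFFIN_MASK_PRINT_LEN];
//   __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);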
char *__kmp_affinity_print_mask(char *buf, int buf_len,
                                kmp_affin_mask_t *mask) {
  int start = 0, finish = 0, previous = 0;
  bool first_range;
  KMP_ASSERT(buf);
  KMP_ASSERT(buf_len >= 40);
  KMP_ASSERT(mask);
  char *scan = buf;
  char *end = buf + buf_len - 1;

  // Check for empty set.
  if (mask->begin() == mask->end()) {
    KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}");
    KMP_ADVANCE_SCAN(scan);
    KMP_ASSERT(scan <= end);
    return buf;
  }

  first_range = true;
  start = mask->begin();
  while (1) {
    // Find next range
    // [start, previous] is inclusive range of contiguous bits in mask
    for (finish = mask->next(start), previous = start;
         finish == previous + 1 && finish != mask->end();
         finish = mask->next(finish)) {
      previous = finish;
    }

    // The first range does not need a comma printed before it, but the rest
    // of the ranges do need a comma beforehand
    if (!first_range) {
      KMP_SNPRINTF(scan, end - scan + 1, "%s", ",");
      KMP_ADVANCE_SCAN(scan);
    } else {
      first_range = false;
    }
    // Range with three or more contiguous bits in the affinity mask
    if (previous - start > 1) {
      KMP_SNPRINTF(scan, end - scan + 1, "%d-%d", static_cast<int>(start),
                   static_cast<int>(previous));
    } else {
      // Range with one or two contiguous bits in the affinity mask
      KMP_SNPRINTF(scan, end - scan + 1, "%d", static_cast<int>(start));
      KMP_ADVANCE_SCAN(scan);
      if (previous - start > 0) {
        KMP_SNPRINTF(scan, end - scan + 1, ",%d", static_cast<int>(previous));
      }
    }
    KMP_ADVANCE_SCAN(scan);
    // Start over with new start point
    start = finish;
    if (start == mask->end())
      break;
    // Check for overflow
    if (end - scan < 2)
      break;
  }

  // Check for overflow
  KMP_ASSERT(scan <= end);
  return buf;
}
#undef KMP_ADVANCE_SCAN

// Print the affinity mask to the string buffer object in a pretty format
// The format is a comma separated list of non-negative integers or integer
// ranges: e.g., 1,2,3-5,7,9-15
// The format can also be the string "{<empty>}" if no bits are set in mask
kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
                                           kmp_affin_mask_t *mask) {
  int start = 0, finish = 0, previous = 0;
  bool first_range;
  KMP_ASSERT(buf);
  KMP_ASSERT(mask);

  __kmp_str_buf_clear(buf);

  // Check for empty set.
  if (mask->begin() == mask->end()) {
    __kmp_str_buf_print(buf, "%s", "{<empty>}");
    return buf;
  }

  first_range = true;
  start = mask->begin();
  while (1) {
    // Find next range
    // [start, previous] is inclusive range of contiguous bits in mask
    for (finish = mask->next(start), previous = start;
         finish == previous + 1 && finish != mask->end();
         finish = mask->next(finish)) {
      previous = finish;
    }

    // The first range does not need a comma printed before it, but the rest
    // of the ranges do need a comma beforehand
    if (!first_range) {
      __kmp_str_buf_print(buf, "%s", ",");
    } else {
      first_range = false;
    }
    // Range with three or more contiguous bits in the affinity mask
    if (previous - start > 1) {
      __kmp_str_buf_print(buf, "%d-%d", static_cast<int>(start),
                          static_cast<int>(previous));
    } else {
      // Range with one or two contiguous bits in the affinity mask
      __kmp_str_buf_print(buf, "%d", static_cast<int>(start));
      if (previous - start > 0) {
        __kmp_str_buf_print(buf, ",%d", static_cast<int>(previous));
      }
    }
    // Start over with new start point
    start = finish;
    if (start == mask->end())
      break;
  }
  return buf;
}

void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
  KMP_CPU_ZERO(mask);

#if KMP_GROUP_AFFINITY

  if (__kmp_num_proc_groups > 1) {
    int group;
    KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
    for (group = 0; group < __kmp_num_proc_groups; group++) {
      int i;
      int num = __kmp_GetActiveProcessorCount(group);
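      // Flat proc ids are group-relative: each Windows* OS processor group
      // holds up to CHAR_BIT * sizeof(DWORD_PTR) (64 on a 64-bit OS) logical
      // processors, so proc id = group * 64 + i there.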
      for (i = 0; i < num; i++) {
        KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
      }
    }
  } else

#endif /* KMP_GROUP_AFFINITY */

  {
    int proc;
    for (proc = 0; proc < __kmp_xproc; proc++) {
      KMP_CPU_SET(proc, mask);
    }
  }
}

// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the child_num
// vector of the address object. This is done in case the labels used for
// the children at one node of the hierarchy differ from those used for
// another node at the same level. Example: suppose the machine has 2 nodes
// with 2 packages each. The first node contains packages 601 and 602, and
// the second node contains packages 603 and 604. If we try to sort the table
// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
// because we are paying attention to the labels themselves, not the ordinal
// child numbers. By using the child numbers in the sort, the result is
// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
static void __kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
                                             int numAddrs) {
  KMP_DEBUG_ASSERT(numAddrs > 0);
  int depth = address2os->first.depth;
  unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *lastLabel = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  int labCt;
  for (labCt = 0; labCt < depth; labCt++) {
    address2os[0].first.childNums[labCt] = counts[labCt] = 0;
    lastLabel[labCt] = address2os[0].first.labels[labCt];
  }
  int i;
  for (i = 1; i < numAddrs; i++) {
    for (labCt = 0; labCt < depth; labCt++) {
      if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
        int labCt2;
        for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
          counts[labCt2] = 0;
          lastLabel[labCt2] = address2os[i].first.labels[labCt2];
        }
        counts[labCt]++;
        lastLabel[labCt] = address2os[i].first.labels[labCt];
        break;
      }
    }
    for (labCt = 0; labCt < depth; labCt++) {
      address2os[i].first.childNums[labCt] = counts[labCt];
    }
    for (; labCt < (int)Address::maxDepth; labCt++) {
      address2os[i].first.childNums[labCt] = 0;
    }
  }
  __kmp_free(lastLabel);
  __kmp_free(counts);
}

// All of the __kmp_affinity_create_*_map() routines should set
// __kmp_affinity_masks to a vector of affinity mask objects of length
// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
// return the number of levels in the machine topology tree (zero if
// __kmp_affinity_type == affinity_none).
//
// All of the __kmp_affinity_create_*_map() routines should set
// *__kmp_affin_fullMask to the affinity mask for the initialization thread.
// They need to save and restore the mask, and it could be needed later, so
// saving it is just an optimization to avoid calling kmp_get_system_affinity()
// again.
kmp_affin_mask_t *__kmp_affin_fullMask = NULL;

static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif
static int *__kmp_pu_os_idx = NULL;

// __kmp_affinity_uniform_topology() doesn't work when called from
// places which support arbitrarily many levels in the machine topology
// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
// and __kmp_affinity_create_x2apicid_map().
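// For example, 2 packages x 8 cores x 2 threads is uniform only if all 32 of
// those OS procs are available; if some cores expose fewer active threads,
// the product of the per-level maxima exceeds __kmp_avail_proc.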
inline static bool __kmp_affinity_uniform_topology() {
  return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}

// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc.
static void __kmp_affinity_print_topology(AddrUnsPair *address2os, int len,
                                          int depth, int pkgLevel,
                                          int coreLevel, int threadLevel) {
  int proc;

  KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
  for (proc = 0; proc < len; proc++) {
    int level;
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    for (level = 0; level < depth; level++) {
      if (level == threadLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
      } else if (level == coreLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
      } else if (level == pkgLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
      } else if (level > pkgLevel) {
        __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                            level - pkgLevel - 1);
      } else {
        __kmp_str_buf_print(&buf, "L%d ", level);
      }
      __kmp_str_buf_print(&buf, "%d ", address2os[proc].first.labels[level]);
    }
    KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
               buf.str);
    __kmp_str_buf_free(&buf);
  }
}

#if KMP_USE_HWLOC

static void __kmp_affinity_print_hwloc_tp(AddrUnsPair *addrP, int len,
                                          int depth, int *levels) {
  int proc;
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
  for (proc = 0; proc < len; proc++) {
    __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Package),
                        addrP[proc].first.labels[0]);
    if (depth > 1) {
      int level = 1; // iterate over levels
      int label = 1; // iterate over labels
      if (__kmp_numa_detected)
        // node level follows package
        if (levels[level++] > 0)
          __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Node),
                              addrP[proc].first.labels[label++]);
      if (__kmp_tile_depth > 0)
        // tile level follows node if any, or package
        if (levels[level++] > 0)
          __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Tile),
                              addrP[proc].first.labels[label++]);
      if (levels[level++] > 0)
        // core level follows
        __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Core),
                            addrP[proc].first.labels[label++]);
      if (levels[level++] > 0)
        // thread level is the last
        __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Thread),
                            addrP[proc].first.labels[label++]);
      KMP_DEBUG_ASSERT(label == depth);
    }
    KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", addrP[proc].second, buf.str);
    __kmp_str_buf_clear(&buf);
  }
  __kmp_str_buf_free(&buf);
}

static int nNodePerPkg, nTilePerPkg, nTilePerNode, nCorePerNode, nCorePerTile;

// This function removes the topology levels that are radix 1 and don't offer
// further information about the topology. The most common example is when you
// have one thread context per core; we don't want the extra thread context
// level if it offers no unique labels. So they are removed.
// return value: the new depth of address2os
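// For example, a {package, core, thread} map on a machine with one thread
// context per core has identical thread labels everywhere, so the thread
// level is dropped and the depth shrinks from 3 to 2.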
static int __kmp_affinity_remove_radix_one_levels(AddrUnsPair *addrP, int nTh,
                                                  int depth, int *levels) {
  int level;
  int i;
  int radix1_detected;
  int new_depth = depth;
  for (level = depth - 1; level > 0; --level) {
    // Detect if this level is radix 1
    radix1_detected = 1;
    for (i = 1; i < nTh; ++i) {
      if (addrP[0].first.labels[level] != addrP[i].first.labels[level]) {
        // There are differing label values for this level so it stays
        radix1_detected = 0;
        break;
      }
    }
    if (!radix1_detected)
      continue;
    // Radix 1 was detected
    --new_depth;
    levels[level] = -1; // mark level as not present in address2os array
    if (level == new_depth) {
      // "turn off" deepest level, just decrement the depth that removes
      // the level from address2os array
      for (i = 0; i < nTh; ++i) {
        addrP[i].first.depth--;
      }
    } else {
      // For other levels, we move labels over and also reduce the depth
      int j;
      for (j = level; j < new_depth; ++j) {
        for (i = 0; i < nTh; ++i) {
          addrP[i].first.labels[j] = addrP[i].first.labels[j + 1];
          addrP[i].first.depth--;
        }
        levels[j + 1] -= 1;
      }
    }
  }
  return new_depth;
}

// Returns the number of objects of type 'type' below 'obj' within the topology
// tree structure. e.g., if obj is a HWLOC_OBJ_PACKAGE object, and type is
// HWLOC_OBJ_PU, then this will return the number of PU's under the SOCKET
// object.
static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj,
                                           hwloc_obj_type_t type) {
  int retval = 0;
  hwloc_obj_t first;
  for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type,
                                           obj->logical_index, type, 0);
       first != NULL &&
       hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) ==
           obj;
       first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type,
                                          first)) {
    ++retval;
  }
  return retval;
}

static int __kmp_hwloc_count_children_by_depth(hwloc_topology_t t,
                                               hwloc_obj_t o, unsigned depth,
                                               hwloc_obj_t *f) {
  if (o->depth == depth) {
    if (*f == NULL)
      *f = o; // output first descendant found
    return 1;
  }
  int sum = 0;
  for (unsigned i = 0; i < o->arity; i++)
    sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f);
  return sum; // will be 0 if none found (as PU arity is 0)
}

static int __kmp_hwloc_count_children_by_type(hwloc_topology_t t, hwloc_obj_t o,
                                              hwloc_obj_type_t type,
                                              hwloc_obj_t *f) {
  if (!hwloc_compare_types(o->type, type)) {
    if (*f == NULL)
      *f = o; // output first descendant found
    return 1;
  }
  int sum = 0;
  for (unsigned i = 0; i < o->arity; i++)
    sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f);
  return sum; // will be 0 if none found (as PU arity is 0)
}

static int __kmp_hwloc_process_obj_core_pu(AddrUnsPair *addrPair,
                                           int &nActiveThreads,
                                           int &num_active_cores,
                                           hwloc_obj_t obj, int depth,
                                           int *labels) {
  hwloc_obj_t core = NULL;
  hwloc_topology_t &tp = __kmp_hwloc_topology;
  int NC = __kmp_hwloc_count_children_by_type(tp, obj, HWLOC_OBJ_CORE, &core);
  for (int core_id = 0; core_id < NC; ++core_id, core = core->next_cousin) {
    hwloc_obj_t pu = NULL;
    KMP_DEBUG_ASSERT(core != NULL);
    int num_active_threads = 0;
    int NT = __kmp_hwloc_count_children_by_type(tp, core, HWLOC_OBJ_PU, &pu);
    // int NT = core->arity; pu = core->first_child; // faster?
    for (int pu_id = 0; pu_id < NT; ++pu_id, pu = pu->next_cousin) {
      KMP_DEBUG_ASSERT(pu != NULL);
      if (!KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
        continue; // skip inactive (inaccessible) unit
      Address addr(depth + 2);
      KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
                    obj->os_index, obj->logical_index, core->os_index,
                    core->logical_index, pu->os_index, pu->logical_index));
      for (int i = 0; i < depth; ++i)
        addr.labels[i] = labels[i]; // package, etc.
      addr.labels[depth] = core_id; // core
      addr.labels[depth + 1] = pu_id; // pu
      addrPair[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
      __kmp_pu_os_idx[nActiveThreads] = pu->os_index;
      nActiveThreads++;
      ++num_active_threads; // count active threads per core
    }
    if (num_active_threads) { // were there any active threads on the core?
      ++__kmp_ncores; // count total active cores
      ++num_active_cores; // count active cores per socket
      if (num_active_threads > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = num_active_threads; // calc maximum
    }
  }
  return 0;
}

// Check if NUMA node detected below the package,
// and if tile object is detected and return its depth
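// (a "tile" is approximated here as a unified L2 cache sitting below the
// package and shared by more than one core)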
static int __kmp_hwloc_check_numa() {
  hwloc_topology_t &tp = __kmp_hwloc_topology;
  hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
  int depth, l2cache_depth, package_depth;

  // Get some PU
  hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, 0);
  if (hT == NULL) // something has gone wrong
    return 1;

  // check NUMA node below PACKAGE
  hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
  hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
  KMP_DEBUG_ASSERT(hS != NULL);
  if (hN != NULL && hN->depth > hS->depth) {
    __kmp_numa_detected = TRUE; // socket includes node(s)
    if (__kmp_affinity_gran == affinity_gran_node) {
      __kmp_affinity_gran = affinity_gran_numa;
    }
  }

  package_depth = hwloc_get_type_depth(tp, HWLOC_OBJ_PACKAGE);
  l2cache_depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
  // check tile, get object by depth because of multiple caches possible
  depth = (l2cache_depth < package_depth) ? package_depth : l2cache_depth;
  hL = hwloc_get_ancestor_obj_by_depth(tp, depth, hT);
  hC = NULL; // not used, but reset it here just in case
  if (hL != NULL &&
      __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1)
    __kmp_tile_depth = depth; // tile consists of multiple cores
  return 0;
}

static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
                                           kmp_i18n_id_t *const msg_id) {
  hwloc_topology_t &tp = __kmp_hwloc_topology; // shortcut of a long name
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  __kmp_get_system_affinity(oldMask, TRUE);
  __kmp_hwloc_check_numa();

  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from cpuid on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity_type == affinity_none);

    nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(
        hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0), HWLOC_OBJ_CORE);
    __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(
        hwloc_get_obj_by_type(tp, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);
    __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    KMP_CPU_FREE(oldMask);
    return 0;
  }

  int depth = 3;
  int levels[5] = {0, 1, 2, 3, 4}; // package, [node,] [tile,] core, thread
  int labels[3] = {0}; // package [,node] [,tile] - head of labels array
  if (__kmp_numa_detected)
    ++depth;
  if (__kmp_tile_depth)
    ++depth;

  // Allocate the data structure to be returned.
  AddrUnsPair *retval =
      (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore,
  // nCoresPerPkg, & nPackages. Make sure all these vars are set
  // correctly, and return if affinity is not enabled.

  hwloc_obj_t socket, node, tile;
  int nActiveThreads = 0;
  int socket_id = 0;
  // re-calculate globals to count only accessible resources
  __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
  nNodePerPkg = nTilePerPkg = nTilePerNode = nCorePerNode = nCorePerTile = 0;
  for (socket = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0); socket != NULL;
       socket = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, socket),
      socket_id++) {
    labels[0] = socket_id;
    if (__kmp_numa_detected) {
      int NN;
      int n_active_nodes = 0;
      node = NULL;
      NN = __kmp_hwloc_count_children_by_type(tp, socket, HWLOC_OBJ_NUMANODE,
                                              &node);
      for (int node_id = 0; node_id < NN; ++node_id, node = node->next_cousin) {
        labels[1] = node_id;
        if (__kmp_tile_depth) {
          // NUMA + tiles
          int NT;
          int n_active_tiles = 0;
          tile = NULL;
          NT = __kmp_hwloc_count_children_by_depth(tp, node, __kmp_tile_depth,
                                                   &tile);
          for (int tl_id = 0; tl_id < NT; ++tl_id, tile = tile->next_cousin) {
            labels[2] = tl_id;
            int n_active_cores = 0;
            __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
                                            n_active_cores, tile, 3, labels);
            if (n_active_cores) { // were there any active cores on the tile?
              ++n_active_tiles; // count active tiles per node
              if (n_active_cores > nCorePerTile)
                nCorePerTile = n_active_cores; // calc maximum
            }
          }
          if (n_active_tiles) { // were there any active tiles on the node?
            ++n_active_nodes; // count active nodes per package
            if (n_active_tiles > nTilePerNode)
              nTilePerNode = n_active_tiles; // calc maximum
          }
        } else {
          // NUMA, no tiles
          int n_active_cores = 0;
          __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
                                          n_active_cores, node, 2, labels);
          if (n_active_cores) { // were there any active cores on the node?
            ++n_active_nodes; // count active nodes per package
            if (n_active_cores > nCorePerNode)
              nCorePerNode = n_active_cores; // calc maximum
          }
        }
      }
      if (n_active_nodes) { // were there any active nodes on the socket?
        ++nPackages; // count total active packages
        if (n_active_nodes > nNodePerPkg)
          nNodePerPkg = n_active_nodes; // calc maximum
      }
    } else {
      if (__kmp_tile_depth) {
        // no NUMA, tiles
        int NT;
        int n_active_tiles = 0;
        tile = NULL;
        NT = __kmp_hwloc_count_children_by_depth(tp, socket, __kmp_tile_depth,
                                                 &tile);
        for (int tl_id = 0; tl_id < NT; ++tl_id, tile = tile->next_cousin) {
          labels[1] = tl_id;
          int n_active_cores = 0;
          __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
                                          n_active_cores, tile, 2, labels);
          if (n_active_cores) { // were there any active cores on the tile?
            ++n_active_tiles; // count active tiles per package
            if (n_active_cores > nCorePerTile)
              nCorePerTile = n_active_cores; // calc maximum
          }
        }
        if (n_active_tiles) { // were there any active tiles on the socket?
          ++nPackages; // count total active packages
          if (n_active_tiles > nTilePerPkg)
            nTilePerPkg = n_active_tiles; // calc maximum
        }
      } else {
        // no NUMA, no tiles
        int n_active_cores = 0;
        __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads, n_active_cores,
                                        socket, 1, labels);
        if (n_active_cores) { // were there any active cores on the socket?
          ++nPackages; // count total active packages
          if (n_active_cores > nCoresPerPkg)
            nCoresPerPkg = n_active_cores; // calc maximum
        }
      }
    }
  }

  // If there's only one thread context to bind to, return now.
  KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
  KMP_ASSERT(nActiveThreads > 0);
  if (nActiveThreads == 1) {
    __kmp_ncores = nPackages = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

      KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(retval);
      KMP_CPU_FREE(oldMask);
      return 0;
    }

    // Form an Address object which only includes the package level.
    Address addr(1);
    addr.labels[0] = retval[0].first.labels[0];
    retval[0].first = addr;

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
    }

    *address2os = retval;
    KMP_CPU_FREE(oldMask);
    return 1;
  }

  // Sort the table by physical Id.
  qsort(retval, nActiveThreads, sizeof(*retval),
        __kmp_affinity_cmp_Address_labels);

  // Check to see if the machine topology is uniform
  int nPUs = nPackages * __kmp_nThreadsPerCore;
  if (__kmp_numa_detected) {
    if (__kmp_tile_depth) { // NUMA + tiles
      nPUs *= (nNodePerPkg * nTilePerNode * nCorePerTile);
    } else { // NUMA, no tiles
      nPUs *= (nNodePerPkg * nCorePerNode);
    }
  } else {
    if (__kmp_tile_depth) { // no NUMA, tiles
      nPUs *= (nTilePerPkg * nCorePerTile);
    } else { // no NUMA, no tiles
      nPUs *= nCoresPerPkg;
    }
  }
  unsigned uniform = (nPUs == nActiveThreads);

  // Print the machine topology summary.
  if (__kmp_affinity_verbose) {
    char mask[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (uniform) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }
    if (__kmp_numa_detected) {
      if (__kmp_tile_depth) { // NUMA + tiles
        KMP_INFORM(TopologyExtraNoTi, "KMP_AFFINITY", nPackages, nNodePerPkg,
                   nTilePerNode, nCorePerTile, __kmp_nThreadsPerCore,
                   __kmp_ncores);
      } else { // NUMA, no tiles
        KMP_INFORM(TopologyExtraNode, "KMP_AFFINITY", nPackages, nNodePerPkg,
                   nCorePerNode, __kmp_nThreadsPerCore, __kmp_ncores);
        nPUs *= (nNodePerPkg * nCorePerNode);
      }
    } else {
      if (__kmp_tile_depth) { // no NUMA, tiles
        KMP_INFORM(TopologyExtraTile, "KMP_AFFINITY", nPackages, nTilePerPkg,
                   nCorePerTile, __kmp_nThreadsPerCore, __kmp_ncores);
      } else { // no NUMA, no tiles
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);
        __kmp_str_buf_print(&buf, "%d", nPackages);
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
                   __kmp_nThreadsPerCore, __kmp_ncores);
        __kmp_str_buf_free(&buf);
      }
    }
  }

  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(retval);
    KMP_CPU_FREE(oldMask);
    return 0;
  }

  int depth_full = depth; // number of levels before compressing
  // Find any levels with radix 1, and remove them from the map
  // (except for the package level).
  depth = __kmp_affinity_remove_radix_one_levels(retval, nActiveThreads, depth,
                                                 levels);
  KMP_DEBUG_ASSERT(__kmp_affinity_gran != affinity_gran_default);
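  // __kmp_affinity_gran_levels counts how many of the deepest remaining map
  // levels are folded into one affinity granule; e.g., granularity=core on a
  // {package, core, thread} map yields 1 (the thread level).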
  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled
    // in the machine topology map.
    __kmp_affinity_gran_levels = 0; // lowest level (e.g. fine)
    if (__kmp_affinity_gran > affinity_gran_thread) {
      for (int i = 1; i <= depth_full; ++i) {
        if (__kmp_affinity_gran <= i) // only count deeper levels
          break;
        if (levels[depth_full - i] > 0)
          __kmp_affinity_gran_levels++;
      }
    }
    if (__kmp_affinity_gran > affinity_gran_package)
      __kmp_affinity_gran_levels++; // e.g. granularity = group
  }

  if (__kmp_affinity_verbose)
    __kmp_affinity_print_hwloc_tp(retval, nActiveThreads, depth, levels);

  KMP_CPU_FREE(oldMask);
  *address2os = retval;
  return depth;
}
#endif // KMP_USE_HWLOC

// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread id's <-> processor id's.
static int __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
                                          kmp_i18n_id_t *const msg_id) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Even if __kmp_affinity_type == affinity_none, this routine might still
  // be called to set __kmp_ncores, as well as
  // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  if (!KMP_AFFINITY_CAPABLE()) {
    KMP_ASSERT(__kmp_affinity_type == affinity_none);
    __kmp_ncores = nPackages = __kmp_xproc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    return 0;
  }

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  // Make sure all these vars are set correctly, and return now if affinity is
  // not enabled.
  __kmp_ncores = nPackages = __kmp_avail_proc;
  __kmp_nThreadsPerCore = nCoresPerPkg = 1;
  if (__kmp_affinity_verbose) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              __kmp_affin_fullMask);

    KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    KMP_INFORM(Uniform, "KMP_AFFINITY");
    KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
  }
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  if (__kmp_affinity_type == affinity_none) {
    int avail_ct = 0;
    int i;
    KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
      if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask))
        continue;
      __kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat
    }
    return 0;
  }

  // Construct the data structure to be returned.
  *address2os =
      (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
  int avail_ct = 0;
  int i;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
    Address addr(1);
    addr.labels[0] = i;
    (*address2os)[avail_ct++] = AddrUnsPair(addr, i);
  }
  if (__kmp_affinity_verbose) {
    KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Only the package level is modeled in the machine topology map,
    // so the #levels of granularity is either 0 or 1.
    if (__kmp_affinity_gran > affinity_gran_package) {
      __kmp_affinity_gran_levels = 1;
    } else {
      __kmp_affinity_gran_levels = 0;
    }
  }
  return 1;
}

#if KMP_GROUP_AFFINITY

// If multiple Windows* OS processor groups exist, we can create a 2-level
// topology map with the groups at level 0 and the individual procs at level 1.
// This facilitates letting the threads float among all procs in a group,
// if granularity=group (the default when there are multiple groups).
static int __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
                                                kmp_i18n_id_t *const msg_id) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // If we aren't affinity capable, then return now.
  // The flat mapping will be used.
  if (!KMP_AFFINITY_CAPABLE()) {
    // FIXME set *msg_id
    return -1;
  }

  // Construct the data structure to be returned.
  *address2os =
      (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  int avail_ct = 0;
  int i;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
    Address addr(2);
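    // Decompose the flat OS proc id into {group, proc-within-group}; each
    // group holds CHAR_BIT * sizeof(DWORD_PTR) (64 on 64-bit Windows* OS)
    // logical processors.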
    addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
    addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
    (*address2os)[avail_ct++] = AddrUnsPair(addr, i);

    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
                 addr.labels[1]);
    }
  }

  if (__kmp_affinity_gran_levels < 0) {
    if (__kmp_affinity_gran == affinity_gran_group) {
      __kmp_affinity_gran_levels = 1;
    } else if ((__kmp_affinity_gran == affinity_gran_fine) ||
               (__kmp_affinity_gran == affinity_gran_thread)) {
      __kmp_affinity_gran_levels = 0;
    } else {
      const char *gran_str = NULL;
      if (__kmp_affinity_gran == affinity_gran_core) {
        gran_str = "core";
      } else if (__kmp_affinity_gran == affinity_gran_package) {
        gran_str = "package";
      } else if (__kmp_affinity_gran == affinity_gran_node) {
        gran_str = "node";
      } else {
        KMP_ASSERT(0);
      }

      // Warning: can't use affinity granularity \"gran\" with group topology
      // method, using "thread"
      __kmp_affinity_gran_levels = 0;
    }
  }
  return 2;
}

#endif /* KMP_GROUP_AFFINITY */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

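// Return the number of bits needed to index "count" items, i.e.,
// ceil(log2(count)); e.g., count = 6 -> 3 (ids 0-5 fit in 3 bits).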
static int __kmp_cpuid_mask_width(int count) {
  int r = 0;

  while ((1 << r) < count)
    ++r;
  return r;
}

class apicThreadInfo {
public:
  unsigned osId; // param to __kmp_affinity_bind_thread
  unsigned apicId; // from cpuid after binding
  unsigned maxCoresPerPkg; // ""
  unsigned maxThreadsPerPkg; // ""
  unsigned pkgId; // inferred from above values
  unsigned coreId; // ""
  unsigned threadId; // ""
};

static int __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a,
                                                     const void *b) {
  const apicThreadInfo *aa = (const apicThreadInfo *)a;
  const apicThreadInfo *bb = (const apicThreadInfo *)b;
  if (aa->pkgId < bb->pkgId)
    return -1;
  if (aa->pkgId > bb->pkgId)
    return 1;
  if (aa->coreId < bb->coreId)
    return -1;
  if (aa->coreId > bb->coreId)
    return 1;
  if (aa->threadId < bb->threadId)
    return -1;
  if (aa->threadId > bb->threadId)
    return 1;
  return 0;
}

// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
// an algorithm which cycles through the available os threads, setting
// the current thread's affinity mask to that thread, and then retrieves
// the Apic Id for each thread context using the cpuid instruction.
static int __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
                                            kmp_i18n_id_t *const msg_id) {
  kmp_cpuid buf;
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Check if cpuid leaf 4 is supported.
  __kmp_x86_cpuid(0, 0, &buf);
  if (buf.eax < 4) {
    *msg_id = kmp_i18n_str_NoLeaf4Support;
    return -1;
  }

  // The algorithm used starts by setting the affinity to each available thread
  // and retrieving info from the cpuid instruction, so if we are not capable of
  // calling __kmp_get_system_affinity() and __kmp_set_system_affinity(), then
  // we need to do something else - use the defaults that we calculated from
  // issuing cpuid without binding to each proc.
  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from cpuid on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity_type == affinity_none);

    // Get an upper bound on the number of threads per package using cpuid(1).
    // On some OS/chip combinations where HT is supported by the chip but is
    // disabled, this value will be 2 on a single core chip. Usually, it will be
    // 2 if HT is enabled and 1 if HT is disabled.
    __kmp_x86_cpuid(1, 0, &buf);
    int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
    if (maxThreadsPerPkg == 0) {
      maxThreadsPerPkg = 1;
    }

    // The num cores per pkg comes from cpuid(4). 1 must be added to the encoded
    // value.
    //
    // The author of cpu_count.cpp treated this as only an upper bound on the
    // number of cores, but I haven't seen any cases where it was greater than
    // the actual number of cores, so we will treat it as exact in this block of
    // code.
    //
    // First, we need to check if cpuid(4) is supported on this chip. To see if
    // cpuid(n) is supported, issue cpuid(0) and check if eax has the value n or
    // greater.
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax >= 4) {
      __kmp_x86_cpuid(4, 0, &buf);
      nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
    } else {
      nCoresPerPkg = 1;
    }

    // There is no way to reliably tell if HT is enabled without issuing the
    // cpuid instruction from every thread, and correlating the cpuid info, so
    // if the machine is not affinity capable, we assume that HT is off. We have
    // seen quite a few machines where maxThreadsPerPkg is 2, yet the machine
    // does not support HT.
    //
    // - Older OSes are usually found on machines with older chips, which do not
    //   support HT.
    // - The performance penalty for mistakenly identifying a machine as HT when
    //   it isn't (which results in blocktime being incorrectly set to 0) is
    //   greater than the penalty for mistakenly identifying a machine as
    //   being 1 thread/core when it is really HT enabled (which results in
    //   blocktime being incorrectly set to a positive value).
    __kmp_ncores = __kmp_xproc;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    __kmp_nThreadsPerCore = 1;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    return 0;
  }

  // From here on, we can assume that it is safe to call
  // __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
  // __kmp_affinity_type = affinity_none.

  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  KMP_ASSERT(oldMask != NULL);
  __kmp_get_system_affinity(oldMask, TRUE);

  // Run through each of the available contexts, binding the current thread
  // to it, and obtaining the pertinent information using the cpuid instr.
  //
  // The relevant information is:
  // - Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
  //   has a unique Apic Id, which is of the form pkg# : core# : thread#.
  // - Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The value
  //   of this field determines the width of the core# + thread# fields in the
  //   Apic Id. It is also an upper bound on the number of threads per
  //   package, but it has been verified that situations happen where it is not
  //   exact. In particular, on certain OS/chip combinations where Intel(R)
  //   Hyper-Threading Technology is supported by the chip but has been
  //   disabled, the value of this field will be 2 (for a single core chip).
  //   On other OS/chip combinations supporting Intel(R) Hyper-Threading
  //   Technology, the value of this field will be 1 when Intel(R)
  //   Hyper-Threading Technology is disabled and 2 when it is enabled.
  // - Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The value
  //   of this field (+1) determines the width of the core# field in the Apic
  //   Id. The comments in "cpucount.cpp" say that this value is an upper
  //   bound, but the IA-32 architecture manual says that it is exactly the
  //   number of cores per package, and I haven't seen any case where it
  //   wasn't.
  //
  // From this information, deduce the package Id, core Id, and thread Id,
  // and set the corresponding fields in the apicThreadInfo struct.
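  //
  // For example (illustrative values): with maxThreadsPerPkg = 4 and
  // maxCoresPerPkg = 2, widthCT = 2 and widthC = 1, so an Apic Id of 0xB
  // (binary 1011) decodes to pkgId = 2, coreId = 1, threadId = 1.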
  unsigned i;
  apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
  unsigned nApics = 0;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

    __kmp_affinity_dispatch->bind_thread(i);
    threadInfo[nApics].osId = i;

    // The apic id and max threads per pkg come from cpuid(1).
    __kmp_x86_cpuid(1, 0, &buf);
    if (((buf.edx >> 9) & 1) == 0) {
      __kmp_set_system_affinity(oldMask, TRUE);
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_ApicNotPresent;
      return -1;
    }
    threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
    threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
    if (threadInfo[nApics].maxThreadsPerPkg == 0) {
      threadInfo[nApics].maxThreadsPerPkg = 1;
    }

    // Max cores per pkg comes from cpuid(4). 1 must be added to the encoded
    // value.
    //
    // First, we need to check if cpuid(4) is supported on this chip. To see if
    // cpuid(n) is supported, issue cpuid(0) and check if eax has the value n
    // or greater.
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax >= 4) {
      __kmp_x86_cpuid(4, 0, &buf);
      threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
    } else {
      threadInfo[nApics].maxCoresPerPkg = 1;
    }

    // Infer the pkgId / coreId / threadId using only the info obtained locally.
    int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg);
    threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

    int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg);
    int widthT = widthCT - widthC;
    if (widthT < 0) {
      // I've never seen this one happen, but I suppose it could, if the cpuid
      // instruction on a chip was really screwed up. Make sure to restore the
      // affinity mask before the tail call.
      __kmp_set_system_affinity(oldMask, TRUE);
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_InvalidCpuidInfo;
      return -1;
    }

    int maskC = (1 << widthC) - 1;
    threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC;

    int maskT = (1 << widthT) - 1;
    threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

    nApics++;
  }

  // We've collected all the info we need.
  // Restore the old affinity mask for this thread.
  __kmp_set_system_affinity(oldMask, TRUE);

  // If there's only one thread context to bind to, form an Address object
  // with depth 1 and return immediately (or, if affinity is off, set
  // address2os to NULL and return).
  //
  // If it is configured to omit the package level when there is only a single
  // package, the logic at the end of this routine won't work if there is only
  // a single thread - it would try to form an Address object with depth 0.
  KMP_ASSERT(nApics > 0);
  if (nApics == 1) {
    __kmp_ncores = nPackages = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

      KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      return 0;
    }

    *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair));
    Address addr(1);
    addr.labels[0] = threadInfo[0].pkgId;
    (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001292
1293 if (__kmp_affinity_gran_levels < 0) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001294 __kmp_affinity_gran_levels = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001295 }
1296
1297 if (__kmp_affinity_verbose) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001298 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001299 }
1300
1301 __kmp_free(threadInfo);
1302 KMP_CPU_FREE(oldMask);
Jonathan Peyton30419822017-05-12 18:01:32 +00001303 return 1;
1304 }
1305
1306 // Sort the threadInfo table by physical Id.
1307 qsort(threadInfo, nApics, sizeof(*threadInfo),
1308 __kmp_affinity_cmp_apicThreadInfo_phys_id);
1309
1310 // The table is now sorted by pkgId / coreId / threadId, but we really don't
1311 // know the radix of any of the fields. pkgId's may be sparsely assigned among
1312 // the chips on a system. Although coreId's are usually assigned
1313 // [0 .. coresPerPkg-1] and threadId's are usually assigned
1314 // [0..threadsPerCore-1], we don't want to make any such assumptions.
1315 //
1316 // For that matter, we don't know what coresPerPkg and threadsPerCore (or the
1317 // total # packages) are at this point - we want to determine that now. We
1318 // only have an upper bound on the first two figures.
1319 //
1320 // We also perform a consistency check at this point: the values returned by
1321 // the cpuid instruction for any thread bound to a given package had better
1322 // return the same info for maxThreadsPerPkg and maxCoresPerPkg.
1323 nPackages = 1;
1324 nCoresPerPkg = 1;
1325 __kmp_nThreadsPerCore = 1;
1326 unsigned nCores = 1;
1327
1328 unsigned pkgCt = 1; // to determine radii
1329 unsigned lastPkgId = threadInfo[0].pkgId;
1330 unsigned coreCt = 1;
1331 unsigned lastCoreId = threadInfo[0].coreId;
1332 unsigned threadCt = 1;
1333 unsigned lastThreadId = threadInfo[0].threadId;
1334
1335 // intra-package consistency checks
1336 unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
1337 unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
1338
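// Illustrative walk of the loop below (hypothetical ids): for a sorted
// table (pkg, core, thread) = (0,0,0) (0,0,1) (0,1,0) (0,1,1) (1,0,0)
// (1,0,1), threadCt counts threads within the current core and coreCt
// counts cores within the current package, ending with nPackages == 2,
// nCoresPerPkg == 2, __kmp_nThreadsPerCore == 2, and nCores == 3.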
1339 for (i = 1; i < nApics; i++) {
1340 if (threadInfo[i].pkgId != lastPkgId) {
1341 nCores++;
1342 pkgCt++;
1343 lastPkgId = threadInfo[i].pkgId;
1344 if ((int)coreCt > nCoresPerPkg)
1345 nCoresPerPkg = coreCt;
1346 coreCt = 1;
1347 lastCoreId = threadInfo[i].coreId;
1348 if ((int)threadCt > __kmp_nThreadsPerCore)
1349 __kmp_nThreadsPerCore = threadCt;
1350 threadCt = 1;
1351 lastThreadId = threadInfo[i].threadId;
1352
1353 // This is a different package, so go on to the next iteration without
1354 // doing any consistency checks. Reset the consistency check vars, though.
1355 prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
1356 prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
1357 continue;
1358 }
1359
1360 if (threadInfo[i].coreId != lastCoreId) {
1361 nCores++;
1362 coreCt++;
1363 lastCoreId = threadInfo[i].coreId;
1364 if ((int)threadCt > __kmp_nThreadsPerCore)
1365 __kmp_nThreadsPerCore = threadCt;
1366 threadCt = 1;
1367 lastThreadId = threadInfo[i].threadId;
1368 } else if (threadInfo[i].threadId != lastThreadId) {
1369 threadCt++;
1370 lastThreadId = threadInfo[i].threadId;
1371 } else {
1372 __kmp_free(threadInfo);
1373 KMP_CPU_FREE(oldMask);
1374 *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
1375 return -1;
1376 }
1377
1378 // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
1379 // fields agree between all the threads bound to a given package.
1380 if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) ||
1381 (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
1382 __kmp_free(threadInfo);
1383 KMP_CPU_FREE(oldMask);
1384 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1385 return -1;
1386 }
1387 }
1388 nPackages = pkgCt;
1389 if ((int)coreCt > nCoresPerPkg)
1390 nCoresPerPkg = coreCt;
1391 if ((int)threadCt > __kmp_nThreadsPerCore)
1392 __kmp_nThreadsPerCore = threadCt;
1393
1394 // When affinity is off, this routine will still be called to set
1395 // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
1396 // Make sure all these vars are set correctly, and return now if affinity is
1397 // not enabled.
1398 __kmp_ncores = nCores;
1399 if (__kmp_affinity_verbose) {
1400 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1401 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1402
1403 KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
1404 if (__kmp_affinity_respect_mask) {
1405 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1406 } else {
1407 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1408 }
1409 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1410 if (__kmp_affinity_uniform_topology()) {
1411 KMP_INFORM(Uniform, "KMP_AFFINITY");
1412 } else {
1413 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1414 }
1415 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1416 __kmp_nThreadsPerCore, __kmp_ncores);
1417 }
1418 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00001419 KMP_DEBUG_ASSERT(nApics == (unsigned)__kmp_avail_proc);
Jonathan Peyton30419822017-05-12 18:01:32 +00001420 __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
1421 for (i = 0; i < nApics; ++i) {
1422 __kmp_pu_os_idx[i] = threadInfo[i].osId;
1423 }
1424 if (__kmp_affinity_type == affinity_none) {
1425 __kmp_free(threadInfo);
1426 KMP_CPU_FREE(oldMask);
1427 return 0;
1428 }
1429
1430 // Now that we've determined the number of packages, the number of cores per
1431 // package, and the number of threads per core, we can construct the data
1432 // structure that is to be returned.
1433 int pkgLevel = 0;
1434 int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
1435 int threadLevel =
1436 (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
1437 unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
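// For example (hypothetical topology): with nCoresPerPkg == 4 and
// __kmp_nThreadsPerCore == 2, coreLevel == 1 and threadLevel == 2, so
// depth == 3 and each Address carries <pkgId, coreId, threadId>. On a
// single-core machine without SMT both levels are omitted and depth == 1.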
1438
1439 KMP_ASSERT(depth > 0);
1440 *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
1441
1442 for (i = 0; i < nApics; ++i) {
1443 Address addr(depth);
1444 unsigned os = threadInfo[i].osId;
1445 int d = 0;
1446
1447 if (pkgLevel >= 0) {
1448 addr.labels[d++] = threadInfo[i].pkgId;
1449 }
1450 if (coreLevel >= 0) {
1451 addr.labels[d++] = threadInfo[i].coreId;
1452 }
1453 if (threadLevel >= 0) {
1454 addr.labels[d++] = threadInfo[i].threadId;
1455 }
1456 (*address2os)[i] = AddrUnsPair(addr, os);
1457 }
1458
1459 if (__kmp_affinity_gran_levels < 0) {
1460 // Set the granularity level based on what levels are modeled in the machine
1461 // topology map.
1462 __kmp_affinity_gran_levels = 0;
1463 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
1464 __kmp_affinity_gran_levels++;
1465 }
1466 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1467 __kmp_affinity_gran_levels++;
1468 }
1469 if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
1470 __kmp_affinity_gran_levels++;
1471 }
1472 }
1473
1474 if (__kmp_affinity_verbose) {
1475 __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
1476 coreLevel, threadLevel);
1477 }
1478
1479 __kmp_free(threadInfo);
1480 KMP_CPU_FREE(oldMask);
1481 return depth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001482}
1483
Jim Cownie5e8470a2013-09-27 10:38:44 +00001484// Intel(R) microarchitecture code name Nehalem, Dunnington and later
1485// architectures support a newer interface for specifying the x2APIC Ids,
1486// based on cpuid leaf 11.
Jonathan Peyton30419822017-05-12 18:01:32 +00001487static int __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
1488 kmp_i18n_id_t *const msg_id) {
1489 kmp_cpuid buf;
1490 *address2os = NULL;
1491 *msg_id = kmp_i18n_null;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001492
Jonathan Peyton30419822017-05-12 18:01:32 +00001493 // Check to see if cpuid leaf 11 is supported.
1494 __kmp_x86_cpuid(0, 0, &buf);
1495 if (buf.eax < 11) {
1496 *msg_id = kmp_i18n_str_NoLeaf11Support;
1497 return -1;
1498 }
1499 __kmp_x86_cpuid(11, 0, &buf);
1500 if (buf.ebx == 0) {
1501 *msg_id = kmp_i18n_str_NoLeaf11Support;
1502 return -1;
1503 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001504
Jonathan Peyton30419822017-05-12 18:01:32 +00001505 // Find the number of levels in the machine topology. While we're at it, get
1506 // the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will try to
1507 // get more accurate values later by explicitly counting them, but get
1508 // reasonable defaults now, in case we return early.
1509 int level;
1510 int threadLevel = -1;
1511 int coreLevel = -1;
1512 int pkgLevel = -1;
1513 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
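// A hedged sketch of a typical enumeration (hypothetical register values):
// on an 8-core part with SMT enabled, cpuid(11, 0) might report kind == 1
// (SMT) with ebx == 2, cpuid(11, 1) kind == 2 (core) with ebx == 16
// (logical processors per package), and cpuid(11, 2) ebx == 0, ending the
// loop with pkgLevel == 2 and depth == 3.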
1514
1515 for (level = 0;; level++) {
1516 if (level > 31) {
1517 // FIXME: Hack for DPD200163180
1518 //
1519 // If level is big then something went wrong -> exiting
1520 //
1521 // There could actually be 32 valid levels in the machine topology, but so
1522 // far, the only machine we have seen which does not exit this loop before
1523 // iteration 32 has fubar x2APIC settings.
1524 //
1525 // For now, just reject this case based upon loop trip count.
1526 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1527 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001528 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001529 __kmp_x86_cpuid(11, level, &buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001530 if (buf.ebx == 0) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001531 if (pkgLevel < 0) {
1532 // Will infer nPackages from __kmp_xproc
1533 pkgLevel = level;
1534 level++;
1535 }
1536 break;
1537 }
1538 int kind = (buf.ecx >> 8) & 0xff;
1539 if (kind == 1) {
1540 // SMT level
1541 threadLevel = level;
1542 coreLevel = -1;
1543 pkgLevel = -1;
1544 __kmp_nThreadsPerCore = buf.ebx & 0xffff;
1545 if (__kmp_nThreadsPerCore == 0) {
1546 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001547 return -1;
Jonathan Peyton30419822017-05-12 18:01:32 +00001548 }
1549 } else if (kind == 2) {
1550 // core level
1551 coreLevel = level;
1552 pkgLevel = -1;
1553 nCoresPerPkg = buf.ebx & 0xffff;
1554 if (nCoresPerPkg == 0) {
1555 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1556 return -1;
1557 }
1558 } else {
1559 if (level <= 0) {
1560 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1561 return -1;
1562 }
1563 if (pkgLevel >= 0) {
1564 continue;
1565 }
1566 pkgLevel = level;
1567 nPackages = buf.ebx & 0xffff;
1568 if (nPackages == 0) {
1569 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1570 return -1;
1571 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001572 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001573 }
1574 int depth = level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001575
Jonathan Peyton30419822017-05-12 18:01:32 +00001576 // In the above loop, "level" was counted from the finest level (usually
1577 // thread) to the coarsest. The caller expects that we will place the labels
1578 // in (*address2os)[].first.labels[] in the inverse order, so we need to
1579 // invert the vars saying which level means what.
1580 if (threadLevel >= 0) {
1581 threadLevel = depth - threadLevel - 1;
1582 }
1583 if (coreLevel >= 0) {
1584 coreLevel = depth - coreLevel - 1;
1585 }
1586 KMP_DEBUG_ASSERT(pkgLevel >= 0);
1587 pkgLevel = depth - pkgLevel - 1;
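// E.g., if the loop above found threadLevel == 0, coreLevel == 1, and
// pkgLevel == 2 with depth == 3, the inversion yields pkgLevel == 0,
// coreLevel == 1, and threadLevel == 2, matching the label order
// <pkgId, coreId, threadId>.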
Jim Cownie5e8470a2013-09-27 10:38:44 +00001588
Jonathan Peyton30419822017-05-12 18:01:32 +00001589 // The algorithm used starts by setting the affinity to each available thread
1590 // and retrieving info from the cpuid instruction, so if we are not capable of
1591 // calling __kmp_get_system_affinity() and __kmp_set_system_affinity(), then we
1592 // need to do something else - use the defaults that we calculated from
1593 // issuing cpuid without binding to each proc.
1594 if (!KMP_AFFINITY_CAPABLE()) {
1595 // Hack to try and infer the machine topology using only the data
1596 // available from cpuid on the current thread, and __kmp_xproc.
1597 KMP_ASSERT(__kmp_affinity_type == affinity_none);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001598
Jonathan Peyton30419822017-05-12 18:01:32 +00001599 __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
1600 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001601 if (__kmp_affinity_verbose) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001602 KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
1603 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1604 if (__kmp_affinity_uniform_topology()) {
1605 KMP_INFORM(Uniform, "KMP_AFFINITY");
1606 } else {
1607 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1608 }
1609 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1610 __kmp_nThreadsPerCore, __kmp_ncores);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001611 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001612 return 0;
1613 }
1614
1615 // From here on, we can assume that it is safe to call
1616 // __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
1617 // __kmp_affinity_type = affinity_none.
1618
1619 // Save the affinity mask for the current thread.
1620 kmp_affin_mask_t *oldMask;
1621 KMP_CPU_ALLOC(oldMask);
1622 __kmp_get_system_affinity(oldMask, TRUE);
1623
1624 // Allocate the data structure to be returned.
1625 AddrUnsPair *retval =
1626 (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
1627
1628 // Run through each of the available contexts, binding the current thread
1629 // to it, and obtaining the pertinent information using the cpuid instr.
1630 unsigned int proc;
1631 int nApics = 0;
1632 KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
1633 // Skip this proc if it is not included in the machine model.
1634 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
1635 continue;
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00001636 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001637 KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
1638
1639 __kmp_affinity_dispatch->bind_thread(proc);
1640
1641 // Extract labels for each level in the machine topology map from Apic ID.
1642 Address addr(depth);
1643 int prev_shift = 0;
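// Hedged example of the decode below (hypothetical values): with depth == 3
// and x2APIC id 0x2D (0b101101), level 0 might report shift == 1, giving
// labels[2] = 0b101101 & 0x1 == 1 (thread); level 1 shift == 5, giving
// labels[1] = (0b101101 & 0x1f) >> 1 == 6 (core); and the terminating
// subleaf gives labels[0] = 0b101101 >> 5 == 1 (package).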
1644
1645 for (level = 0; level < depth; level++) {
1646 __kmp_x86_cpuid(11, level, &buf);
1647 unsigned apicId = buf.edx;
1648 if (buf.ebx == 0) {
1649 if (level != depth - 1) {
1650 KMP_CPU_FREE(oldMask);
1651 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1652 return -1;
1653 }
1654 addr.labels[depth - level - 1] = apicId >> prev_shift;
1655 level++;
1656 break;
1657 }
1658 int shift = buf.eax & 0x1f;
1659 int mask = (1 << shift) - 1;
1660 addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
1661 prev_shift = shift;
1662 }
1663 if (level != depth) {
1664 KMP_CPU_FREE(oldMask);
1665 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1666 return -1;
1667 }
1668
1669 retval[nApics] = AddrUnsPair(addr, proc);
1670 nApics++;
1671 }
1672
1673 // We've collected all the info we need.
1674 // Restore the old affinity mask for this thread.
1675 __kmp_set_system_affinity(oldMask, TRUE);
1676
1677 // If there's only one thread context to bind to, return now.
1678 KMP_ASSERT(nApics > 0);
1679 if (nApics == 1) {
1680 __kmp_ncores = nPackages = 1;
1681 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
1682 if (__kmp_affinity_verbose) {
1683 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1684 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1685
1686 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1687 if (__kmp_affinity_respect_mask) {
1688 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1689 } else {
1690 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1691 }
1692 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1693 KMP_INFORM(Uniform, "KMP_AFFINITY");
1694 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1695 __kmp_nThreadsPerCore, __kmp_ncores);
1696 }
1697
Jim Cownie5e8470a2013-09-27 10:38:44 +00001698 if (__kmp_affinity_type == affinity_none) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001699 __kmp_free(retval);
1700 KMP_CPU_FREE(oldMask);
1701 return 0;
1702 }
1703
1704 // Form an Address object which only includes the package level.
1705 Address addr(1);
1706 addr.labels[0] = retval[0].first.labels[pkgLevel];
1707 retval[0].first = addr;
1708
1709 if (__kmp_affinity_gran_levels < 0) {
1710 __kmp_affinity_gran_levels = 0;
1711 }
1712
1713 if (__kmp_affinity_verbose) {
1714 __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
1715 }
1716
1717 *address2os = retval;
1718 KMP_CPU_FREE(oldMask);
1719 return 1;
1720 }
1721
1722 // Sort the table by physical Id.
1723 qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
1724
1725 // Find the radix at each of the levels.
1726 unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1727 unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1728 unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1729 unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1730 for (level = 0; level < depth; level++) {
1731 totals[level] = 1;
1732 maxCt[level] = 1;
1733 counts[level] = 1;
1734 last[level] = retval[0].first.labels[level];
1735 }
1736
1737 // From here on, the iteration variable "level" runs from the coarsest level
1738 // to the finest, i.e. we iterate forward through
1739 // (*address2os)[].first.labels[] - in the previous loops, we iterated
1740 // backwards.
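// Illustrative trace (hypothetical labels): for sorted labels
// <0,0,0> <0,0,1> <0,1,0> <0,1,1> <1,0,0> <1,0,1> the walk below ends with
// totals == {2, 3, 6} (2 packages, 3 cores, 6 threads), counts holding the
// run length at the current node of each level, and maxCt == {2, 2, 2}
// (the widest package has 2 cores, the widest core 2 threads).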
1741 for (proc = 1; (int)proc < nApics; proc++) {
1742 int level;
1743 for (level = 0; level < depth; level++) {
1744 if (retval[proc].first.labels[level] != last[level]) {
1745 int j;
1746 for (j = level + 1; j < depth; j++) {
1747 totals[j]++;
1748 counts[j] = 1;
1749 // The line below would cause incorrect topology information to be printed
1750 // if the maximum value for some level (maxCt[level]) were encountered
1751 // before a smaller value while walking the array. For example, if pkg0
1752 // has 4 cores and pkg1 has 2 cores, then
1753 // maxCt[1] == 2
1754 // whereas it must be 4.
1755 // TODO!!! Check whether it can safely stay commented out
1756 // maxCt[j] = 1;
1757 last[j] = retval[proc].first.labels[j];
1758 }
1759 totals[level]++;
1760 counts[level]++;
1761 if (counts[level] > maxCt[level]) {
1762 maxCt[level] = counts[level];
1763 }
1764 last[level] = retval[proc].first.labels[level];
1765 break;
1766 } else if (level == depth - 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001767 __kmp_free(last);
1768 __kmp_free(maxCt);
1769 __kmp_free(counts);
1770 __kmp_free(totals);
1771 __kmp_free(retval);
1772 KMP_CPU_FREE(oldMask);
Jonathan Peyton30419822017-05-12 18:01:32 +00001773 *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
1774 return -1;
1775 }
1776 }
1777 }
1778
1779 // When affinity is off, this routine will still be called to set
1780 // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
1781 // Make sure all these vars are set correctly, and return if affinity is not
1782 // enabled.
1783 if (threadLevel >= 0) {
1784 __kmp_nThreadsPerCore = maxCt[threadLevel];
1785 } else {
1786 __kmp_nThreadsPerCore = 1;
1787 }
1788 nPackages = totals[pkgLevel];
1789
1790 if (coreLevel >= 0) {
1791 __kmp_ncores = totals[coreLevel];
1792 nCoresPerPkg = maxCt[coreLevel];
1793 } else {
1794 __kmp_ncores = nPackages;
1795 nCoresPerPkg = 1;
1796 }
1797
1798 // Check to see if the machine topology is uniform
1799 unsigned prod = maxCt[0];
1800 for (level = 1; level < depth; level++) {
1801 prod *= maxCt[level];
1802 }
1803 bool uniform = (prod == totals[level - 1]);
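// Hedged example (hypothetical topology): 2 packages x 4 cores x 2 threads
// gives prod == 2 * 4 * 2 == 16 == totals[depth - 1] (the leaf count), so
// the topology is uniform; if one package had only 2 cores, the leaf count
// would be 12 while prod stays 16, flagging a non-uniform topology.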
1804
1805 // Print the machine topology summary.
1806 if (__kmp_affinity_verbose) {
1807 char mask[KMP_AFFIN_MASK_PRINT_LEN];
1808 __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1809
1810 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1811 if (__kmp_affinity_respect_mask) {
1812 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
1813 } else {
1814 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
1815 }
1816 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1817 if (uniform) {
1818 KMP_INFORM(Uniform, "KMP_AFFINITY");
1819 } else {
1820 KMP_INFORM(NonUniform, "KMP_AFFINITY");
Jim Cownie5e8470a2013-09-27 10:38:44 +00001821 }
1822
Jonathan Peyton30419822017-05-12 18:01:32 +00001823 kmp_str_buf_t buf;
1824 __kmp_str_buf_init(&buf);
1825
1826 __kmp_str_buf_print(&buf, "%d", totals[0]);
1827 for (level = 1; level <= pkgLevel; level++) {
1828 __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001829 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001830 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
1831 __kmp_nThreadsPerCore, __kmp_ncores);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001832
Jonathan Peyton30419822017-05-12 18:01:32 +00001833 __kmp_str_buf_free(&buf);
1834 }
1835 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
1836 KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
1837 __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
1838 for (proc = 0; (int)proc < nApics; ++proc) {
1839 __kmp_pu_os_idx[proc] = retval[proc].second;
1840 }
1841 if (__kmp_affinity_type == affinity_none) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001842 __kmp_free(last);
1843 __kmp_free(maxCt);
1844 __kmp_free(counts);
1845 __kmp_free(totals);
Jonathan Peyton30419822017-05-12 18:01:32 +00001846 __kmp_free(retval);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001847 KMP_CPU_FREE(oldMask);
Jonathan Peyton30419822017-05-12 18:01:32 +00001848 return 0;
1849 }
1850
1851 // Find any levels with radix 1, and remove them from the map
1852 // (except for the package level).
1853 int new_depth = 0;
1854 for (level = 0; level < depth; level++) {
1855 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1856 continue;
1857 }
1858 new_depth++;
1859 }
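// E.g. (hypothetical): a single-package part with 8 cores and SMT disabled
// might report maxCt == 1 at the thread level, so that level is dropped
// and depth shrinks from 3 to 2, while the package level is kept even
// though nPackages == 1.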
1860
1861 // If we are removing any levels, allocate a new vector to return,
1862 // and copy the relevant information to it.
1863 if (new_depth != depth) {
1864 AddrUnsPair *new_retval =
1865 (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
1866 for (proc = 0; (int)proc < nApics; proc++) {
1867 Address addr(new_depth);
1868 new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
1869 }
1870 int new_level = 0;
1871 int newPkgLevel = -1;
1872 int newCoreLevel = -1;
1873 int newThreadLevel = -1;
Jonathan Peyton30419822017-05-12 18:01:32 +00001874 for (level = 0; level < depth; level++) {
1875 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1876 // Remove this level. Never remove the package level
1877 continue;
1878 }
1879 if (level == pkgLevel) {
Andrey Churbanova5868212017-11-30 11:51:47 +00001880 newPkgLevel = new_level;
Jonathan Peyton30419822017-05-12 18:01:32 +00001881 }
1882 if (level == coreLevel) {
Andrey Churbanova5868212017-11-30 11:51:47 +00001883 newCoreLevel = new_level;
Jonathan Peyton30419822017-05-12 18:01:32 +00001884 }
1885 if (level == threadLevel) {
Andrey Churbanova5868212017-11-30 11:51:47 +00001886 newThreadLevel = new_level;
Jonathan Peyton30419822017-05-12 18:01:32 +00001887 }
1888 for (proc = 0; (int)proc < nApics; proc++) {
1889 new_retval[proc].first.labels[new_level] =
1890 retval[proc].first.labels[level];
1891 }
1892 new_level++;
1893 }
1894
1895 __kmp_free(retval);
1896 retval = new_retval;
1897 depth = new_depth;
1898 pkgLevel = newPkgLevel;
1899 coreLevel = newCoreLevel;
1900 threadLevel = newThreadLevel;
1901 }
1902
1903 if (__kmp_affinity_gran_levels < 0) {
1904 // Set the granularity level based on what levels are modeled
1905 // in the machine topology map.
1906 __kmp_affinity_gran_levels = 0;
1907 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
1908 __kmp_affinity_gran_levels++;
1909 }
1910 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1911 __kmp_affinity_gran_levels++;
1912 }
1913 if (__kmp_affinity_gran > affinity_gran_package) {
1914 __kmp_affinity_gran_levels++;
1915 }
1916 }
1917
1918 if (__kmp_affinity_verbose) {
1919 __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel, coreLevel,
1920 threadLevel);
1921 }
1922
1923 __kmp_free(last);
1924 __kmp_free(maxCt);
1925 __kmp_free(counts);
1926 __kmp_free(totals);
1927 KMP_CPU_FREE(oldMask);
1928 *address2os = retval;
1929 return depth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001930}
1931
Jonathan Peyton30419822017-05-12 18:01:32 +00001932#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001933
Jonathan Peyton30419822017-05-12 18:01:32 +00001934#define osIdIndex 0
1935#define threadIdIndex 1
1936#define coreIdIndex 2
1937#define pkgIdIndex 3
1938#define nodeIdIndex 4
Jim Cownie5e8470a2013-09-27 10:38:44 +00001939
1940typedef unsigned *ProcCpuInfo;
1941static unsigned maxIndex = pkgIdIndex;
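// An example record in the accepted format (values illustrative only):
//
//   processor   : 12
//   physical id : 1
//   core id     : 2
//   thread id   : 0
//   node_0 id   : 1
//
// "processor" fills threadInfo[i][osIdIndex], "physical id" fills
// pkgIdIndex, "core id" fills coreIdIndex, "thread id" fills
// threadIdIndex, and "node_<n> id" fills nodeIdIndex + n.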
1942
Jonathan Peyton30419822017-05-12 18:01:32 +00001943static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a,
1944 const void *b) {
1945 unsigned i;
Andrey Churbanov5ba90c72017-07-17 09:03:14 +00001946 const unsigned *aa = *(unsigned *const *)a;
1947 const unsigned *bb = *(unsigned *const *)b;
Jonathan Peyton30419822017-05-12 18:01:32 +00001948 for (i = maxIndex;; i--) {
1949 if (aa[i] < bb[i])
1950 return -1;
1951 if (aa[i] > bb[i])
1952 return 1;
1953 if (i == osIdIndex)
1954 break;
1955 }
1956 return 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001957}
1958
Jonathan Peytonf6399362018-07-09 17:51:13 +00001959#if KMP_USE_HIER_SCHED
1960// Set the array sizes for the hierarchy layers
1961static void __kmp_dispatch_set_hierarchy_values() {
1962 // Set the maximum number of L1's to number of cores
1963 // Set the maximum number of L2's to either the number of cores / 2 for the
1964 // Intel(R) Xeon Phi(TM) coprocessor formerly codenamed Knights Landing,
1965 // or the number of cores for Intel(R) Xeon(R) processors
1966 // Set the maximum number of NUMA nodes and L3's to number of packages
1967 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1] =
1968 nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
1969 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L1 + 1] = __kmp_ncores;
1970#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
1971 if (__kmp_mic_type >= mic3)
1972 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores / 2;
1973 else
1974#endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
1975 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores;
1976 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L3 + 1] = nPackages;
1977 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_NUMA + 1] = nPackages;
1978 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_LOOP + 1] = 1;
1979 // Set the number of threads per unit
1980 // Number of hardware threads per L1/L2/L3/NUMA/LOOP
1981 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_THREAD + 1] = 1;
1982 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L1 + 1] =
1983 __kmp_nThreadsPerCore;
1984#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
1985 if (__kmp_mic_type >= mic3)
1986 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
1987 2 * __kmp_nThreadsPerCore;
1988 else
1989#endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
1990 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
1991 __kmp_nThreadsPerCore;
1992 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L3 + 1] =
1993 nCoresPerPkg * __kmp_nThreadsPerCore;
1994 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_NUMA + 1] =
1995 nCoresPerPkg * __kmp_nThreadsPerCore;
1996 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LOOP + 1] =
1997 nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
1998}
1999
2000// Return the index into the hierarchy for this tid and layer type (L1, L2, etc)
2001// i.e., this thread's L1 or this thread's L2, etc.
2002int __kmp_dispatch_get_index(int tid, kmp_hier_layer_e type) {
2003 int index = type + 1;
2004 int num_hw_threads = __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1];
2005 KMP_DEBUG_ASSERT(type != kmp_hier_layer_e::LAYER_LAST);
2006 if (type == kmp_hier_layer_e::LAYER_THREAD)
2007 return tid;
2008 else if (type == kmp_hier_layer_e::LAYER_LOOP)
2009 return 0;
2010 KMP_DEBUG_ASSERT(__kmp_hier_max_units[index] != 0);
2011 if (tid >= num_hw_threads)
2012 tid = tid % num_hw_threads;
2013 return (tid / __kmp_hier_threads_per[index]) % __kmp_hier_max_units[index];
2014}
2015
2016// Return the number of t1's per t2
2017int __kmp_dispatch_get_t1_per_t2(kmp_hier_layer_e t1, kmp_hier_layer_e t2) {
2018 int i1 = t1 + 1;
2019 int i2 = t2 + 1;
2020 KMP_DEBUG_ASSERT(i1 <= i2);
2021 KMP_DEBUG_ASSERT(t1 != kmp_hier_layer_e::LAYER_LAST);
2022 KMP_DEBUG_ASSERT(t2 != kmp_hier_layer_e::LAYER_LAST);
2023 KMP_DEBUG_ASSERT(__kmp_hier_threads_per[i1] != 0);
2024 // (nthreads/t2) / (nthreads/t1) = t1 / t2
2025 return __kmp_hier_threads_per[i2] / __kmp_hier_threads_per[i1];
2026}
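// A hedged usage sketch (hypothetical topology: one package, 8 cores, 2
// threads per core): __kmp_hier_threads_per[LAYER_L1 + 1] == 2, so tid 5
// maps to L1 unit (5 / 2) % 8 == 2, and
// __kmp_dispatch_get_t1_per_t2(LAYER_L1, LAYER_L3) == 16 / 2 == 8 L1's
// per L3.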
2027#endif // KMP_USE_HIER_SCHED
2028
Jim Cownie5e8470a2013-09-27 10:38:44 +00002029// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
2030// affinity map.
Jonathan Peyton30419822017-05-12 18:01:32 +00002031static int __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os,
2032 int *line,
2033 kmp_i18n_id_t *const msg_id,
2034 FILE *f) {
2035 *address2os = NULL;
2036 *msg_id = kmp_i18n_null;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002037
Jonathan Peyton30419822017-05-12 18:01:32 +00002038 // Scan the file, and count the number of "processor" (osId) fields,
2039 // and find the highest value of <n> for a node_<n> field.
2040 char buf[256];
2041 unsigned num_records = 0;
2042 while (!feof(f)) {
2043 buf[sizeof(buf) - 1] = 1;
2044 if (!fgets(buf, sizeof(buf), f)) {
2045 // Read errors presumably because of EOF
2046 break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002047 }
2048
Jonathan Peyton30419822017-05-12 18:01:32 +00002049 char s1[] = "processor";
2050 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
2051 num_records++;
2052 continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002053 }
2054
Jonathan Peyton30419822017-05-12 18:01:32 +00002055 // FIXME - this will match "node_<n> <garbage>"
2056 unsigned level;
Andrey Churbanov5ba90c72017-07-17 09:03:14 +00002057 if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002058 if (nodeIdIndex + level >= maxIndex) {
2059 maxIndex = nodeIdIndex + level;
2060 }
2061 continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002062 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002063 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002064
Jonathan Peyton30419822017-05-12 18:01:32 +00002065 // Check for empty file / no valid processor records, or too many. The number
2066 // of records can't exceed the number of valid bits in the affinity mask.
2067 if (num_records == 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002068 *line = 0;
Jonathan Peyton30419822017-05-12 18:01:32 +00002069 *msg_id = kmp_i18n_str_NoProcRecords;
2070 return -1;
2071 }
2072 if (num_records > (unsigned)__kmp_xproc) {
2073 *line = 0;
2074 *msg_id = kmp_i18n_str_TooManyProcRecords;
2075 return -1;
2076 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002077
Jonathan Peyton30419822017-05-12 18:01:32 +00002078 // Set the file pointer back to the beginning, so that we can scan the file
2079 // again, this time performing a full parse of the data. Allocate a vector of
2080 // ProcCpuInfo object, where we will place the data. Adding an extra element
2081 // at the end allows us to remove a lot of extra checks for termination
2082 // conditions.
2083 if (fseek(f, 0, SEEK_SET) != 0) {
2084 *line = 0;
2085 *msg_id = kmp_i18n_str_CantRewindCpuinfo;
2086 return -1;
2087 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002088
Jonathan Peyton30419822017-05-12 18:01:32 +00002089 // Allocate the array of records to store the proc info in. The dummy
2090 // element at the end makes the logic in filling them out easier to code.
2091 unsigned **threadInfo =
2092 (unsigned **)__kmp_allocate((num_records + 1) * sizeof(unsigned *));
2093 unsigned i;
2094 for (i = 0; i <= num_records; i++) {
2095 threadInfo[i] =
2096 (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
2097 }
2098
2099#define CLEANUP_THREAD_INFO \
2100 for (i = 0; i <= num_records; i++) { \
2101 __kmp_free(threadInfo[i]); \
2102 } \
2103 __kmp_free(threadInfo);
2104
2105 // A value of UINT_MAX means that we didn't find the field
2106 unsigned __index;
2107
2108#define INIT_PROC_INFO(p) \
2109 for (__index = 0; __index <= maxIndex; __index++) { \
2110 (p)[__index] = UINT_MAX; \
2111 }
2112
2113 for (i = 0; i <= num_records; i++) {
2114 INIT_PROC_INFO(threadInfo[i]);
2115 }
2116
2117 unsigned num_avail = 0;
2118 *line = 0;
2119 while (!feof(f)) {
2120 // Create an inner scoping level, so that all the goto targets at the end of
2121 // the loop appear in an outer scoping level. This avoids warnings about
2122 // jumping past an initialization to a target in the same block.
2123 {
2124 buf[sizeof(buf) - 1] = 1;
2125 bool long_line = false;
2126 if (!fgets(buf, sizeof(buf), f)) {
2127 // Read errors presumably because of EOF
2128 // If there is valid data in threadInfo[num_avail], then fake
2129 // a blank line to ensure that the last address gets parsed.
2130 bool valid = false;
2131 for (i = 0; i <= maxIndex; i++) {
2132 if (threadInfo[num_avail][i] != UINT_MAX) {
2133 valid = true;
2134 }
2135 }
2136 if (!valid) {
2137 break;
2138 }
2139 buf[0] = 0;
2140 } else if (!buf[sizeof(buf) - 1]) {
2141 // The line is longer than the buffer. Set a flag and don't
2142 // emit an error if we were going to ignore the line, anyway.
2143 long_line = true;
2144
2145#define CHECK_LINE \
2146 if (long_line) { \
2147 CLEANUP_THREAD_INFO; \
2148 *msg_id = kmp_i18n_str_LongLineCpuinfo; \
2149 return -1; \
2150 }
2151 }
2152 (*line)++;
2153
2154 char s1[] = "processor";
2155 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
2156 CHECK_LINE;
2157 char *p = strchr(buf + sizeof(s1) - 1, ':');
2158 unsigned val;
2159 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
2160 goto no_val;
2161 if (threadInfo[num_avail][osIdIndex] != UINT_MAX)
Paul Osmialowski7634f702017-12-13 16:12:24 +00002162#if KMP_ARCH_AARCH64
2163 // Handle the old AArch64 /proc/cpuinfo layout differently;
2164 // it contains all of the 'processor' entries listed in a
2165 // single 'Processor' section, therefore the normal check
2166 // for duplicates in that section will always fail.
2167 num_avail++;
2168#else
Jonathan Peyton30419822017-05-12 18:01:32 +00002169 goto dup_field;
Paul Osmialowski7634f702017-12-13 16:12:24 +00002170#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002171 threadInfo[num_avail][osIdIndex] = val;
Jonas Hahnfeldce528ac2017-12-08 15:07:05 +00002172#if KMP_OS_LINUX && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
Jonathan Peyton30419822017-05-12 18:01:32 +00002173 char path[256];
2174 KMP_SNPRINTF(
2175 path, sizeof(path),
2176 "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
2177 threadInfo[num_avail][osIdIndex]);
2178 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
Jim Cownie181b4bb2013-12-23 17:28:57 +00002179
Jonathan Peyton30419822017-05-12 18:01:32 +00002180 KMP_SNPRINTF(path, sizeof(path),
2181 "/sys/devices/system/cpu/cpu%u/topology/core_id",
2182 threadInfo[num_avail][osIdIndex]);
2183 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
2184 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002185#else
Jonathan Peyton30419822017-05-12 18:01:32 +00002186 }
2187 char s2[] = "physical id";
2188 if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
2189 CHECK_LINE;
2190 char *p = strchr(buf + sizeof(s2) - 1, ':');
2191 unsigned val;
2192 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
2193 goto no_val;
2194 if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX)
2195 goto dup_field;
2196 threadInfo[num_avail][pkgIdIndex] = val;
2197 continue;
2198 }
2199 char s3[] = "core id";
2200 if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
2201 CHECK_LINE;
2202 char *p = strchr(buf + sizeof(s3) - 1, ':');
2203 unsigned val;
2204 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
2205 goto no_val;
2206 if (threadInfo[num_avail][coreIdIndex] != UINT_MAX)
2207 goto dup_field;
2208 threadInfo[num_avail][coreIdIndex] = val;
2209 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002210#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jonathan Peyton30419822017-05-12 18:01:32 +00002211 }
2212 char s4[] = "thread id";
2213 if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
2214 CHECK_LINE;
2215 char *p = strchr(buf + sizeof(s4) - 1, ':');
2216 unsigned val;
2217 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
2218 goto no_val;
2219 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX)
2220 goto dup_field;
2221 threadInfo[num_avail][threadIdIndex] = val;
2222 continue;
2223 }
2224 unsigned level;
Jonathan Peyton6a393f72017-09-05 15:43:58 +00002225 if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002226 CHECK_LINE;
2227 char *p = strchr(buf + sizeof(s4) - 1, ':');
2228 unsigned val;
2229 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
2230 goto no_val;
2231 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
2232 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX)
2233 goto dup_field;
2234 threadInfo[num_avail][nodeIdIndex + level] = val;
2235 continue;
2236 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002237
Jonathan Peyton30419822017-05-12 18:01:32 +00002238 // We didn't recognize the leading token on the line. There are lots of
2239 // leading tokens that we don't recognize - if the line isn't empty, go on
2240 // to the next line.
2241 if ((*buf != 0) && (*buf != '\n')) {
2242 // If the line is longer than the buffer, read characters
2243 // until we find a newline.
2244 if (long_line) {
2245 int ch;
2246 while (((ch = fgetc(f)) != EOF) && (ch != '\n'))
2247 ;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002248 }
2249 continue;
Jonathan Peyton30419822017-05-12 18:01:32 +00002250 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002251
Jonathan Peyton30419822017-05-12 18:01:32 +00002252 // A newline has signalled the end of the processor record.
2253 // Check that there aren't too many procs specified.
2254 if ((int)num_avail == __kmp_xproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002255 CLEANUP_THREAD_INFO;
Jonathan Peyton30419822017-05-12 18:01:32 +00002256 *msg_id = kmp_i18n_str_TooManyEntries;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002257 return -1;
Jonathan Peyton30419822017-05-12 18:01:32 +00002258 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002259
Jonathan Peyton30419822017-05-12 18:01:32 +00002260 // Check for missing fields. The osId field must be there, and we
2261 // currently require the physical id field to be specified as well.
2262 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002263 CLEANUP_THREAD_INFO;
Jonathan Peyton30419822017-05-12 18:01:32 +00002264 *msg_id = kmp_i18n_str_MissingProcField;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002265 return -1;
Jonathan Peyton30419822017-05-12 18:01:32 +00002266 }
2267 if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002268 CLEANUP_THREAD_INFO;
Jonathan Peyton30419822017-05-12 18:01:32 +00002269 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
2270 return -1;
2271 }
2272
2273 // Skip this proc if it is not included in the machine model.
2274 if (!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex],
2275 __kmp_affin_fullMask)) {
2276 INIT_PROC_INFO(threadInfo[num_avail]);
2277 continue;
2278 }
2279
2280 // We have a successful parse of this proc's info.
2281 // Increment the counter, and prepare for the next proc.
2282 num_avail++;
2283 KMP_ASSERT(num_avail <= num_records);
2284 INIT_PROC_INFO(threadInfo[num_avail]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002285 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002286 continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002287
Jonathan Peyton30419822017-05-12 18:01:32 +00002288 no_val:
2289 CLEANUP_THREAD_INFO;
2290 *msg_id = kmp_i18n_str_MissingValCpuinfo;
2291 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002292
Jonathan Peyton30419822017-05-12 18:01:32 +00002293 dup_field:
2294 CLEANUP_THREAD_INFO;
2295 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
2296 return -1;
2297 }
2298 *line = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002299
Jonathan Peyton30419822017-05-12 18:01:32 +00002300#if KMP_MIC && REDUCE_TEAM_SIZE
2301 unsigned teamSize = 0;
2302#endif // KMP_MIC && REDUCE_TEAM_SIZE
Jim Cownie5e8470a2013-09-27 10:38:44 +00002303
Jonathan Peyton30419822017-05-12 18:01:32 +00002304 // check for num_records == __kmp_xproc ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00002305
Jonathan Peyton30419822017-05-12 18:01:32 +00002306 // If there's only one thread context to bind to, form an Address object with
2307 // depth 1 and return immediately (or, if affinity is off, set address2os to
2308 // NULL and return).
2309 //
2310 // If it is configured to omit the package level when there is only a single
2311 // package, the logic at the end of this routine won't work if there is only a
2312 // single thread - it would try to form an Address object with depth 0.
2313 KMP_ASSERT(num_avail > 0);
2314 KMP_ASSERT(num_avail <= num_records);
2315 if (num_avail == 1) {
2316 __kmp_ncores = 1;
2317 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002318 if (__kmp_affinity_verbose) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002319 if (!KMP_AFFINITY_CAPABLE()) {
2320 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2321 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2322 KMP_INFORM(Uniform, "KMP_AFFINITY");
2323 } else {
2324 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2325 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
2326 __kmp_affin_fullMask);
2327 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2328 if (__kmp_affinity_respect_mask) {
2329 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2330 } else {
2331 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002332 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002333 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2334 KMP_INFORM(Uniform, "KMP_AFFINITY");
2335 }
2336 int index;
2337 kmp_str_buf_t buf;
2338 __kmp_str_buf_init(&buf);
2339 __kmp_str_buf_print(&buf, "1");
2340 for (index = maxIndex - 1; index > pkgIdIndex; index--) {
2341 __kmp_str_buf_print(&buf, " x 1");
2342 }
2343 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
2344 __kmp_str_buf_free(&buf);
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00002345 }
2346
Jim Cownie5e8470a2013-09-27 10:38:44 +00002347 if (__kmp_affinity_type == affinity_none) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002348 CLEANUP_THREAD_INFO;
2349 return 0;
2350 }
2351
2352 *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair));
2353 Address addr(1);
2354 addr.labels[0] = threadInfo[0][pkgIdIndex];
2355 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
2356
2357 if (__kmp_affinity_gran_levels < 0) {
2358 __kmp_affinity_gran_levels = 0;
2359 }
2360
2361 if (__kmp_affinity_verbose) {
2362 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
2363 }
2364
2365 CLEANUP_THREAD_INFO;
2366 return 1;
2367 }
2368
2369 // Sort the threadInfo table by physical Id.
2370 qsort(threadInfo, num_avail, sizeof(*threadInfo),
2371 __kmp_affinity_cmp_ProcCpuInfo_phys_id);
2372
2373 // The table is now sorted by pkgId / coreId / threadId, but we really don't
2374 // know the radix of any of the fields. pkgId's may be sparsely assigned among
2375 // the chips on a system. Although coreId's are usually assigned
2376 // [0 .. coresPerPkg-1] and threadId's are usually assigned
2377 // [0..threadsPerCore-1], we don't want to make any such assumptions.
2378 //
2379 // For that matter, we don't know what coresPerPkg and threadsPerCore (or the
2380 // total # packages) are at this point - we want to determine that now. We
2381 // only have an upper bound on the first two figures.
2382 unsigned *counts =
2383 (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
2384 unsigned *maxCt =
2385 (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
2386 unsigned *totals =
2387 (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
2388 unsigned *lastId =
2389 (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
2390
2391 bool assign_thread_ids = false;
2392 unsigned threadIdCt;
2393 unsigned index;
2394
2395restart_radix_check:
2396 threadIdCt = 0;
2397
2398 // Initialize the counter arrays with data from threadInfo[0].
2399 if (assign_thread_ids) {
2400 if (threadInfo[0][threadIdIndex] == UINT_MAX) {
2401 threadInfo[0][threadIdIndex] = threadIdCt++;
2402 } else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
2403 threadIdCt = threadInfo[0][threadIdIndex] + 1;
2404 }
2405 }
2406 for (index = 0; index <= maxIndex; index++) {
2407 counts[index] = 1;
2408 maxCt[index] = 1;
2409 totals[index] = 1;
2410 lastId[index] = threadInfo[0][index];
2412 }
2413
2414 // Run through the rest of the OS procs.
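// Illustrative behavior of the auto-assignment below (hypothetical input):
// if three records within one core carry no "thread id" field they receive
// ids 0, 1, 2 from threadIdCt; the counter restarts at 0 on each new core,
// and an explicitly specified id bumps threadIdCt past it.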
2415 for (i = 1; i < num_avail; i++) {
2416 // Find the most significant index whose id differs from the id for the
2417 // previous OS proc.
2418 for (index = maxIndex; index >= threadIdIndex; index--) {
2419 if (assign_thread_ids && (index == threadIdIndex)) {
2420 // Auto-assign the thread id field if it wasn't specified.
2421 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2422 threadInfo[i][threadIdIndex] = threadIdCt++;
2423 }
Jonathan Peyton642688b2017-06-01 16:46:36 +00002424 // Apparently the thread id field was specified for some entries and not
Jonathan Peyton30419822017-05-12 18:01:32 +00002425 // others. Start the thread id counter off at the next higher thread id.
2426 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2427 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2428 }
2429 }
2430 if (threadInfo[i][index] != lastId[index]) {
2431 // Run through all indices which are less significant, and reset the
2432 // counts to 1. At all levels up to and including index, we need to
2433 // increment the totals and record the last id.
2434 unsigned index2;
2435 for (index2 = threadIdIndex; index2 < index; index2++) {
2436 totals[index2]++;
2437 if (counts[index2] > maxCt[index2]) {
2438 maxCt[index2] = counts[index2];
2439 }
2440 counts[index2] = 1;
2441 lastId[index2] = threadInfo[i][index2];
2442 }
2443 counts[index]++;
2444 totals[index]++;
2445 lastId[index] = threadInfo[i][index];
2446
2447 if (assign_thread_ids && (index > threadIdIndex)) {
2448
2449#if KMP_MIC && REDUCE_TEAM_SIZE
2450 // The default team size is the total #threads in the machine
2451 // minus 1 thread for every core that has 3 or more threads.
2452 teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
2453#endif // KMP_MIC && REDUCE_TEAM_SIZE
2454
2455 // Restart the thread counter, as we are on a new core.
2456 threadIdCt = 0;
2457
2458 // Auto-assign the thread id field if it wasn't specified.
2459 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2460 threadInfo[i][threadIdIndex] = threadIdCt++;
2461 }
2462
2463 // Apparently the thread id field was specified for some entries and
2464 // not others. Start the thread id counter off at the next higher
2465 // thread id.
2466 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2467 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2468 }
2469 }
2470 break;
2471 }
2472 }
2473 if (index < threadIdIndex) {
2474 // If thread ids were specified, it is an error if they are not unique.
2475 // Also, check that we haven't already restarted the loop (to be safe -
2476 // shouldn't need to).
2477 if ((threadInfo[i][threadIdIndex] != UINT_MAX) || assign_thread_ids) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002478 __kmp_free(lastId);
2479 __kmp_free(totals);
2480 __kmp_free(maxCt);
2481 __kmp_free(counts);
2482 CLEANUP_THREAD_INFO;
Jonathan Peyton30419822017-05-12 18:01:32 +00002483 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2484 return -1;
2485 }
2486
2487 // If the thread ids were not specified and we see entries that
2488 // are duplicates, start the loop over and assign the thread ids manually.
2489 assign_thread_ids = true;
2490 goto restart_radix_check;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002491 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002492 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002493
Jonathan Peyton30419822017-05-12 18:01:32 +00002494#if KMP_MIC && REDUCE_TEAM_SIZE
2495 // The default team size is the total #threads in the machine
2496 // minus 1 thread for every core that has 3 or more threads.
2497 teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
2498#endif // KMP_MIC && REDUCE_TEAM_SIZE
2499
2500 for (index = threadIdIndex; index <= maxIndex; index++) {
2501 if (counts[index] > maxCt[index]) {
2502 maxCt[index] = counts[index];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002503 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002504 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002505
Jonathan Peyton30419822017-05-12 18:01:32 +00002506 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2507 nCoresPerPkg = maxCt[coreIdIndex];
2508 nPackages = totals[pkgIdIndex];
2509
2510 // Check to see if the machine topology is uniform
2511 unsigned prod = totals[maxIndex];
2512 for (index = threadIdIndex; index < maxIndex; index++) {
2513 prod *= maxCt[index];
2514 }
2515 bool uniform = (prod == totals[threadIdIndex]);
2516
2517 // When affinity is off, this routine will still be called to set
2518 // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
2519 // Make sure all these vars are set correctly, and return now if affinity is
2520 // not enabled.
2521 __kmp_ncores = totals[coreIdIndex];
2522
2523 if (__kmp_affinity_verbose) {
2524 if (!KMP_AFFINITY_CAPABLE()) {
2525 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2526 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2527 if (uniform) {
2528 KMP_INFORM(Uniform, "KMP_AFFINITY");
2529 } else {
2530 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2531 }
2532 } else {
2533 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2534 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
2535 __kmp_affin_fullMask);
2536 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2537 if (__kmp_affinity_respect_mask) {
2538 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2539 } else {
2540 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2541 }
2542 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2543 if (uniform) {
2544 KMP_INFORM(Uniform, "KMP_AFFINITY");
2545 } else {
2546 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2547 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002548 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002549 kmp_str_buf_t buf;
2550 __kmp_str_buf_init(&buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002551
Jonathan Peyton30419822017-05-12 18:01:32 +00002552 __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
2553 for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2554 __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002555 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002556 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
2557 maxCt[threadIdIndex], __kmp_ncores);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002558
Jonathan Peyton30419822017-05-12 18:01:32 +00002559 __kmp_str_buf_free(&buf);
2560 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002561
Jonathan Peyton30419822017-05-12 18:01:32 +00002562#if KMP_MIC && REDUCE_TEAM_SIZE
2563 // Set the default team size.
2564 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2565 __kmp_dflt_team_nth = teamSize;
2566 KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting "
2567 "__kmp_dflt_team_nth = %d\n",
2568 __kmp_dflt_team_nth));
2569 }
2570#endif // KMP_MIC && REDUCE_TEAM_SIZE
Jim Cownie5e8470a2013-09-27 10:38:44 +00002571
Jonathan Peyton30419822017-05-12 18:01:32 +00002572 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00002573 KMP_DEBUG_ASSERT(num_avail == (unsigned)__kmp_avail_proc);
Jonathan Peyton30419822017-05-12 18:01:32 +00002574 __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
2575 for (i = 0; i < num_avail; ++i) { // fill the os indices
2576 __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
2577 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002578
Jonathan Peyton30419822017-05-12 18:01:32 +00002579 if (__kmp_affinity_type == affinity_none) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002580 __kmp_free(lastId);
2581 __kmp_free(totals);
2582 __kmp_free(maxCt);
2583 __kmp_free(counts);
2584 CLEANUP_THREAD_INFO;
Jonathan Peyton30419822017-05-12 18:01:32 +00002585 return 0;
2586 }
2587
2588 // Count the number of levels which have more nodes at that level than at the
2589 // parent's level (with an implicit root node above the top level).
2590 // This is equivalent to saying that there is at least one node at this level
2591 // which has a sibling. These levels are in the map, and the package level is
2592 // always in the map.
2593 bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
Jonathan Peyton30419822017-05-12 18:01:32 +00002594 for (index = threadIdIndex; index < maxIndex; index++) {
2595 KMP_ASSERT(totals[index] >= totals[index + 1]);
2596 inMap[index] = (totals[index] > totals[index + 1]);
2597 }
2598 inMap[maxIndex] = (totals[maxIndex] > 1);
2599 inMap[pkgIdIndex] = true;
2600
2601 int depth = 0;
2602 for (index = threadIdIndex; index <= maxIndex; index++) {
2603 if (inMap[index]) {
2604 depth++;
2605 }
2606 }
2607 KMP_ASSERT(depth > 0);
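  // Illustrative note (hypothetical numbers, not computed here): on a machine
  // with 2 packages x 2 cores x 2 threads, totals from the thread level up
  // would be {8, 4, 2}, every level would have a sibling somewhere, inMap
  // would be all true, and depth would come out as 3. If each core ran a
  // single thread, the thread level would drop out of the map and depth
  // would be 2.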

  // Construct the data structure that is to be returned.
  *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * num_avail);
  int pkgLevel = -1;
  int coreLevel = -1;
  int threadLevel = -1;

  for (i = 0; i < num_avail; ++i) {
    Address addr(depth);
    unsigned os = threadInfo[i][osIdIndex];
    int src_index;
    int dst_index = 0;

    for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
      if (!inMap[src_index]) {
        continue;
      }
      addr.labels[dst_index] = threadInfo[i][src_index];
      if (src_index == pkgIdIndex) {
        pkgLevel = dst_index;
      } else if (src_index == coreIdIndex) {
        coreLevel = dst_index;
      } else if (src_index == threadIdIndex) {
        threadLevel = dst_index;
      }
      dst_index++;
    }
    (*address2os)[i] = AddrUnsPair(addr, os);
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled
    // in the machine topology map.
    unsigned src_index;
    __kmp_affinity_gran_levels = 0;
    for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
      if (!inMap[src_index]) {
        continue;
      }
      switch (src_index) {
      case threadIdIndex:
        if (__kmp_affinity_gran > affinity_gran_thread) {
          __kmp_affinity_gran_levels++;
        }

        break;
      case coreIdIndex:
        if (__kmp_affinity_gran > affinity_gran_core) {
          __kmp_affinity_gran_levels++;
        }
        break;

      case pkgIdIndex:
        if (__kmp_affinity_gran > affinity_gran_package) {
          __kmp_affinity_gran_levels++;
        }
        break;
      }
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
                                  coreLevel, threadLevel);
  }

  __kmp_free(inMap);
  __kmp_free(lastId);
  __kmp_free(totals);
  __kmp_free(maxCt);
  __kmp_free(counts);
  CLEANUP_THREAD_INFO;
  return depth;
}

// Create and return a table of affinity masks, indexed by OS thread ID.
// This routine handles OR'ing together all the affinity masks of threads
// that are sufficiently close, if granularity > fine.
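// Illustrative example (hypothetical machine): with granularity=core and two
// hardware threads per core, OS procs 0 and 1 on the same core both receive
// the mask {0,1}, so a thread bound through this table may float between the
// two hardware threads but will not migrate off the core.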
static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex,
                                            unsigned *numUnique,
                                            AddrUnsPair *address2os,
                                            unsigned numAddrs) {
  // First form a table of affinity masks in order of OS thread id.
  unsigned depth;
  unsigned maxOsId;
  unsigned i;

  KMP_ASSERT(numAddrs > 0);
  depth = address2os[0].first.depth;

  maxOsId = 0;
  for (i = numAddrs - 1;; --i) {
    unsigned osId = address2os[i].second;
    if (osId > maxOsId) {
      maxOsId = osId;
    }
    if (i == 0)
      break;
  }
  kmp_affin_mask_t *osId2Mask;
  KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId + 1));

  // Sort the address2os table according to physical order. Doing so will put
  // all threads on the same core/package/node in consecutive locations.
  qsort(address2os, numAddrs, sizeof(*address2os),
        __kmp_affinity_cmp_Address_labels);

  KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
  if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
    KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
  }
  if (__kmp_affinity_gran_levels >= (int)depth) {
    if (__kmp_affinity_verbose ||
        (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
      KMP_WARNING(AffThreadsMayMigrate);
    }
  }

  // Run through the table, forming the masks for all threads on each core.
  // Threads on the same core will have identical "Address" objects, not
  // considering the last level, which must be the thread id. All threads on a
  // core will appear consecutively.
  unsigned unique = 0;
  unsigned j = 0; // index of 1st thread on core
  unsigned leader = 0;
  Address *leaderAddr = &(address2os[0].first);
  kmp_affin_mask_t *sum;
  KMP_CPU_ALLOC_ON_STACK(sum);
  KMP_CPU_ZERO(sum);
  KMP_CPU_SET(address2os[0].second, sum);
  for (i = 1; i < numAddrs; i++) {
    // If this thread is sufficiently close to the leader (within the
    // granularity setting), then set the bit for this os thread in the
    // affinity mask for this group, and go on to the next thread.
    if (leaderAddr->isClose(address2os[i].first, __kmp_affinity_gran_levels)) {
      KMP_CPU_SET(address2os[i].second, sum);
      continue;
    }

    // For every thread in this group, copy the mask to the thread's entry in
    // the osId2Mask table. Mark the first address as a leader.
    for (; j < i; j++) {
      unsigned osId = address2os[j].second;
      KMP_DEBUG_ASSERT(osId <= maxOsId);
      kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
      KMP_CPU_COPY(mask, sum);
      address2os[j].first.leader = (j == leader);
    }
    unique++;

    // Start a new mask.
    leader = i;
    leaderAddr = &(address2os[i].first);
    KMP_CPU_ZERO(sum);
    KMP_CPU_SET(address2os[i].second, sum);
  }

  // For every thread in the last group, copy the mask to the thread's
  // entry in the osId2Mask table.
  for (; j < i; j++) {
    unsigned osId = address2os[j].second;
    KMP_DEBUG_ASSERT(osId <= maxOsId);
    kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
    KMP_CPU_COPY(mask, sum);
    address2os[j].first.leader = (j == leader);
  }
  unique++;
  KMP_CPU_FREE_FROM_STACK(sum);

  *maxIndex = maxOsId;
  *numUnique = unique;
  return osId2Mask;
}

// Stuff for the affinity proclist parsers. It's easier to declare these vars
// as file-static than to try and pass them through the calling sequence of
// the recursive-descent OMP_PLACES parser.
static kmp_affin_mask_t *newMasks;
static int numNewMasks;
static int nextNewMask;

#define ADD_MASK(_mask)                                                        \
  {                                                                            \
    if (nextNewMask >= numNewMasks) {                                          \
      int i;                                                                   \
      numNewMasks *= 2;                                                        \
      kmp_affin_mask_t *temp;                                                  \
      KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks);                         \
      for (i = 0; i < numNewMasks / 2; i++) {                                  \
        kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);                    \
        kmp_affin_mask_t *dest = KMP_CPU_INDEX(temp, i);                       \
        KMP_CPU_COPY(dest, src);                                               \
      }                                                                        \
      KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks / 2);                  \
      newMasks = temp;                                                         \
    }                                                                          \
    KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask));               \
    nextNewMask++;                                                             \
  }
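// Note: ADD_MASK grows the newMasks vector geometrically, doubling its
// capacity and copying the old entries whenever it fills up, so appending n
// masks costs O(n) copies overall rather than O(n^2).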

#define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId)                             \
  {                                                                            \
    if (((_osId) > _maxOsId) ||                                                \
        (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) {     \
      if (__kmp_affinity_verbose ||                                            \
          (__kmp_affinity_warnings &&                                          \
           (__kmp_affinity_type != affinity_none))) {                          \
        KMP_WARNING(AffIgnoreInvalidProcID, _osId);                            \
      }                                                                        \
    } else {                                                                   \
      ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId)));                            \
    }                                                                          \
  }

// Re-parse the proclist (for the explicit affinity type), and form the list
// of affinity newMasks indexed by gtid.
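// Illustrative example (hypothetical OS proc ids): a proclist such as
//   "0,3,{5,6,7},10-16:2"
// produces one mask each for procs 0 and 3, a single mask covering the set
// {5,6,7}, and one mask each for procs 10, 12, 14, and 16 (a range with
// stride 2), matching the set/range/stride cases parsed below.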
static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
                                            unsigned int *out_numMasks,
                                            const char *proclist,
                                            kmp_affin_mask_t *osId2Mask,
                                            int maxOsId) {
  int i;
  const char *scan = proclist;
  const char *next = proclist;

  // The temporary mask vector starts small and is grown by doubling as masks
  // are added (see ADD_MASK above).
  numNewMasks = 2;
  KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
  nextNewMask = 0;
  kmp_affin_mask_t *sumMask;
  KMP_CPU_ALLOC(sumMask);
  int setSize = 0;

  for (;;) {
    int start, end, stride;

    SKIP_WS(scan);
    next = scan;
    if (*next == '\0') {
      break;
    }

    if (*next == '{') {
      int num;
      setSize = 0;
      next++; // skip '{'
      SKIP_WS(next);
      scan = next;

      // Read the first integer in the set.
      KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad proclist");
      SKIP_DIGITS(next);
      num = __kmp_str_to_int(scan, *next);
      KMP_ASSERT2(num >= 0, "bad explicit proc list");

      // Copy the mask for that osId to the sum (union) mask.
      if ((num > maxOsId) ||
          (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none))) {
          KMP_WARNING(AffIgnoreInvalidProcID, num);
        }
        KMP_CPU_ZERO(sumMask);
      } else {
        KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
        setSize = 1;
      }

      for (;;) {
        // Check for end of set.
        SKIP_WS(next);
        if (*next == '}') {
          next++; // skip '}'
          break;
        }

        // Skip optional comma.
        if (*next == ',') {
          next++;
        }
        SKIP_WS(next);

        // Read the next integer in the set.
        scan = next;
        KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");

        SKIP_DIGITS(next);
        num = __kmp_str_to_int(scan, *next);
        KMP_ASSERT2(num >= 0, "bad explicit proc list");

        // Add the mask for that osId to the sum mask.
        if ((num > maxOsId) ||
            (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
          if (__kmp_affinity_verbose ||
              (__kmp_affinity_warnings &&
               (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffIgnoreInvalidProcID, num);
          }
        } else {
          KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
          setSize++;
        }
      }
      if (setSize > 0) {
        ADD_MASK(sumMask);
      }

      SKIP_WS(next);
      if (*next == ',') {
        next++;
      }
      scan = next;
      continue;
    }

    // Read the first integer.
    KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
    SKIP_DIGITS(next);
    start = __kmp_str_to_int(scan, *next);
    KMP_ASSERT2(start >= 0, "bad explicit proc list");
    SKIP_WS(next);

    // If this isn't a range, then add a mask to the list and go on.
    if (*next != '-') {
      ADD_MASK_OSID(start, osId2Mask, maxOsId);

      // Skip optional comma.
      if (*next == ',') {
        next++;
      }
      scan = next;
      continue;
    }

    // This is a range. Skip over the '-' and read in the 2nd int.
    next++; // skip '-'
    SKIP_WS(next);
    scan = next;
    KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
    SKIP_DIGITS(next);
    end = __kmp_str_to_int(scan, *next);
    KMP_ASSERT2(end >= 0, "bad explicit proc list");

    // Check for a stride parameter
    stride = 1;
    SKIP_WS(next);
    if (*next == ':') {
      // A stride is specified. Skip over the ':' and read the 3rd int.
      int sign = +1;
      next++; // skip ':'
      SKIP_WS(next);
      scan = next;
      if (*next == '-') {
        sign = -1;
        next++;
        SKIP_WS(next);
        scan = next;
      }
      KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
      SKIP_DIGITS(next);
      stride = __kmp_str_to_int(scan, *next);
      KMP_ASSERT2(stride >= 0, "bad explicit proc list");
      stride *= sign;
    }

    // Do some range checks.
    KMP_ASSERT2(stride != 0, "bad explicit proc list");
    if (stride > 0) {
      KMP_ASSERT2(start <= end, "bad explicit proc list");
    } else {
      KMP_ASSERT2(start >= end, "bad explicit proc list");
    }
    KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");

    // Add the mask for each OS proc # to the list.
    if (stride > 0) {
      do {
        ADD_MASK_OSID(start, osId2Mask, maxOsId);
        start += stride;
      } while (start <= end);
    } else {
      do {
        ADD_MASK_OSID(start, osId2Mask, maxOsId);
        start += stride;
      } while (start >= end);
    }

    // Skip optional comma.
    SKIP_WS(next);
    if (*next == ',') {
      next++;
    }
    scan = next;
  }

  *out_numMasks = nextNewMask;
  if (nextNewMask == 0) {
    *out_masks = NULL;
    KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
    return;
  }
  KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
  for (i = 0; i < nextNewMask; i++) {
    kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
    kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
    KMP_CPU_COPY(dest, src);
  }
  KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
  KMP_CPU_FREE(sumMask);
}

#if OMP_40_ENABLED

/*-----------------------------------------------------------------------------
Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
places. Again, here is the grammar:

place_list := place
place_list := place , place_list
place := num
place := place : num
place := place : num : signed
place := { subplacelist }
place := ! place // (lowest priority)
subplace_list := subplace
subplace_list := subplace , subplace_list
subplace := num
subplace := num : num
subplace := num : num : signed
signed := num
signed := + signed
signed := - signed
-----------------------------------------------------------------------------*/
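/* A few concrete examples of the grammar (hypothetical OS proc ids):
     OMP_PLACES="{0,1,2,3},{4,5,6,7}"  two places of four procs each
     OMP_PLACES="{0:4},{4:4}"          the same two places, written as
                                       num:count subplaces
     OMP_PLACES="{0:4}:2:4"            place {0,1,2,3} repeated twice with
                                       stride 4, i.e. {0,1,2,3} and {4,5,6,7}
     OMP_PLACES="!{2}"                 the complement of proc 2 */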

static void __kmp_process_subplace_list(const char **scan,
                                        kmp_affin_mask_t *osId2Mask,
                                        int maxOsId, kmp_affin_mask_t *tempMask,
                                        int *setSize) {
  const char *next;

  for (;;) {
    int start, count, stride, i;

    // Read in the starting proc id
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    start = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(start >= 0);
    *scan = next;

    // valid follow sets are ',' ':' and '}'
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      if ((start > maxOsId) ||
          (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none))) {
          KMP_WARNING(AffIgnoreInvalidProcID, start);
        }
      } else {
        KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
        (*setSize)++;
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }
    KMP_ASSERT2(**scan == ':', "bad explicit places list");
    (*scan)++; // skip ':'

    // Read count parameter
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    count = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(count >= 0);
    *scan = next;

    // valid follow sets are ',' ':' and '}'
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      for (i = 0; i < count; i++) {
        if ((start > maxOsId) ||
            (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
          if (__kmp_affinity_verbose ||
              (__kmp_affinity_warnings &&
               (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffIgnoreInvalidProcID, start);
          }
          break; // don't proliferate warnings for large count
        } else {
          KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
          start++;
          (*setSize)++;
        }
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }
    KMP_ASSERT2(**scan == ':', "bad explicit places list");
    (*scan)++; // skip ':'

    // Read stride parameter
    int sign = +1;
    for (;;) {
      SKIP_WS(*scan);
      if (**scan == '+') {
        (*scan)++; // skip '+'
        continue;
      }
      if (**scan == '-') {
        sign *= -1;
        (*scan)++; // skip '-'
        continue;
      }
      break;
    }
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    stride = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(stride >= 0);
    *scan = next;
    stride *= sign;

    // valid follow sets are ',' and '}'
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      for (i = 0; i < count; i++) {
        if ((start > maxOsId) ||
            (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
          if (__kmp_affinity_verbose ||
              (__kmp_affinity_warnings &&
               (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffIgnoreInvalidProcID, start);
          }
          break; // don't proliferate warnings for large count
        } else {
          KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
          start += stride;
          (*setSize)++;
        }
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }

    KMP_ASSERT2(0, "bad explicit places list");
  }
}

static void __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
                                int maxOsId, kmp_affin_mask_t *tempMask,
                                int *setSize) {
  const char *next;

  // valid follow sets are '{' '!' and num
  SKIP_WS(*scan);
  if (**scan == '{') {
    (*scan)++; // skip '{'
    __kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask, setSize);
    KMP_ASSERT2(**scan == '}', "bad explicit places list");
    (*scan)++; // skip '}'
  } else if (**scan == '!') {
    (*scan)++; // skip '!'
    __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
    KMP_CPU_COMPLEMENT(maxOsId, tempMask);
  } else if ((**scan >= '0') && (**scan <= '9')) {
    next = *scan;
    SKIP_DIGITS(next);
    int num = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(num >= 0);
    if ((num > maxOsId) ||
        (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
      if (__kmp_affinity_verbose ||
          (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
        KMP_WARNING(AffIgnoreInvalidProcID, num);
      }
    } else {
      KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
      (*setSize)++;
    }
    *scan = next; // skip num
  } else {
    KMP_ASSERT2(0, "bad explicit places list");
  }
}

void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
                                      unsigned int *out_numMasks,
                                      const char *placelist,
                                      kmp_affin_mask_t *osId2Mask,
                                      int maxOsId) {
  int i, j, count, stride, sign;
  const char *scan = placelist;
  const char *next = placelist;

  numNewMasks = 2;
  KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
  nextNewMask = 0;

  // tempMask is modified based on the previous or initial
  // place to form the current place
  // previousMask contains the previous place
  kmp_affin_mask_t *tempMask;
  kmp_affin_mask_t *previousMask;
  KMP_CPU_ALLOC(tempMask);
  KMP_CPU_ZERO(tempMask);
  KMP_CPU_ALLOC(previousMask);
  KMP_CPU_ZERO(previousMask);
  int setSize = 0;

  for (;;) {
    __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);

    // valid follow sets are ',' ':' and EOL
    SKIP_WS(scan);
    if (*scan == '\0' || *scan == ',') {
      if (setSize > 0) {
        ADD_MASK(tempMask);
      }
      KMP_CPU_ZERO(tempMask);
      setSize = 0;
      if (*scan == '\0') {
        break;
      }
      scan++; // skip ','
      continue;
    }

    KMP_ASSERT2(*scan == ':', "bad explicit places list");
    scan++; // skip ':'

    // Read count parameter
    SKIP_WS(scan);
    KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
    next = scan;
    SKIP_DIGITS(next);
    count = __kmp_str_to_int(scan, *next);
    KMP_ASSERT(count >= 0);
    scan = next;

    // valid follow sets are ',' ':' and EOL
    SKIP_WS(scan);
    if (*scan == '\0' || *scan == ',') {
      stride = +1;
    } else {
      KMP_ASSERT2(*scan == ':', "bad explicit places list");
      scan++; // skip ':'

      // Read stride parameter
      sign = +1;
      for (;;) {
        SKIP_WS(scan);
        if (*scan == '+') {
          scan++; // skip '+'
          continue;
        }
        if (*scan == '-') {
          sign *= -1;
          scan++; // skip '-'
          continue;
        }
        break;
      }
      SKIP_WS(scan);
      KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
      next = scan;
      SKIP_DIGITS(next);
      stride = __kmp_str_to_int(scan, *next);
      KMP_DEBUG_ASSERT(stride >= 0);
      scan = next;
      stride *= sign;
    }

    // Add places determined by initial_place : count : stride
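    // For example (hypothetical ids), "{0,1}:4:2" starts from place {0,1} and
    // emits four places, each shifted two procs past the previous one:
    // {0,1}, {2,3}, {4,5}, {6,7}.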
    for (i = 0; i < count; i++) {
      if (setSize == 0) {
        break;
      }
      // Add the current place, then build the next place (tempMask) from that
      KMP_CPU_COPY(previousMask, tempMask);
      ADD_MASK(previousMask);
      KMP_CPU_ZERO(tempMask);
      setSize = 0;
      KMP_CPU_SET_ITERATE(j, previousMask) {
        if (!KMP_CPU_ISSET(j, previousMask)) {
          continue;
        }
        if ((j + stride > maxOsId) || (j + stride < 0) ||
            (!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) ||
            (!KMP_CPU_ISSET(j + stride,
                            KMP_CPU_INDEX(osId2Mask, j + stride)))) {
          if ((__kmp_affinity_verbose ||
               (__kmp_affinity_warnings &&
                (__kmp_affinity_type != affinity_none))) &&
              i < count - 1) {
            KMP_WARNING(AffIgnoreInvalidProcID, j + stride);
          }
          continue;
        }
        KMP_CPU_SET(j + stride, tempMask);
        setSize++;
      }
    }
    KMP_CPU_ZERO(tempMask);
    setSize = 0;

    // valid follow sets are ',' and EOL
    SKIP_WS(scan);
    if (*scan == '\0') {
      break;
    }
    if (*scan == ',') {
      scan++; // skip ','
      continue;
    }

    KMP_ASSERT2(0, "bad explicit places list");
  }

  *out_numMasks = nextNewMask;
  if (nextNewMask == 0) {
    *out_masks = NULL;
    KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
    return;
  }
  KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
  KMP_CPU_FREE(tempMask);
  KMP_CPU_FREE(previousMask);
  for (i = 0; i < nextNewMask; i++) {
    kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
    kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
    KMP_CPU_COPY(dest, src);
  }
  KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
}

#endif /* OMP_40_ENABLED */

#undef ADD_MASK
#undef ADD_MASK_OSID

#if KMP_USE_HWLOC
static int __kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o) {
  // skip the PU descendants of object o
  int skipped = 0;
  hwloc_obj_t hT = NULL;
  int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
  for (int i = 0; i < N; ++i) {
    KMP_DEBUG_ASSERT(hT);
    unsigned idx = hT->os_index;
    if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
      KMP_CPU_CLR(idx, __kmp_affin_fullMask);
      KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
      ++skipped;
    }
    hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
  }
  return skipped; // count number of skipped units
}

static int __kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o) {
  // check if obj has PUs present in fullMask
  hwloc_obj_t hT = NULL;
  int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
  for (int i = 0; i < N; ++i) {
    KMP_DEBUG_ASSERT(hT);
    unsigned idx = hT->os_index;
    if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask))
      return 1; // found PU
    hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
  }
  return 0; // no PUs found
}
#endif // KMP_USE_HWLOC

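// Prune the topology map according to KMP_HW_SUBSET. As an illustration
// (hypothetical machine and setting): a request for 1 socket, 2 cores per
// socket and 1 thread per core clears every other PU from
// __kmp_affin_fullMask and shrinks nPackages, nCoresPerPkg,
// __kmp_nThreadsPerCore, __kmp_avail_proc and __kmp_ncores to describe only
// the surviving subset.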
static void __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) {
  AddrUnsPair *newAddr;
  if (__kmp_hws_requested == 0)
    goto _exit; // no topology limiting actions requested, exit
#if KMP_USE_HWLOC
  if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
    // The number of subobjects is calculated dynamically; this works fine for
    // any non-uniform topology.
    // L2 cache objects are determined by depth, other objects by type.
    hwloc_topology_t tp = __kmp_hwloc_topology;
    int nS = 0, nN = 0, nL = 0, nC = 0,
        nT = 0; // logical index including skipped
    int nCr = 0, nTr = 0; // number of requested units
    int nPkg = 0, nCo = 0, n_new = 0, n_old = 0, nCpP = 0, nTpC = 0; // counters
    hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
    int L2depth, idx;

    // check support of extensions ----------------------------------
    int numa_support = 0, tile_support = 0;
    if (__kmp_pu_os_idx)
      hT = hwloc_get_pu_obj_by_os_index(tp,
                                        __kmp_pu_os_idx[__kmp_avail_proc - 1]);
    else
      hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1);
    if (hT == NULL) { // something's gone wrong
      KMP_WARNING(AffHWSubsetUnsupported);
      goto _exit;
    }
    // check NUMA node
    hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
    hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
    if (hN != NULL && hN->depth > hS->depth) {
      numa_support = 1; // 1 in case socket includes node(s)
    } else if (__kmp_hws_node.num > 0) {
      // don't support sockets inside NUMA node (no such HW found for testing)
      KMP_WARNING(AffHWSubsetUnsupported);
      goto _exit;
    }
    // check L2 cache, get object by depth because of multiple caches
    L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
    hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT);
    if (hL != NULL &&
        __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1) {
      tile_support = 1; // no sense to count L2 if it includes a single core
    } else if (__kmp_hws_tile.num > 0) {
      if (__kmp_hws_core.num == 0) {
        __kmp_hws_core = __kmp_hws_tile; // replace L2 with core
        __kmp_hws_tile.num = 0;
      } else {
        // L2 and core are both requested, but represent same object
        KMP_WARNING(AffHWSubsetInvalid);
        goto _exit;
      }
    }
    // end of check of extensions -----------------------------------

    // fill in unset items, validate settings -----------------------
    if (__kmp_hws_socket.num == 0)
      __kmp_hws_socket.num = nPackages; // use all available sockets
    if (__kmp_hws_socket.offset >= nPackages) {
      KMP_WARNING(AffHWSubsetManySockets);
      goto _exit;
    }
    if (numa_support) {
      hN = NULL;
      int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE,
                                                  &hN); // num nodes in socket
      if (__kmp_hws_node.num == 0)
        __kmp_hws_node.num = NN; // use all available nodes
      if (__kmp_hws_node.offset >= NN) {
        KMP_WARNING(AffHWSubsetManyNodes);
        goto _exit;
      }
      if (tile_support) {
        // get num tiles in node
        int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
        if (__kmp_hws_tile.num == 0) {
          __kmp_hws_tile.num = NL + 1;
        } // use all available tiles, some node may have more tiles, thus +1
        if (__kmp_hws_tile.offset >= NL) {
          KMP_WARNING(AffHWSubsetManyTiles);
          goto _exit;
        }
        int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
                                                    &hC); // num cores in tile
        if (__kmp_hws_core.num == 0)
          __kmp_hws_core.num = NC; // use all available cores
        if (__kmp_hws_core.offset >= NC) {
          KMP_WARNING(AffHWSubsetManyCores);
          goto _exit;
        }
      } else { // tile_support
        int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE,
                                                    &hC); // num cores in node
        if (__kmp_hws_core.num == 0)
          __kmp_hws_core.num = NC; // use all available cores
        if (__kmp_hws_core.offset >= NC) {
          KMP_WARNING(AffHWSubsetManyCores);
          goto _exit;
        }
      } // tile_support
    } else { // numa_support
      if (tile_support) {
        // get num tiles in socket
        int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
        if (__kmp_hws_tile.num == 0)
          __kmp_hws_tile.num = NL; // use all available tiles
        if (__kmp_hws_tile.offset >= NL) {
          KMP_WARNING(AffHWSubsetManyTiles);
          goto _exit;
        }
        int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
                                                    &hC); // num cores in tile
        if (__kmp_hws_core.num == 0)
          __kmp_hws_core.num = NC; // use all available cores
        if (__kmp_hws_core.offset >= NC) {
          KMP_WARNING(AffHWSubsetManyCores);
          goto _exit;
        }
      } else { // tile_support
        int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE,
                                                    &hC); // num cores in socket
        if (__kmp_hws_core.num == 0)
          __kmp_hws_core.num = NC; // use all available cores
        if (__kmp_hws_core.offset >= NC) {
          KMP_WARNING(AffHWSubsetManyCores);
          goto _exit;
        }
      } // tile_support
    }
    if (__kmp_hws_proc.num == 0)
      __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all available procs
    if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) {
      KMP_WARNING(AffHWSubsetManyProcs);
      goto _exit;
    }
    // end of validation --------------------------------------------

    if (pAddr) // pAddr is NULL in case of affinity_none
      newAddr = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) *
                                              __kmp_avail_proc); // max size
    // main loop to form HW subset ----------------------------------
    hS = NULL;
    int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE);
    for (int s = 0; s < NP; ++s) {
      // Check Socket -----------------------------------------------
      hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS);
      if (!__kmp_hwloc_obj_has_PUs(tp, hS))
        continue; // skip socket if all PUs are out of fullMask
      ++nS; // only count objects that have PUs in the affinity mask
      if (nS <= __kmp_hws_socket.offset ||
          nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) {
        n_old += __kmp_hwloc_skip_PUs_obj(tp, hS); // skip socket
        continue; // move to next socket
      }
      nCr = 0; // count number of cores per socket
      // socket requested, go down the topology tree
      // check 4 cases: (+NUMA+Tile), (+NUMA-Tile), (-NUMA+Tile), (-NUMA-Tile)
      if (numa_support) {
        nN = 0;
        hN = NULL;
        // num nodes in current socket
        int NN =
            __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE, &hN);
        for (int n = 0; n < NN; ++n) {
          // Check NUMA Node ----------------------------------------
          if (!__kmp_hwloc_obj_has_PUs(tp, hN)) {
            hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
            continue; // skip node if all PUs are out of fullMask
          }
          ++nN;
          if (nN <= __kmp_hws_node.offset ||
              nN > __kmp_hws_node.num + __kmp_hws_node.offset) {
            // skip node as not requested
            n_old += __kmp_hwloc_skip_PUs_obj(tp, hN); // skip node
            hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
            continue; // move to next node
          }
          // node requested, go down the topology tree
          if (tile_support) {
            nL = 0;
            hL = NULL;
            int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
            for (int l = 0; l < NL; ++l) {
              // Check L2 (tile) ------------------------------------
              if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
                hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
                continue; // skip tile if all PUs are out of fullMask
              }
              ++nL;
              if (nL <= __kmp_hws_tile.offset ||
                  nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
                // skip tile as not requested
                n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
                hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
                continue; // move to next tile
              }
              // tile requested, go down the topology tree
              nC = 0;
              hC = NULL;
              // num cores in current tile
              int NC = __kmp_hwloc_count_children_by_type(tp, hL,
                                                          HWLOC_OBJ_CORE, &hC);
              for (int c = 0; c < NC; ++c) {
                // Check Core ---------------------------------------
                if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
                  hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                  continue; // skip core if all PUs are out of fullMask
                }
                ++nC;
                if (nC <= __kmp_hws_core.offset ||
                    nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
                  // skip core as not requested
                  n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
                  hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                  continue; // move to next core
                }
                // core requested, go down to PUs
                nT = 0;
                nTr = 0;
                hT = NULL;
                // num procs in current core
                int NT = __kmp_hwloc_count_children_by_type(tp, hC,
                                                            HWLOC_OBJ_PU, &hT);
                for (int t = 0; t < NT; ++t) {
                  // Check PU ---------------------------------------
                  idx = hT->os_index;
                  if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
                    hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                    continue; // skip PU if not in fullMask
                  }
                  ++nT;
                  if (nT <= __kmp_hws_proc.offset ||
                      nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
                    // skip PU
                    KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                    ++n_old;
                    KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
                    hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                    continue; // move to next PU
                  }
                  ++nTr;
                  if (pAddr) // collect requested thread's data
                    newAddr[n_new] = (*pAddr)[n_old];
                  ++n_new;
                  ++n_old;
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                } // threads loop
                if (nTr > 0) {
                  ++nCr; // num cores per socket
                  ++nCo; // total num cores
                  if (nTr > nTpC)
                    nTpC = nTr; // calc max threads per core
                }
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
              } // cores loop
              hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
            } // tiles loop
          } else { // tile_support
            // no tiles, check cores
            nC = 0;
            hC = NULL;
            // num cores in current node
            int NC =
                __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE, &hC);
            for (int c = 0; c < NC; ++c) {
              // Check Core ---------------------------------------
              if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                continue; // skip core if all PUs are out of fullMask
              }
              ++nC;
              if (nC <= __kmp_hws_core.offset ||
                  nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
                // skip core as not requested
                n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                continue; // move to next core
              }
              // core requested, go down to PUs
              nT = 0;
              nTr = 0;
              hT = NULL;
              int NT =
                  __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
              for (int t = 0; t < NT; ++t) {
                // Check PU ---------------------------------------
                idx = hT->os_index;
                if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                  continue; // skip PU if not in fullMask
                }
                ++nT;
                if (nT <= __kmp_hws_proc.offset ||
                    nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
                  // skip PU
                  KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                  ++n_old;
                  KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                  continue; // move to next PU
                }
                ++nTr;
                if (pAddr) // collect requested thread's data
                  newAddr[n_new] = (*pAddr)[n_old];
                ++n_new;
                ++n_old;
                hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
              } // threads loop
              if (nTr > 0) {
                ++nCr; // num cores per socket
                ++nCo; // total num cores
                if (nTr > nTpC)
                  nTpC = nTr; // calc max threads per core
              }
              hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
            } // cores loop
          } // tiles support
          hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
        } // nodes loop
      } else { // numa_support
        // no NUMA support
        if (tile_support) {
          nL = 0;
          hL = NULL;
          // num tiles in current socket
          int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
          for (int l = 0; l < NL; ++l) {
            // Check L2 (tile) ------------------------------------
            if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
              hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
              continue; // skip tile if all PUs are out of fullMask
            }
            ++nL;
            if (nL <= __kmp_hws_tile.offset ||
                nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
              // skip tile as not requested
              n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
              hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
              continue; // move to next tile
            }
            // tile requested, go down the topology tree
            nC = 0;
            hC = NULL;
            // num cores per tile
            int NC =
                __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC);
            for (int c = 0; c < NC; ++c) {
              // Check Core ---------------------------------------
              if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                continue; // skip core if all PUs are out of fullMask
              }
              ++nC;
              if (nC <= __kmp_hws_core.offset ||
                  nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
                // skip core as not requested
                n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                continue; // move to next core
              }
              // core requested, go down to PUs
              nT = 0;
              nTr = 0;
              hT = NULL;
              // num procs per core
              int NT =
                  __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
              for (int t = 0; t < NT; ++t) {
                // Check PU ---------------------------------------
                idx = hT->os_index;
                if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                  continue; // skip PU if not in fullMask
                }
                ++nT;
                if (nT <= __kmp_hws_proc.offset ||
                    nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
                  // skip PU
                  KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                  ++n_old;
                  KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                  continue; // move to next PU
                }
                ++nTr;
                if (pAddr) // collect requested thread's data
                  newAddr[n_new] = (*pAddr)[n_old];
                ++n_new;
                ++n_old;
                hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
              } // threads loop
              if (nTr > 0) {
                ++nCr; // num cores per socket
                ++nCo; // total num cores
                if (nTr > nTpC)
                  nTpC = nTr; // calc max threads per core
              }
              hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
            } // cores loop
            hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
          } // tiles loop
        } else { // tile_support
          // no tiles, check cores
          nC = 0;
          hC = NULL;
          // num cores in socket
          int NC =
              __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE, &hC);
          for (int c = 0; c < NC; ++c) {
            // Check Core -------------------------------------------
            if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
              hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
              continue; // skip core if all PUs are out of fullMask
            }
            ++nC;
            if (nC <= __kmp_hws_core.offset ||
                nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
              // skip core as not requested
              n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
              hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
              continue; // move to next core
            }
            // core requested, go down to PUs
            nT = 0;
            nTr = 0;
            hT = NULL;
            // num procs per core
            int NT =
                __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
            for (int t = 0; t < NT; ++t) {
              // Check PU ---------------------------------------
              idx = hT->os_index;
              if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
                hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                continue; // skip PU if not in fullMask
              }
              ++nT;
              if (nT <= __kmp_hws_proc.offset ||
                  nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
                // skip PU
                KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                ++n_old;
                KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
                hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                continue; // move to next PU
              }
              ++nTr;
              if (pAddr) // collect requested thread's data
                newAddr[n_new] = (*pAddr)[n_old];
              ++n_new;
              ++n_old;
              hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
            } // threads loop
            if (nTr > 0) {
              ++nCr; // num cores per socket
              ++nCo; // total num cores
              if (nTr > nTpC)
                nTpC = nTr; // calc max threads per core
            }
            hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
          } // cores loop
        } // tiles support
      } // numa_support
      if (nCr > 0) { // found cores?
        ++nPkg; // num sockets
        if (nCr > nCpP)
          nCpP = nCr; // calc max cores per socket
      }
    } // sockets loop

    // check the subset is valid
    KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc);
    KMP_DEBUG_ASSERT(nPkg > 0);
    KMP_DEBUG_ASSERT(nCpP > 0);
    KMP_DEBUG_ASSERT(nTpC > 0);
    KMP_DEBUG_ASSERT(nCo > 0);
    KMP_DEBUG_ASSERT(nPkg <= nPackages);
    KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg);
    KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore);
    KMP_DEBUG_ASSERT(nCo <= __kmp_ncores);

    nPackages = nPkg; // correct num sockets
    nCoresPerPkg = nCpP; // correct num cores per socket
    __kmp_nThreadsPerCore = nTpC; // correct num threads per core
    __kmp_avail_proc = n_new; // correct num procs
    __kmp_ncores = nCo; // correct num cores
    // hwloc topology method end
  } else
#endif // KMP_USE_HWLOC
  {
    int n_old = 0, n_new = 0, proc_num = 0;
    if (__kmp_hws_node.num > 0 || __kmp_hws_tile.num > 0) {
      KMP_WARNING(AffHWSubsetNoHWLOC);
      goto _exit;
    }
    if (__kmp_hws_socket.num == 0)
      __kmp_hws_socket.num = nPackages; // use all available sockets
    if (__kmp_hws_core.num == 0)
      __kmp_hws_core.num = nCoresPerPkg; // use all available cores
    if (__kmp_hws_proc.num == 0 || __kmp_hws_proc.num > __kmp_nThreadsPerCore)
      __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all HW contexts
    if (!__kmp_affinity_uniform_topology()) {
      KMP_WARNING(AffHWSubsetNonUniform);
      goto _exit; // don't support non-uniform topology
    }
    if (depth > 3) {
      KMP_WARNING(AffHWSubsetNonThreeLevel);
      goto _exit; // only three-level topologies are supported
    }
    if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) {
      KMP_WARNING(AffHWSubsetManySockets);
      goto _exit;
    }
    if (__kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg) {
      KMP_WARNING(AffHWSubsetManyCores);
      goto _exit;
    }
    // Form the requested subset
    if (pAddr) // pAddr is NULL in case of affinity_none
      newAddr = (AddrUnsPair *)__kmp_allocate(
          sizeof(AddrUnsPair) * __kmp_hws_socket.num * __kmp_hws_core.num *
          __kmp_hws_proc.num);
    for (int i = 0; i < nPackages; ++i) {
      if (i < __kmp_hws_socket.offset ||
          i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) {
        // skip not-requested socket
        n_old += nCoresPerPkg * __kmp_nThreadsPerCore;
        if (__kmp_pu_os_idx != NULL) {
          // walk through skipped socket
          for (int j = 0; j < nCoresPerPkg; ++j) {
            for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
              KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
              ++proc_num;
            }
          }
        }
      } else {
        // walk through requested socket
        for (int j = 0; j < nCoresPerPkg; ++j) {
          if (j < __kmp_hws_core.offset ||
              j >= __kmp_hws_core.offset +
                       __kmp_hws_core.num) { // skip not-requested core
            n_old += __kmp_nThreadsPerCore;
            if (__kmp_pu_os_idx != NULL) {
              for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
                KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
                ++proc_num;
              }
            }
          } else {
            // walk through requested core
            for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
              if (k < __kmp_hws_proc.num) {
                if (pAddr) // collect requested thread's data
                  newAddr[n_new] = (*pAddr)[n_old];
                n_new++;
              } else {
                if (__kmp_pu_os_idx != NULL)
                  KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
              }
              n_old++;
              ++proc_num;
            }
          }
        }
      }
    }
    KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
    KMP_DEBUG_ASSERT(n_new ==
                     __kmp_hws_socket.num * __kmp_hws_core.num *
                         __kmp_hws_proc.num);
    nPackages = __kmp_hws_socket.num; // correct nPackages
    nCoresPerPkg = __kmp_hws_core.num; // correct nCoresPerPkg
    __kmp_nThreadsPerCore = __kmp_hws_proc.num; // correct __kmp_nThreadsPerCore
    __kmp_avail_proc = n_new; // correct avail_proc
    __kmp_ncores = nPackages * __kmp_hws_core.num; // correct ncores
  } // non-hwloc topology method
  if (pAddr) {
    __kmp_free(*pAddr);
    *pAddr = newAddr; // replace old topology with new one
  }
  if (__kmp_affinity_verbose) {
    char m[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(m, KMP_AFFIN_MASK_PRINT_LEN,
                              __kmp_affin_fullMask);
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_HW_SUBSET", m);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_HW_SUBSET", m);
    }
    KMP_INFORM(AvailableOSProc, "KMP_HW_SUBSET", __kmp_avail_proc);
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_str_buf_print(&buf, "%d", nPackages);
    KMP_INFORM(TopologyExtra, "KMP_HW_SUBSET", buf.str, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
    __kmp_str_buf_free(&buf);
  }
_exit:
  if (__kmp_pu_os_idx != NULL) {
    __kmp_free(__kmp_pu_os_idx);
    __kmp_pu_os_idx = NULL;
  }
}

// This function figures out the deepest level at which there is at least one
// cluster/core with more than one processing unit bound to it.
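// For example (hypothetical labels): with per-proc labels of the form
// {package, core, thread} and bottom_level == 2, any proc whose thread id is
// nonzero pushes core_level to 1, i.e. the core level.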
4009static int __kmp_affinity_find_core_level(const AddrUnsPair *address2os,
4010 int nprocs, int bottom_level) {
4011 int core_level = 0;
4012
4013 for (int i = 0; i < nprocs; i++) {
4014 for (int j = bottom_level; j > 0; j--) {
4015 if (address2os[i].first.labels[j] > 0) {
4016 if (core_level < (j - 1)) {
4017 core_level = j - 1;
4018 }
4019 }
4020 }
4021 }
4022 return core_level;
4023}
4024
4025// This function counts number of clusters/cores at given level.
4026static int __kmp_affinity_compute_ncores(const AddrUnsPair *address2os,
4027 int nprocs, int bottom_level,
4028 int core_level) {
4029 int ncores = 0;
4030 int i, j;
4031
4032 j = bottom_level;
4033 for (i = 0; i < nprocs; i++) {
4034 for (j = bottom_level; j > core_level; j--) {
4035 if ((i + 1) < nprocs) {
4036 if (address2os[i + 1].first.labels[j] > 0) {
4037 break;
4038 }
4039 }
4040 }
4041 if (j == core_level) {
4042 ncores++;
4043 }
4044 }
4045 if (j > core_level) {
4046 // In case of ( nprocs < __kmp_avail_proc ) we may end too deep and miss one
4047 // core. May occur when called from __kmp_affinity_find_core().
4048 ncores++;
4049 }
4050 return ncores;
4051}
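// Worked instance (hypothetical sorted table, assuming labels of the form
// {package, core, thread}, bottom_level == 2, core_level == 1):
//   {0,0,0}, {0,0,1}, {0,1,0}, {0,1,1}
// Entries 1 and 3 are the last PU of their core (the following entry, if
// any, restarts at thread label 0), so ncores evaluates to 2.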
4052
4053// This function finds the cluster/core to which the given processing unit is bound.
4054static int __kmp_affinity_find_core(const AddrUnsPair *address2os, int proc,
4055 int bottom_level, int core_level) {
4056 return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level,
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00004057 core_level) -
4058 1;
Jonathan Peyton30419822017-05-12 18:01:32 +00004059}
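// Continuing the sketch above: __kmp_affinity_find_core(address2os, 2, 2, 1)
// counts the cores closed within the first three entries (two of them) and
// subtracts one, so PU {0,1,0} maps to core index 1.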
4060
4061// This function finds the maximal number of processing units bound to a
4062// single cluster/core at the given level.
4063static int __kmp_affinity_max_proc_per_core(const AddrUnsPair *address2os,
4064 int nprocs, int bottom_level,
4065 int core_level) {
4066 int maxprocpercore = 0;
4067
4068 if (core_level < bottom_level) {
4069 for (int i = 0; i < nprocs; i++) {
4070 int percore = address2os[i].first.labels[core_level + 1] + 1;
4071
4072 if (percore > maxprocpercore) {
4073 maxprocpercore = percore;
4074 }
4075 }
4076 } else {
4077 maxprocpercore = 1;
4078 }
4079 return maxprocpercore;
Paul Osmialowskiecbe2ea2016-07-29 20:55:03 +00004080}
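// In the same sketch, labels[core_level + 1] is the thread label, so
// maxprocpercore == max(labels[2]) + 1 == 2; when core_level reaches
// bottom_level (a flat topology) the count degenerates to 1.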
Jim Cownie5e8470a2013-09-27 10:38:44 +00004081
4082static AddrUnsPair *address2os = NULL;
Jonathan Peyton30419822017-05-12 18:01:32 +00004083static int *procarr = NULL;
4084static int __kmp_aff_depth = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004085
Jonathan Peytonf6399362018-07-09 17:51:13 +00004086#if KMP_USE_HIER_SCHED
4087#define KMP_EXIT_AFF_NONE \
4088 KMP_ASSERT(__kmp_affinity_type == affinity_none); \
4089 KMP_ASSERT(address2os == NULL); \
4090 __kmp_apply_thread_places(NULL, 0); \
4091 __kmp_create_affinity_none_places(); \
4092 __kmp_dispatch_set_hierarchy_values(); \
4093 return;
4094#else
Jonathan Peyton30419822017-05-12 18:01:32 +00004095#define KMP_EXIT_AFF_NONE \
4096 KMP_ASSERT(__kmp_affinity_type == affinity_none); \
4097 KMP_ASSERT(address2os == NULL); \
4098 __kmp_apply_thread_places(NULL, 0); \
Jonathan Peyton1482db92018-04-18 19:25:48 +00004099 __kmp_create_affinity_none_places(); \
Jonathan Peyton30419822017-05-12 18:01:32 +00004100 return;
Jonathan Peytonf6399362018-07-09 17:51:13 +00004101#endif
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00004102
Jonathan Peyton1482db92018-04-18 19:25:48 +00004103// Create a one-element mask array (set of places) which only contains the
4104// initial process's affinity mask
4105static void __kmp_create_affinity_none_places() {
4106 KMP_ASSERT(__kmp_affin_fullMask != NULL);
4107 KMP_ASSERT(__kmp_affinity_type == affinity_none);
4108 __kmp_affinity_num_masks = 1;
4109 KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
4110 kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, 0);
4111 KMP_CPU_COPY(dest, __kmp_affin_fullMask);
4112}
4113
Jonathan Peyton30419822017-05-12 18:01:32 +00004114static int __kmp_affinity_cmp_Address_child_num(const void *a, const void *b) {
Andrey Churbanov5ba90c72017-07-17 09:03:14 +00004115 const Address *aa = &(((const AddrUnsPair *)a)->first);
4116 const Address *bb = &(((const AddrUnsPair *)b)->first);
Jonathan Peyton30419822017-05-12 18:01:32 +00004117 unsigned depth = aa->depth;
4118 unsigned i;
4119 KMP_DEBUG_ASSERT(depth == bb->depth);
4120 KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
4121 KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
4122 for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
4123 int j = depth - i - 1;
4124 if (aa->childNums[j] < bb->childNums[j])
4125 return -1;
4126 if (aa->childNums[j] > bb->childNums[j])
4127 return 1;
4128 }
4129 for (; i < depth; i++) {
4130 int j = i - __kmp_affinity_compact;
4131 if (aa->childNums[j] < bb->childNums[j])
4132 return -1;
4133 if (aa->childNums[j] > bb->childNums[j])
4134 return 1;
4135 }
4136 return 0;
Jonathan Peytone6abe522016-09-02 20:54:58 +00004137}
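// A sketch of the induced order (hypothetical childNums mirroring a
// {package, core, thread} topology of depth 3): with __kmp_affinity_compact
// == 1 the innermost level becomes the primary sort key, so
//   {0,0,0}, {0,0,1}, {0,1,0}, {0,1,1}
// sorts to
//   {0,0,0}, {0,1,0}, {0,0,1}, {0,1,1}
// i.e. the first thread of each core precedes any second thread, which is
// what the scatter-style placements below rely on.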
4138
Jonathan Peyton30419822017-05-12 18:01:32 +00004139static void __kmp_aux_affinity_initialize(void) {
4140 if (__kmp_affinity_masks != NULL) {
4141 KMP_ASSERT(__kmp_affin_fullMask != NULL);
4142 return;
4143 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004144
Jonathan Peyton30419822017-05-12 18:01:32 +00004145 // Create the "full" mask - this defines all of the processors that we
4146 // consider to be in the machine model. If respect is set, then it is the
4147 // initialization thread's affinity mask. Otherwise, it is all processors that
4148 // we know about on the machine.
4149 if (__kmp_affin_fullMask == NULL) {
4150 KMP_CPU_ALLOC(__kmp_affin_fullMask);
4151 }
4152 if (KMP_AFFINITY_CAPABLE()) {
4153 if (__kmp_affinity_respect_mask) {
4154 __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004155
Jonathan Peyton30419822017-05-12 18:01:32 +00004156 // Count the number of available processors.
4157 unsigned i;
4158 __kmp_avail_proc = 0;
4159 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
4160 if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
4161 continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004162 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004163 __kmp_avail_proc++;
4164 }
4165 if (__kmp_avail_proc > __kmp_xproc) {
4166 if (__kmp_affinity_verbose ||
4167 (__kmp_affinity_warnings &&
4168 (__kmp_affinity_type != affinity_none))) {
4169 KMP_WARNING(ErrorInitializeAffinity);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004170 }
4171 __kmp_affinity_type = affinity_none;
Andrey Churbanov1f037e42015-03-10 09:15:26 +00004172 KMP_AFFINITY_DISABLE();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004173 return;
Jonathan Peyton30419822017-05-12 18:01:32 +00004174 }
4175 } else {
4176 __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
4177 __kmp_avail_proc = __kmp_xproc;
4178 }
4179 }
4180
Jonathan Peyton64249502017-11-29 22:27:18 +00004181 if (__kmp_affinity_gran == affinity_gran_tile &&
4182 // check if user's request is valid
4183 __kmp_affinity_dispatch->get_api_type() == KMPAffinity::NATIVE_OS) {
4184 KMP_WARNING(AffTilesNoHWLOC, "KMP_AFFINITY");
4185 __kmp_affinity_gran = affinity_gran_package;
4186 }
4187
Jonathan Peyton30419822017-05-12 18:01:32 +00004188 int depth = -1;
4189 kmp_i18n_id_t msg_id = kmp_i18n_null;
4190
4191 // For backward compatibility, setting KMP_CPUINFO_FILE =>
4192 // KMP_TOPOLOGY_METHOD=cpuinfo
4193 if ((__kmp_cpuinfo_file != NULL) &&
4194 (__kmp_affinity_top_method == affinity_top_method_all)) {
4195 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
4196 }
4197
4198 if (__kmp_affinity_top_method == affinity_top_method_all) {
4199 // In the default code path, errors are not fatal - we just try using
4200 // another method. We only emit a warning message if affinity is on, or the
4201 // verbose flag is set, and the nowarnings flag was not set.
4202 const char *file_name = NULL;
4203 int line = 0;
4204#if KMP_USE_HWLOC
4205 if (depth < 0 &&
4206 __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
4207 if (__kmp_affinity_verbose) {
4208 KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
4209 }
4210 if (!__kmp_hwloc_error) {
4211 depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
4212 if (depth == 0) {
4213 KMP_EXIT_AFF_NONE;
4214 } else if (depth < 0 && __kmp_affinity_verbose) {
4215 KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
4216 }
4217 } else if (__kmp_affinity_verbose) {
4218 KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
4219 }
4220 }
4221#endif
4222
4223#if KMP_ARCH_X86 || KMP_ARCH_X86_64
4224
4225 if (depth < 0) {
4226 if (__kmp_affinity_verbose) {
4227 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
4228 }
4229
4230 file_name = NULL;
4231 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
4232 if (depth == 0) {
4233 KMP_EXIT_AFF_NONE;
4234 }
4235
4236 if (depth < 0) {
4237 if (__kmp_affinity_verbose) {
4238 if (msg_id != kmp_i18n_null) {
4239 KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY",
4240 __kmp_i18n_catgets(msg_id),
4241 KMP_I18N_STR(DecodingLegacyAPIC));
4242 } else {
4243 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
4244 KMP_I18N_STR(DecodingLegacyAPIC));
4245 }
4246 }
4247
4248 file_name = NULL;
4249 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
4250 if (depth == 0) {
4251 KMP_EXIT_AFF_NONE;
4252 }
4253 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004254 }
4255
Jonathan Peyton30419822017-05-12 18:01:32 +00004256#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004257
Jonathan Peyton30419822017-05-12 18:01:32 +00004258#if KMP_OS_LINUX
Jim Cownie5e8470a2013-09-27 10:38:44 +00004259
Jonathan Peyton30419822017-05-12 18:01:32 +00004260 if (depth < 0) {
4261 if (__kmp_affinity_verbose) {
4262 if (msg_id != kmp_i18n_null) {
4263 KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY",
4264 __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
Jim Cownie5e8470a2013-09-27 10:38:44 +00004265 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00004266 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
Jim Cownie5e8470a2013-09-27 10:38:44 +00004267 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004268 }
4269
4270 FILE *f = fopen("/proc/cpuinfo", "r");
4271 if (f == NULL) {
4272 msg_id = kmp_i18n_str_CantOpenCpuinfo;
4273 } else {
4274 file_name = "/proc/cpuinfo";
4275 depth =
4276 __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
4277 fclose(f);
4278 if (depth == 0) {
4279 KMP_EXIT_AFF_NONE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004280 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004281 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004282 }
4283
Jonathan Peyton30419822017-05-12 18:01:32 +00004284#endif /* KMP_OS_LINUX */
4285
4286#if KMP_GROUP_AFFINITY
4287
4288 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
4289 if (__kmp_affinity_verbose) {
4290 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
4291 }
4292
4293 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
4294 KMP_ASSERT(depth != 0);
4295 }
4296
4297#endif /* KMP_GROUP_AFFINITY */
4298
4299 if (depth < 0) {
4300 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
4301 if (file_name == NULL) {
4302 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
4303 } else if (line == 0) {
4304 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
4305 } else {
4306 KMP_INFORM(UsingFlatOSFileLine, file_name, line,
4307 __kmp_i18n_catgets(msg_id));
4308 }
4309 }
4310 // FIXME - print msg if msg_id = kmp_i18n_null ???
4311
4312 file_name = "";
4313 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
4314 if (depth == 0) {
4315 KMP_EXIT_AFF_NONE;
4316 }
4317 KMP_ASSERT(depth > 0);
4318 KMP_ASSERT(address2os != NULL);
4319 }
4320 }
4321
Andrey Churbanova5868212017-11-30 11:51:47 +00004322#if KMP_USE_HWLOC
4323 else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
4324 KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
4325 if (__kmp_affinity_verbose) {
4326 KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
4327 }
4328 depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
4329 if (depth == 0) {
4330 KMP_EXIT_AFF_NONE;
4331 }
4332 }
4333#endif // KMP_USE_HWLOC
4334
Jonathan Peyton30419822017-05-12 18:01:32 +00004335// If the user has specified that a particular topology discovery method is to be
4336// used, then we abort if that method fails. The exception is group affinity,
4337// which might have been implicitly set.
4338
4339#if KMP_ARCH_X86 || KMP_ARCH_X86_64
4340
4341 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
4342 if (__kmp_affinity_verbose) {
4343 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
4344 }
4345
4346 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
4347 if (depth == 0) {
4348 KMP_EXIT_AFF_NONE;
4349 }
4350 if (depth < 0) {
4351 KMP_ASSERT(msg_id != kmp_i18n_null);
4352 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
4353 }
4354 } else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
4355 if (__kmp_affinity_verbose) {
4356 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
4357 }
4358
4359 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
4360 if (depth == 0) {
4361 KMP_EXIT_AFF_NONE;
4362 }
4363 if (depth < 0) {
4364 KMP_ASSERT(msg_id != kmp_i18n_null);
4365 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
4366 }
4367 }
4368
4369#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4370
4371 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
4372 const char *filename;
4373 if (__kmp_cpuinfo_file != NULL) {
4374 filename = __kmp_cpuinfo_file;
4375 } else {
4376 filename = "/proc/cpuinfo";
4377 }
4378
4379 if (__kmp_affinity_verbose) {
4380 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
4381 }
4382
4383 FILE *f = fopen(filename, "r");
4384 if (f == NULL) {
4385 int code = errno;
4386 if (__kmp_cpuinfo_file != NULL) {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00004387 __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
4388 KMP_HNT(NameComesFrom_CPUINFO_FILE), __kmp_msg_null);
Jonathan Peyton30419822017-05-12 18:01:32 +00004389 } else {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00004390 __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
4391 __kmp_msg_null);
Jonathan Peyton30419822017-05-12 18:01:32 +00004392 }
4393 }
4394 int line = 0;
4395 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
4396 fclose(f);
4397 if (depth < 0) {
4398 KMP_ASSERT(msg_id != kmp_i18n_null);
4399 if (line > 0) {
4400 KMP_FATAL(FileLineMsgExiting, filename, line,
4401 __kmp_i18n_catgets(msg_id));
4402 } else {
4403 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
4404 }
4405 }
4406 if (__kmp_affinity_type == affinity_none) {
4407 KMP_ASSERT(depth == 0);
4408 KMP_EXIT_AFF_NONE;
4409 }
4410 }
4411
4412#if KMP_GROUP_AFFINITY
4413
4414 else if (__kmp_affinity_top_method == affinity_top_method_group) {
4415 if (__kmp_affinity_verbose) {
4416 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
4417 }
4418
4419 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
4420 KMP_ASSERT(depth != 0);
4421 if (depth < 0) {
4422 KMP_ASSERT(msg_id != kmp_i18n_null);
4423 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
4424 }
4425 }
4426
4427#endif /* KMP_GROUP_AFFINITY */
4428
4429 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
4430 if (__kmp_affinity_verbose) {
4431 KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
4432 }
4433
4434 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
4435 if (depth == 0) {
4436 KMP_EXIT_AFF_NONE;
4437 }
4438 // should not fail
4439 KMP_ASSERT(depth > 0);
4440 KMP_ASSERT(address2os != NULL);
4441 }
4442
Jonathan Peytonf6399362018-07-09 17:51:13 +00004443#if KMP_USE_HIER_SCHED
4444 __kmp_dispatch_set_hierarchy_values();
4445#endif
4446
Jonathan Peyton30419822017-05-12 18:01:32 +00004447 if (address2os == NULL) {
4448 if (KMP_AFFINITY_CAPABLE() &&
4449 (__kmp_affinity_verbose ||
4450 (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none)))) {
4451 KMP_WARNING(ErrorInitializeAffinity);
4452 }
4453 __kmp_affinity_type = affinity_none;
Jonathan Peyton1482db92018-04-18 19:25:48 +00004454 __kmp_create_affinity_none_places();
Jonathan Peyton30419822017-05-12 18:01:32 +00004455 KMP_AFFINITY_DISABLE();
4456 return;
4457 }
4458
Andrey Churbanova5868212017-11-30 11:51:47 +00004459 if (__kmp_affinity_gran == affinity_gran_tile
4460#if KMP_USE_HWLOC
4461 && __kmp_tile_depth == 0
4462#endif
4463 ) {
Jonathan Peyton64249502017-11-29 22:27:18 +00004464 // tiles requested but not detected, warn user on this
4465 KMP_WARNING(AffTilesNoTiles, "KMP_AFFINITY");
4466 }
4467
Jonathan Peyton30419822017-05-12 18:01:32 +00004468 __kmp_apply_thread_places(&address2os, depth);
4469
4470 // Create the table of masks, indexed by thread Id.
4471 unsigned maxIndex;
4472 unsigned numUnique;
4473 kmp_affin_mask_t *osId2Mask =
4474 __kmp_create_masks(&maxIndex, &numUnique, address2os, __kmp_avail_proc);
4475 if (__kmp_affinity_gran_levels == 0) {
4476 KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
4477 }
4478
4479 // Set the childNums vector in all Address objects. This must be done before
4480 // we can sort using __kmp_affinity_cmp_Address_child_num(), which takes into
4481 // account the setting of __kmp_affinity_compact.
4482 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
4483
4484 switch (__kmp_affinity_type) {
4485
4486 case affinity_explicit:
4487 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
4488#if OMP_40_ENABLED
4489 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
4490#endif
4491 {
4492 __kmp_affinity_process_proclist(
4493 &__kmp_affinity_masks, &__kmp_affinity_num_masks,
4494 __kmp_affinity_proclist, osId2Mask, maxIndex);
4495 }
4496#if OMP_40_ENABLED
4497 else {
4498 __kmp_affinity_process_placelist(
4499 &__kmp_affinity_masks, &__kmp_affinity_num_masks,
4500 __kmp_affinity_proclist, osId2Mask, maxIndex);
4501 }
4502#endif
4503 if (__kmp_affinity_num_masks == 0) {
4504 if (__kmp_affinity_verbose ||
4505 (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
4506 KMP_WARNING(AffNoValidProcID);
4507 }
4508 __kmp_affinity_type = affinity_none;
Jonathan Peyton9355d0d2019-01-15 19:39:32 +00004509 __kmp_create_affinity_none_places();
Jonathan Peyton30419822017-05-12 18:01:32 +00004510 return;
4511 }
4512 break;
4513
4514 // The other affinity types rely on sorting the Addresses according to some
4515 // permutation of the machine topology tree. Set __kmp_affinity_compact and
4516 // __kmp_affinity_offset appropriately, then jump to a common code fragment
4517 // to do the sort and create the array of affinity masks.
4518
4519 case affinity_logical:
4520 __kmp_affinity_compact = 0;
4521 if (__kmp_affinity_offset) {
4522 __kmp_affinity_offset =
4523 __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
4524 }
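    // Worked instance of the rescaling above (assumed figures): with
    // __kmp_nThreadsPerCore == 2, __kmp_avail_proc == 8 and a user offset of
    // 3, the effective offset becomes (2 * 3) % 8 == 6 mask-table positions;
    // affinity_physical below reuses the same computation.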
4525 goto sortAddresses;
4526
4527 case affinity_physical:
4528 if (__kmp_nThreadsPerCore > 1) {
4529 __kmp_affinity_compact = 1;
4530 if (__kmp_affinity_compact >= depth) {
4531 __kmp_affinity_compact = 0;
4532 }
4533 } else {
4534 __kmp_affinity_compact = 0;
4535 }
4536 if (__kmp_affinity_offset) {
4537 __kmp_affinity_offset =
4538 __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
4539 }
4540 goto sortAddresses;
4541
4542 case affinity_scatter:
4543 if (__kmp_affinity_compact >= depth) {
4544 __kmp_affinity_compact = 0;
4545 } else {
4546 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
4547 }
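    // E.g. with depth == 3, a requested scatter compact value of 0 inverts to
    // __kmp_affinity_compact == 2, making the thread level the primary sort
    // key (see the comparator sketch above), so successive places spread
    // across cores and packages before doubling up.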
4548 goto sortAddresses;
4549
4550 case affinity_compact:
4551 if (__kmp_affinity_compact >= depth) {
4552 __kmp_affinity_compact = depth - 1;
4553 }
4554 goto sortAddresses;
4555
4556 case affinity_balanced:
4557 if (depth <= 1) {
4558 if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
4559 KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
4560 }
4561 __kmp_affinity_type = affinity_none;
Jonathan Peyton9355d0d2019-01-15 19:39:32 +00004562 __kmp_create_affinity_none_places();
Jonathan Peyton30419822017-05-12 18:01:32 +00004563 return;
Jonathan Peyton9355d0d2019-01-15 19:39:32 +00004564 } else if (!__kmp_affinity_uniform_topology()) {
Jonathan Peyton30419822017-05-12 18:01:32 +00004565 // Save the depth for further usage
4566 __kmp_aff_depth = depth;
4567
4568 int core_level = __kmp_affinity_find_core_level(
4569 address2os, __kmp_avail_proc, depth - 1);
4570 int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
4571 depth - 1, core_level);
4572 int maxprocpercore = __kmp_affinity_max_proc_per_core(
4573 address2os, __kmp_avail_proc, depth - 1, core_level);
4574
4575 int nproc = ncores * maxprocpercore;
4576 if ((nproc < 2) || (nproc < __kmp_avail_proc)) {
4577 if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
4578 KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
4579 }
4580 __kmp_affinity_type = affinity_none;
4581 return;
4582 }
4583
4584 procarr = (int *)__kmp_allocate(sizeof(int) * nproc);
4585 for (int i = 0; i < nproc; i++) {
4586 procarr[i] = -1;
4587 }
4588
4589 int lastcore = -1;
4590 int inlastcore = 0;
4591 for (int i = 0; i < __kmp_avail_proc; i++) {
4592 int proc = address2os[i].second;
4593 int core =
4594 __kmp_affinity_find_core(address2os, i, depth - 1, core_level);
4595
4596 if (core == lastcore) {
4597 inlastcore++;
4598 } else {
4599 inlastcore = 0;
4600 }
4601 lastcore = core;
4602
4603 procarr[core * maxprocpercore + inlastcore] = proc;
4604 }
Jonathan Peyton9355d0d2019-01-15 19:39:32 +00004605 }
4606 if (__kmp_affinity_compact >= depth) {
4607 __kmp_affinity_compact = depth - 1;
Jonathan Peyton30419822017-05-12 18:01:32 +00004608 }
4609
4610 sortAddresses:
4611 // Allocate the gtid->affinity mask table.
4612 if (__kmp_affinity_dups) {
4613 __kmp_affinity_num_masks = __kmp_avail_proc;
4614 } else {
4615 __kmp_affinity_num_masks = numUnique;
4616 }
4617
4618#if OMP_40_ENABLED
4619 if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
4620 (__kmp_affinity_num_places > 0) &&
4621 ((unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks)) {
4622 __kmp_affinity_num_masks = __kmp_affinity_num_places;
4623 }
4624#endif
4625
4626 KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
4627
4628 // Sort the address2os table according to the current setting of
4629 // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
4630 qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
4631 __kmp_affinity_cmp_Address_child_num);
4632 {
4633 int i;
4634 unsigned j;
4635 for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
4636 if ((!__kmp_affinity_dups) && (!address2os[i].first.leader)) {
4637 continue;
4638 }
4639 unsigned osId = address2os[i].second;
4640 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
4641 kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j);
4642 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
4643 KMP_CPU_COPY(dest, src);
4644 if (++j >= __kmp_affinity_num_masks) {
4645 break;
4646 }
4647 }
4648 KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
4649 }
4650 break;
4651
4652 default:
4653 KMP_ASSERT2(0, "Unexpected affinity setting");
4654 }
4655
4656 KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex + 1);
4657 machine_hierarchy.init(address2os, __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004658}
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00004659#undef KMP_EXIT_AFF_NONE
Jim Cownie5e8470a2013-09-27 10:38:44 +00004660
Jonathan Peyton30419822017-05-12 18:01:32 +00004661void __kmp_affinity_initialize(void) {
4662 // Much of the code above was written assuming that if a machine was not
4663 // affinity capable, then __kmp_affinity_type == affinity_none. We now
4664 // explicitly represent this as __kmp_affinity_type == affinity_disabled.
4665 // There are too many checks for __kmp_affinity_type == affinity_none
4666 // in this code. Instead of trying to change them all, check if
4667 // __kmp_affinity_type == affinity_disabled, and if so, slam it with
4668 // affinity_none, call the real initialization routine, then restore
4669 // __kmp_affinity_type to affinity_disabled.
4670 int disabled = (__kmp_affinity_type == affinity_disabled);
4671 if (!KMP_AFFINITY_CAPABLE()) {
4672 KMP_ASSERT(disabled);
4673 }
4674 if (disabled) {
4675 __kmp_affinity_type = affinity_none;
4676 }
4677 __kmp_aux_affinity_initialize();
4678 if (disabled) {
4679 __kmp_affinity_type = affinity_disabled;
4680 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004681}
4682
Jonathan Peyton30419822017-05-12 18:01:32 +00004683void __kmp_affinity_uninitialize(void) {
4684 if (__kmp_affinity_masks != NULL) {
4685 KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
4686 __kmp_affinity_masks = NULL;
4687 }
4688 if (__kmp_affin_fullMask != NULL) {
4689 KMP_CPU_FREE(__kmp_affin_fullMask);
4690 __kmp_affin_fullMask = NULL;
4691 }
4692 __kmp_affinity_num_masks = 0;
4693 __kmp_affinity_type = affinity_default;
4694#if OMP_40_ENABLED
4695 __kmp_affinity_num_places = 0;
4696#endif
4697 if (__kmp_affinity_proclist != NULL) {
4698 __kmp_free(__kmp_affinity_proclist);
4699 __kmp_affinity_proclist = NULL;
4700 }
4701 if (address2os != NULL) {
4702 __kmp_free(address2os);
4703 address2os = NULL;
4704 }
4705 if (procarr != NULL) {
4706 __kmp_free(procarr);
4707 procarr = NULL;
4708 }
4709#if KMP_USE_HWLOC
4710 if (__kmp_hwloc_topology != NULL) {
4711 hwloc_topology_destroy(__kmp_hwloc_topology);
4712 __kmp_hwloc_topology = NULL;
4713 }
4714#endif
4715 KMPAffinity::destroy_api();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004716}
4717
Jonathan Peyton30419822017-05-12 18:01:32 +00004718void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
4719 if (!KMP_AFFINITY_CAPABLE()) {
4720 return;
4721 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004722
Jonathan Peyton30419822017-05-12 18:01:32 +00004723 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4724 if (th->th.th_affin_mask == NULL) {
4725 KMP_CPU_ALLOC(th->th.th_affin_mask);
4726 } else {
4727 KMP_CPU_ZERO(th->th.th_affin_mask);
4728 }
4729
4730 // Copy the thread mask to the kmp_info_t structure. If
4731 // __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one that
4732 // has all of the OS proc ids set; if __kmp_affinity_respect_mask is set,
4733 // the full mask is the same as the mask of the initialization thread.
4734 kmp_affin_mask_t *mask;
4735 int i;
4736
4737#if OMP_40_ENABLED
Jonathan Peyton1482db92018-04-18 19:25:48 +00004738 if (KMP_AFFINITY_NON_PROC_BIND)
Jonathan Peyton30419822017-05-12 18:01:32 +00004739#endif
4740 {
4741 if ((__kmp_affinity_type == affinity_none) ||
4742 (__kmp_affinity_type == affinity_balanced)) {
4743#if KMP_GROUP_AFFINITY
4744 if (__kmp_num_proc_groups > 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004745 return;
Jonathan Peyton30419822017-05-12 18:01:32 +00004746 }
4747#endif
4748 KMP_ASSERT(__kmp_affin_fullMask != NULL);
Jonathan Peyton1482db92018-04-18 19:25:48 +00004749 i = 0;
Jonathan Peyton30419822017-05-12 18:01:32 +00004750 mask = __kmp_affin_fullMask;
4751 } else {
4752 KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
4753 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4754 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004755 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004756 }
4757#if OMP_40_ENABLED
4758 else {
4759 if ((!isa_root) ||
4760 (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
4761#if KMP_GROUP_AFFINITY
4762 if (__kmp_num_proc_groups > 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004763 return;
Jonathan Peyton30419822017-05-12 18:01:32 +00004764 }
4765#endif
4766 KMP_ASSERT(__kmp_affin_fullMask != NULL);
4767 i = KMP_PLACE_ALL;
4768 mask = __kmp_affin_fullMask;
4769 } else {
4770 // int i = some hash function or just a counter that doesn't
4771 // always start at 0. Use gtid for now.
4772 KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
4773 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4774 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004775 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004776 }
4777#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004778
Jonathan Peyton30419822017-05-12 18:01:32 +00004779#if OMP_40_ENABLED
4780 th->th.th_current_place = i;
4781 if (isa_root) {
4782 th->th.th_new_place = i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004783 th->th.th_first_place = 0;
4784 th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jonathan Peyton9355d0d2019-01-15 19:39:32 +00004785 } else if (KMP_AFFINITY_NON_PROC_BIND) {
4786 // When using a Non-OMP_PROC_BIND affinity method,
4787 // set all threads' place-partition-var to the entire place list
4788 th->th.th_first_place = 0;
4789 th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jonathan Peyton30419822017-05-12 18:01:32 +00004790 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004791
Jonathan Peyton30419822017-05-12 18:01:32 +00004792 if (i == KMP_PLACE_ALL) {
4793 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
4794 gtid));
4795 } else {
4796 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
4797 gtid, i));
4798 }
4799#else
4800 if (i == -1) {
4801 KA_TRACE(
4802 100,
4803 ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n",
4804 gtid));
4805 } else {
4806 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
4807 gtid, i));
4808 }
4809#endif /* OMP_40_ENABLED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004810
Jonathan Peyton30419822017-05-12 18:01:32 +00004811 KMP_CPU_COPY(th->th.th_affin_mask, mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004812
Jonathan Peyton125203e2017-12-06 21:07:41 +00004813 if (__kmp_affinity_verbose
4814 /* to avoid duplicate printing (will be correctly printed on barrier) */
Jonathan Peyton2c3e5d82018-08-24 20:35:42 +00004815 && (__kmp_affinity_type == affinity_none ||
4816 (i != KMP_PLACE_ALL && __kmp_affinity_type != affinity_balanced))) {
Jonathan Peyton30419822017-05-12 18:01:32 +00004817 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4818 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4819 th->th.th_affin_mask);
4820 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4821 __kmp_gettid(), gtid, buf);
4822 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004823
Jonathan Peyton30419822017-05-12 18:01:32 +00004824#if KMP_OS_WINDOWS
4825 // On Windows* OS, the process affinity mask might have changed. If the user
4826 // didn't request affinity and this call fails, just continue silently.
4827 // See CQ171393.
4828 if (__kmp_affinity_type == affinity_none) {
4829 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
4830 } else
Jonathan Peyton7c465a52016-09-12 19:02:53 +00004831#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004832 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
Jonathan Peyton7c465a52016-09-12 19:02:53 +00004833}
4834
Jonathan Peyton30419822017-05-12 18:01:32 +00004835#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004836
Jonathan Peyton30419822017-05-12 18:01:32 +00004837void __kmp_affinity_set_place(int gtid) {
Jonathan Peyton30419822017-05-12 18:01:32 +00004838 if (!KMP_AFFINITY_CAPABLE()) {
4839 return;
4840 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004841
Jonathan Peyton30419822017-05-12 18:01:32 +00004842 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4843
4844 KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current "
4845 "place = %d)\n",
4846 gtid, th->th.th_new_place, th->th.th_current_place));
4847
4848 // Check that the new place is within this thread's partition.
4849 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4850 KMP_ASSERT(th->th.th_new_place >= 0);
4851 KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
4852 if (th->th.th_first_place <= th->th.th_last_place) {
4853 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&
4854 (th->th.th_new_place <= th->th.th_last_place));
4855 } else {
4856 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) ||
4857 (th->th.th_new_place >= th->th.th_last_place));
4858 }
4859
4860 // Copy the thread mask to the kmp_info_t structure,
4861 // and set this thread's affinity.
4862 kmp_affin_mask_t *mask =
4863 KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place);
4864 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4865 th->th.th_current_place = th->th.th_new_place;
4866
4867 if (__kmp_affinity_verbose) {
4868 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4869 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4870 th->th.th_affin_mask);
4871 KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
4872 __kmp_gettid(), gtid, buf);
4873 }
4874 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4875}
4876
4877#endif /* OMP_40_ENABLED */
4878
4879int __kmp_aux_set_affinity(void **mask) {
4880 int gtid;
4881 kmp_info_t *th;
4882 int retval;
4883
4884 if (!KMP_AFFINITY_CAPABLE()) {
4885 return -1;
4886 }
4887
4888 gtid = __kmp_entry_gtid();
Joachim Protze6b840cc2019-01-16 11:35:11 +00004889 KA_TRACE(1000, (""); {
Jonathan Peyton30419822017-05-12 18:01:32 +00004890 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4891 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4892 (kmp_affin_mask_t *)(*mask));
4893 __kmp_debug_printf(
4894 "kmp_set_affinity: setting affinity mask for thread %d = %s\n", gtid,
4895 buf);
4896 });
4897
4898 if (__kmp_env_consistency_check) {
4899 if ((mask == NULL) || (*mask == NULL)) {
4900 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4901 } else {
4902 unsigned proc;
4903 int num_procs = 0;
4904
4905 KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t *)(*mask))) {
4906 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
4907 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
Jim Cownie5e8470a2013-09-27 10:38:44 +00004908 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004909 if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
4910 continue;
4911 }
4912 num_procs++;
4913 }
4914 if (num_procs == 0) {
4915 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4916 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004917
Jonathan Peyton30419822017-05-12 18:01:32 +00004918#if KMP_GROUP_AFFINITY
4919 if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
4920 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4921 }
4922#endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004923 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004924 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004925
Jonathan Peyton30419822017-05-12 18:01:32 +00004926 th = __kmp_threads[gtid];
4927 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4928 retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4929 if (retval == 0) {
4930 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
4931 }
4932
4933#if OMP_40_ENABLED
4934 th->th.th_current_place = KMP_PLACE_UNDEFINED;
4935 th->th.th_new_place = KMP_PLACE_UNDEFINED;
4936 th->th.th_first_place = 0;
4937 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4938
4939 // Turn off 4.0 affinity for the current thread at this parallel level.
4940 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
4941#endif
4942
4943 return retval;
4944}
4945
4946int __kmp_aux_get_affinity(void **mask) {
4947 int gtid;
4948 int retval;
4949 kmp_info_t *th;
4950
4951 if (!KMP_AFFINITY_CAPABLE()) {
4952 return -1;
4953 }
4954
4955 gtid = __kmp_entry_gtid();
4956 th = __kmp_threads[gtid];
4957 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4958
Joachim Protze6b840cc2019-01-16 11:35:11 +00004959 KA_TRACE(1000, (""); {
Jonathan Peyton30419822017-05-12 18:01:32 +00004960 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4961 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4962 th->th.th_affin_mask);
4963 __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n",
4964 gtid, buf);
4965 });
4966
4967 if (__kmp_env_consistency_check) {
4968 if ((mask == NULL) || (*mask == NULL)) {
4969 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
4970 }
4971 }
4972
4973#if !KMP_OS_WINDOWS
4974
4975 retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
Joachim Protze6b840cc2019-01-16 11:35:11 +00004976 KA_TRACE(1000, (""); {
Jonathan Peyton30419822017-05-12 18:01:32 +00004977 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4978 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4979 (kmp_affin_mask_t *)(*mask));
4980 __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n",
4981 gtid, buf);
4982 });
4983 return retval;
4984
4985#else
4986
4987 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
4988 return 0;
4989
4990#endif /* KMP_OS_WINDOWS */
4991}
4992
4993int __kmp_aux_get_affinity_max_proc() {
4994 if (!KMP_AFFINITY_CAPABLE()) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004995 return 0;
Jonathan Peyton30419822017-05-12 18:01:32 +00004996 }
4997#if KMP_GROUP_AFFINITY
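  // Worked instance (assuming a 64-bit Windows build, where DWORD_PTR is 8
  // bytes): two processor groups advertise 2 * 8 * CHAR_BIT == 128 logical
  // CPUs.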
4998 if (__kmp_num_proc_groups > 1) {
4999 return (int)(__kmp_num_proc_groups * sizeof(DWORD_PTR) * CHAR_BIT);
5000 }
5001#endif
5002 return __kmp_xproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005003}
5004
Jonathan Peyton30419822017-05-12 18:01:32 +00005005int __kmp_aux_set_affinity_mask_proc(int proc, void **mask) {
Jonathan Peyton30419822017-05-12 18:01:32 +00005006 if (!KMP_AFFINITY_CAPABLE()) {
5007 return -1;
5008 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005009
Joachim Protze6b840cc2019-01-16 11:35:11 +00005010 KA_TRACE(1000, (""); {
Jonathan Peyton30419822017-05-12 18:01:32 +00005011 int gtid = __kmp_entry_gtid();
5012 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5013 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
5014 (kmp_affin_mask_t *)(*mask));
5015 __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in "
5016 "affinity mask for thread %d = %s\n",
5017 proc, gtid, buf);
5018 });
5019
5020 if (__kmp_env_consistency_check) {
5021 if ((mask == NULL) || (*mask == NULL)) {
5022 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
Jim Cownie5e8470a2013-09-27 10:38:44 +00005023 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005024 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005025
Jonathan Peyton30419822017-05-12 18:01:32 +00005026 if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
5027 return -1;
5028 }
5029 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
5030 return -2;
5031 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005032
Jonathan Peyton30419822017-05-12 18:01:32 +00005033 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
5034 return 0;
5035}
5036
5037int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask) {
Jonathan Peyton30419822017-05-12 18:01:32 +00005038 if (!KMP_AFFINITY_CAPABLE()) {
5039 return -1;
5040 }
5041
Joachim Protze6b840cc2019-01-16 11:35:11 +00005042 KA_TRACE(1000, (""); {
Jonathan Peyton30419822017-05-12 18:01:32 +00005043 int gtid = __kmp_entry_gtid();
5044 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5045 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
5046 (kmp_affin_mask_t *)(*mask));
5047 __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in "
5048 "affinity mask for thread %d = %s\n",
5049 proc, gtid, buf);
5050 });
5051
5052 if (__kmp_env_consistency_check) {
5053 if ((mask == NULL) || (*mask == NULL)) {
5054 KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
Jim Cownie5e8470a2013-09-27 10:38:44 +00005055 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005056 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005057
Jonathan Peyton30419822017-05-12 18:01:32 +00005058 if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
5059 return -1;
5060 }
5061 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
5062 return -2;
5063 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005064
Jonathan Peyton30419822017-05-12 18:01:32 +00005065 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
5066 return 0;
5067}
5068
5069int __kmp_aux_get_affinity_mask_proc(int proc, void **mask) {
Jonathan Peyton30419822017-05-12 18:01:32 +00005070 if (!KMP_AFFINITY_CAPABLE()) {
5071 return -1;
5072 }
5073
Joachim Protze6b840cc2019-01-16 11:35:11 +00005074 KA_TRACE(1000, (""); {
Jonathan Peyton30419822017-05-12 18:01:32 +00005075 int gtid = __kmp_entry_gtid();
5076 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5077 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
5078 (kmp_affin_mask_t *)(*mask));
5079 __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in "
5080 "affinity mask for thread %d = %s\n",
5081 proc, gtid, buf);
5082 });
5083
5084 if (__kmp_env_consistency_check) {
5085 if ((mask == NULL) || (*mask == NULL)) {
5086 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
5087 }
5088 }
5089
5090 if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
5091 return -1;
5092 }
5093 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00005094 return 0;
Jonathan Peyton30419822017-05-12 18:01:32 +00005095 }
5096
5097 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005098}
5099
Jim Cownie5e8470a2013-09-27 10:38:44 +00005100// Dynamic affinity settings - Affinity balanced
Jonathan Peytone525f0d2018-09-26 20:43:23 +00005101void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
5102 KMP_DEBUG_ASSERT(th);
Jonathan Peyton30419822017-05-12 18:01:32 +00005103 bool fine_gran = true;
Jonathan Peytone525f0d2018-09-26 20:43:23 +00005104 int tid = th->th.th_info.ds.ds_tid;
Paul Osmialowskiecbe2ea2016-07-29 20:55:03 +00005105
Jonathan Peyton30419822017-05-12 18:01:32 +00005106 switch (__kmp_affinity_gran) {
5107 case affinity_gran_fine:
5108 case affinity_gran_thread:
5109 break;
5110 case affinity_gran_core:
5111 if (__kmp_nThreadsPerCore > 1) {
5112 fine_gran = false;
5113 }
5114 break;
5115 case affinity_gran_package:
5116 if (nCoresPerPkg > 1) {
5117 fine_gran = false;
5118 }
5119 break;
5120 default:
5121 fine_gran = false;
5122 }
5123
5124 if (__kmp_affinity_uniform_topology()) {
5125 int coreID;
5126 int threadID;
5127 // Number of hyper-threads per core on an HT machine
5128 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
5129 // Number of cores
5130 int ncores = __kmp_ncores;
5131 if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) {
5132 __kmp_nth_per_core = __kmp_avail_proc / nPackages;
5133 ncores = nPackages;
5134 }
5135 // How many threads will be bound to each core
5136 int chunk = nthreads / ncores;
5137 // How many cores will have an additional thread bound to it - "big cores"
5138 // How many cores will have an additional thread bound to them - "big cores"
5139 // Number of threads on the big cores
5140 int big_nth = (chunk + 1) * big_cores;
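    // Worked instance: nthreads == 10 on ncores == 4 with __kmp_nth_per_core
    // == 2 gives chunk == 2, big_cores == 2, big_nth == 6; tids 0-5 bind to
    // cores 0-1 (three apiece) and tids 6-9 to cores 2-3 (two apiece).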
5141 if (tid < big_nth) {
5142 coreID = tid / (chunk + 1);
5143 threadID = (tid % (chunk + 1)) % __kmp_nth_per_core;
5144 } else { // tid >= big_nth
5145 coreID = (tid - big_cores) / chunk;
5146 threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core;
Paul Osmialowskiecbe2ea2016-07-29 20:55:03 +00005147 }
5148
Jonathan Peyton30419822017-05-12 18:01:32 +00005149 KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
5150 "Illegal set affinity operation when not capable");
5151
Jonathan Peytone525f0d2018-09-26 20:43:23 +00005152 kmp_affin_mask_t *mask = th->th.th_affin_mask;
Jonathan Peyton30419822017-05-12 18:01:32 +00005153 KMP_CPU_ZERO(mask);
5154
5155 if (fine_gran) {
5156 int osID = address2os[coreID * __kmp_nth_per_core + threadID].second;
5157 KMP_CPU_SET(osID, mask);
5158 } else {
5159 for (int i = 0; i < __kmp_nth_per_core; i++) {
5160 int osID;
5161 osID = address2os[coreID * __kmp_nth_per_core + i].second;
5162 KMP_CPU_SET(osID, mask);
5163 }
5164 }
5165 if (__kmp_affinity_verbose) {
5166 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5167 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
5168 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
5169 __kmp_gettid(), tid, buf);
5170 }
5171 __kmp_set_system_affinity(mask, TRUE);
Jonathan Peyton30419822017-05-12 18:01:32 +00005172 } else { // Non-uniform topology
5173
Jonathan Peytone525f0d2018-09-26 20:43:23 +00005174 kmp_affin_mask_t *mask = th->th.th_affin_mask;
Jonathan Peyton30419822017-05-12 18:01:32 +00005175 KMP_CPU_ZERO(mask);
5176
5177 int core_level = __kmp_affinity_find_core_level(
5178 address2os, __kmp_avail_proc, __kmp_aff_depth - 1);
5179 int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
5180 __kmp_aff_depth - 1, core_level);
5181 int nth_per_core = __kmp_affinity_max_proc_per_core(
5182 address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level);
5183
5184 // For a performance gain, consider the special case nthreads ==
5185 // __kmp_avail_proc
5186 if (nthreads == __kmp_avail_proc) {
5187 if (fine_gran) {
5188 int osID = address2os[tid].second;
5189 KMP_CPU_SET(osID, mask);
5190 } else {
5191 int core = __kmp_affinity_find_core(address2os, tid,
5192 __kmp_aff_depth - 1, core_level);
5193 for (int i = 0; i < __kmp_avail_proc; i++) {
5194 int osID = address2os[i].second;
5195 if (__kmp_affinity_find_core(address2os, i, __kmp_aff_depth - 1,
5196 core_level) == core) {
5197 KMP_CPU_SET(osID, mask);
5198 }
Paul Osmialowskiecbe2ea2016-07-29 20:55:03 +00005199 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005200 }
5201 } else if (nthreads <= ncores) {
5202
5203 int core = 0;
5204 for (int i = 0; i < ncores; i++) {
5205 // Check if this core from procarr[] is in the mask
5206 int in_mask = 0;
5207 for (int j = 0; j < nth_per_core; j++) {
5208 if (procarr[i * nth_per_core + j] != -1) {
5209 in_mask = 1;
5210 break;
5211 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005212 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005213 if (in_mask) {
5214 if (tid == core) {
5215 for (int j = 0; j < nth_per_core; j++) {
5216 int osID = procarr[i * nth_per_core + j];
5217 if (osID != -1) {
5218 KMP_CPU_SET(osID, mask);
5219 // For fine granularity it is enough to set the first available
5220 // osID for this core
5221 if (fine_gran) {
5222 break;
5223 }
5224 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005225 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005226 break;
5227 } else {
5228 core++;
5229 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005230 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005231 }
5232 } else { // nthreads > ncores
5233 // Array to save the number of processors at each core
5234 int *nproc_at_core = (int *)KMP_ALLOCA(sizeof(int) * ncores);
5235 // Array to save the number of cores with "x" available processors;
5236 // Array to save the number of cores with "x" available processors
5237 (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));
5238 // Array to save the number of cores with # procs from x to nth_per_core
5239 int *ncores_with_x_to_max_procs =
5240 (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));
5241
5242 for (int i = 0; i <= nth_per_core; i++) {
5243 ncores_with_x_procs[i] = 0;
5244 ncores_with_x_to_max_procs[i] = 0;
5245 }
5246
5247 for (int i = 0; i < ncores; i++) {
5248 int cnt = 0;
5249 for (int j = 0; j < nth_per_core; j++) {
5250 if (procarr[i * nth_per_core + j] != -1) {
5251 cnt++;
5252 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005253 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005254 nproc_at_core[i] = cnt;
5255 ncores_with_x_procs[cnt]++;
5256 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005257
Jonathan Peyton30419822017-05-12 18:01:32 +00005258 for (int i = 0; i <= nth_per_core; i++) {
5259 for (int j = i; j <= nth_per_core; j++) {
5260 ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j];
5261 }
5262 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005263
Jonathan Peyton30419822017-05-12 18:01:32 +00005264 // Max number of processors
5265 int nproc = nth_per_core * ncores;
5266 // An array to keep number of threads per each context
5267 int *newarr = (int *)__kmp_allocate(sizeof(int) * nproc);
5268 for (int i = 0; i < nproc; i++) {
5269 newarr[i] = 0;
5270 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005271
Jonathan Peyton30419822017-05-12 18:01:32 +00005272 int nth = nthreads;
5273 int flag = 0;
5274 while (nth > 0) {
5275 for (int j = 1; j <= nth_per_core; j++) {
5276 int cnt = ncores_with_x_to_max_procs[j];
5277 for (int i = 0; i < ncores; i++) {
5278 // Skip the core with 0 processors
5279 if (nproc_at_core[i] == 0) {
5280 continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005281 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005282 for (int k = 0; k < nth_per_core; k++) {
5283 if (procarr[i * nth_per_core + k] != -1) {
5284 if (newarr[i * nth_per_core + k] == 0) {
5285 newarr[i * nth_per_core + k] = 1;
5286 cnt--;
5287 nth--;
5288 break;
5289 } else {
5290 if (flag != 0) {
5291 newarr[i * nth_per_core + k]++;
5292 cnt--;
5293 nth--;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005294 break;
Jonathan Peyton30419822017-05-12 18:01:32 +00005295 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005296 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005297 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005298 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005299 if (cnt == 0 || nth == 0) {
5300 break;
5301 }
5302 }
5303 if (nth == 0) {
5304 break;
5305 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005306 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005307 flag = 1;
5308 }
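      // Worked instance (hypothetical procarr): three cores exposing {2,1,2}
      // contexts and nthreads == 5 leave the loop with exactly one thread on
      // each context; only after every context is occupied does flag == 1
      // permit a second pass that stacks extra threads onto used contexts.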
5309 int sum = 0;
5310 for (int i = 0; i < nproc; i++) {
5311 sum += newarr[i];
5312 if (sum > tid) {
5313 if (fine_gran) {
5314 int osID = procarr[i];
5315 KMP_CPU_SET(osID, mask);
5316 } else {
5317 int coreID = i / nth_per_core;
5318 for (int ii = 0; ii < nth_per_core; ii++) {
5319 int osID = procarr[coreID * nth_per_core + ii];
5320 if (osID != -1) {
5321 KMP_CPU_SET(osID, mask);
5322 }
5323 }
5324 }
5325 break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005326 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005327 }
5328 __kmp_free(newarr);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005329 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005330
5331 if (__kmp_affinity_verbose) {
5332 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5333 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
5334 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
5335 __kmp_gettid(), tid, buf);
5336 }
5337 __kmp_set_system_affinity(mask, TRUE);
Jonathan Peyton30419822017-05-12 18:01:32 +00005338 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005339}
5340
Jonathan Peyton3076fa42016-01-12 17:21:55 +00005341#if KMP_OS_LINUX
5342// We don't need this entry for Windows because
5343// there is the GetProcessAffinityMask() API
5344//
5345// The intended usage is indicated by these steps:
5346// 1) The user gets the current affinity mask
5347// 2) Then sets the affinity by calling this function
5348// 3) Error check the return value
5349// 4) Use non-OpenMP parallelization
5350// 5) Reset the affinity to what was stored in step 1)
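// A hypothetical user-side sketch of those steps (helper name invented):
//
//   cpu_set_t saved;
//   pthread_getaffinity_np(pthread_self(), sizeof(saved), &saved);  // 1)
//   if (kmp_set_thread_affinity_mask_initial() == 0) {              // 2)-3)
//     run_non_openmp_work();                                        // 4)
//   }
//   pthread_setaffinity_np(pthread_self(), sizeof(saved), &saved);  // 5)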
5351#ifdef __cplusplus
5352extern "C"
5353#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005354 int
5355 kmp_set_thread_affinity_mask_initial()
Jonathan Peyton3076fa42016-01-12 17:21:55 +00005356// the function returns 0 on success,
5357// -1 if we cannot bind the thread
5358// >0 (errno) if an error happened during binding
5359{
Jonathan Peyton30419822017-05-12 18:01:32 +00005360 int gtid = __kmp_get_gtid();
5361 if (gtid < 0) {
5362 // Do not touch non-omp threads
5363 KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
5364 "non-omp thread, returning\n"));
5365 return -1;
5366 }
5367 if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) {
5368 KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
5369 "affinity not initialized, returning\n"));
5370 return -1;
5371 }
5372 KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
5373 "set full mask for thread %d\n",
5374 gtid));
5375 KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
5376 return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
Jonathan Peyton3076fa42016-01-12 17:21:55 +00005377}
5378#endif
5379
Alp Toker763b9392014-02-28 09:42:41 +00005380#endif // KMP_AFFINITY_SUPPORTED