/*
 * kmp_affinity.cpp -- affinity management
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_affinity.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_str.h"
#include "kmp_wrapper_getpid.h"

// Store the real or imagined machine hierarchy here
static hierarchy_info machine_hierarchy;

void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
  kmp_uint32 depth;
  // The test below is true if affinity is available, but set to "none". Need to
  // init on first use of hierarchical barrier.
  if (TCR_1(machine_hierarchy.uninitialized))
    machine_hierarchy.init(NULL, nproc);

  // Adjust the hierarchy in case num threads exceeds original
  if (nproc > machine_hierarchy.base_num_threads)
    machine_hierarchy.resize(nproc);

  depth = machine_hierarchy.depth;
  KMP_DEBUG_ASSERT(depth > 0);

  thr_bar->depth = depth;
  thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1;
  thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}
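
// Illustrative example: if machine_hierarchy models 16 threads with a leaf
// branching factor numPerLevel[0] == 4, then base_leaf_kids == 3 (each
// leaf-level parent gathers and releases 3 children), and skip_per_level
// gives the stride, in thread ids, between adjacent subtrees at each level.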

#if KMP_AFFINITY_SUPPORTED

bool KMPAffinity::picked_api = false;

void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
void *KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); }
void KMPAffinity::Mask::operator delete(void *p) { __kmp_free(p); }
void KMPAffinity::Mask::operator delete[](void *p) { __kmp_free(p); }
void *KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); }
void KMPAffinity::operator delete(void *p) { __kmp_free(p); }

void KMPAffinity::pick_api() {
  KMPAffinity *affinity_dispatch;
  if (picked_api)
    return;
#if KMP_USE_HWLOC
  // Only use Hwloc if affinity isn't explicitly disabled and
  // the user requests the Hwloc topology method
  if (__kmp_affinity_top_method == affinity_top_method_hwloc &&
      __kmp_affinity_type != affinity_disabled) {
    affinity_dispatch = new KMPHwlocAffinity();
  } else
#endif
  {
    affinity_dispatch = new KMPNativeAffinity();
  }
  __kmp_affinity_dispatch = affinity_dispatch;
  picked_api = true;
}

void KMPAffinity::destroy_api() {
  if (__kmp_affinity_dispatch != NULL) {
    delete __kmp_affinity_dispatch;
    __kmp_affinity_dispatch = NULL;
    picked_api = false;
  }
}

// Print the affinity mask to the character array in a pretty format.
char *__kmp_affinity_print_mask(char *buf, int buf_len,
                                kmp_affin_mask_t *mask) {
  KMP_ASSERT(buf_len >= 40);
  char *scan = buf;
  char *end = buf + buf_len - 1;

  // Find first element / check for empty set.
  size_t i;
  i = mask->begin();
  if (i == mask->end()) {
    KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}");
    while (*scan != '\0')
      scan++;
    KMP_ASSERT(scan <= end);
    return buf;
  }

  KMP_SNPRINTF(scan, end - scan + 1, "{%ld", (long)i);
  while (*scan != '\0')
    scan++;
  i++;
  for (; i != mask->end(); i = mask->next(i)) {
    if (!KMP_CPU_ISSET(i, mask)) {
      continue;
    }

    // Check for buffer overflow. A string of the form ",<n>" will have at most
    // 10 characters, plus we want to leave room to print ",...}" if the set is
    // too large to print for a total of 15 characters. We already left room for
    // '\0' in setting end.
    if (end - scan < 15) {
      break;
    }
    KMP_SNPRINTF(scan, end - scan + 1, ",%-ld", (long)i);
    while (*scan != '\0')
      scan++;
  }
  if (i != mask->end()) {
    KMP_SNPRINTF(scan, end - scan + 1, ",...");
    while (*scan != '\0')
      scan++;
  }
  KMP_SNPRINTF(scan, end - scan + 1, "}");
  while (*scan != '\0')
    scan++;
  KMP_ASSERT(scan <= end);
  return buf;
}
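
// Example: for a mask containing OS procs 0-3 and a sufficiently large
// buffer, this produces "{0,1,2,3}". If fewer than 15 bytes remain while
// scanning, the tail is elided, e.g. "{0,1,2,...}". The buf_len >= 40
// assertion guarantees room for the worst cases "{<empty>}" and ",...}".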

void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
  KMP_CPU_ZERO(mask);

#if KMP_GROUP_AFFINITY

  if (__kmp_num_proc_groups > 1) {
    int group;
    KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
    for (group = 0; group < __kmp_num_proc_groups; group++) {
      int i;
      int num = __kmp_GetActiveProcessorCount(group);
      for (i = 0; i < num; i++) {
        KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
      }
    }
  } else

#endif /* KMP_GROUP_AFFINITY */

  {
    int proc;
    for (proc = 0; proc < __kmp_xproc; proc++) {
      KMP_CPU_SET(proc, mask);
    }
  }
}
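
// Example: on a 64-bit Windows build, CHAR_BIT * sizeof(DWORD_PTR) == 64, so
// proc 5 of processor group 2 occupies bit 2 * 64 + 5 == 133 in the mask.
// Group g always starts at bit g * 64, even when a group has fewer than 64
// active processors.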

// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the child_num
// vector of the address object. This is done in case the labels used for
// the children at one node of the hierarchy differ from those used for
// another node at the same level. Example: suppose the machine has 2 nodes
// with 2 packages each. The first node contains packages 601 and 602, and
// the second node contains packages 603 and 604. If we try to sort the table
// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
// because we are paying attention to the labels themselves, not the ordinal
// child numbers. By using the child numbers in the sort, the result is
// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
static void __kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
                                             int numAddrs) {
  KMP_DEBUG_ASSERT(numAddrs > 0);
  int depth = address2os->first.depth;
  unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *lastLabel = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  int labCt;
  for (labCt = 0; labCt < depth; labCt++) {
    address2os[0].first.childNums[labCt] = counts[labCt] = 0;
    lastLabel[labCt] = address2os[0].first.labels[labCt];
  }
  int i;
  for (i = 1; i < numAddrs; i++) {
    for (labCt = 0; labCt < depth; labCt++) {
      if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
        int labCt2;
        for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
          counts[labCt2] = 0;
          lastLabel[labCt2] = address2os[i].first.labels[labCt2];
        }
        counts[labCt]++;
        lastLabel[labCt] = address2os[i].first.labels[labCt];
        break;
      }
    }
    for (labCt = 0; labCt < depth; labCt++) {
      address2os[i].first.childNums[labCt] = counts[labCt];
    }
    for (; labCt < (int)Address::maxDepth; labCt++) {
      address2os[i].first.childNums[labCt] = 0;
    }
  }
  __kmp_free(lastLabel);
  __kmp_free(counts);
}

// All of the __kmp_affinity_create_*_map() routines should set
// __kmp_affinity_masks to a vector of affinity mask objects of length
// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
// return the number of levels in the machine topology tree (zero if
// __kmp_affinity_type == affinity_none).
//
// All of the __kmp_affinity_create_*_map() routines should set
// *__kmp_affin_fullMask to the affinity mask for the initialization thread.
// They need to save and restore the mask, and it could be needed later, so
// saving it is just an optimization to avoid calling kmp_get_system_affinity()
// again.
kmp_affin_mask_t *__kmp_affin_fullMask = NULL;

static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif
static int *__kmp_pu_os_idx = NULL;

// __kmp_affinity_uniform_topology() doesn't work when called from
// places which support arbitrarily many levels in the machine topology
// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
// or __kmp_affinity_create_x2apicid_map().
inline static bool __kmp_affinity_uniform_topology() {
  return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}
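
// Example: a machine with 2 packages, 8 cores per package, and 2 hardware
// threads per core is uniform when all 2 * 8 * 2 == 32 OS procs are
// available; if some procs are masked out (__kmp_avail_proc < 32), the
// topology is reported as non-uniform.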

// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc.
static void __kmp_affinity_print_topology(AddrUnsPair *address2os, int len,
                                          int depth, int pkgLevel,
                                          int coreLevel, int threadLevel) {
  int proc;

  KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
  for (proc = 0; proc < len; proc++) {
    int level;
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    for (level = 0; level < depth; level++) {
      if (level == threadLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
      } else if (level == coreLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
      } else if (level == pkgLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
      } else if (level > pkgLevel) {
        __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                            level - pkgLevel - 1);
      } else {
        __kmp_str_buf_print(&buf, "L%d ", level);
      }
      __kmp_str_buf_print(&buf, "%d ", address2os[proc].first.labels[level]);
    }
    KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
               buf.str);
    __kmp_str_buf_free(&buf);
  }
}
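
// With a depth-3 map where pkgLevel == 0, coreLevel == 1, and
// threadLevel == 2, each line of output pairs an OS proc with a location
// string of the form "Package 0 Core 1 Thread 0 " (the exact wording comes
// from the i18n message catalog).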

#if KMP_USE_HWLOC

// This function removes the topology levels that are radix 1 and don't offer
// further information about the topology. The most common example is when
// there is one thread context per core: the extra thread-context level offers
// no unique labels, so it is removed.
// return value: the new depth of address2os
static int __kmp_affinity_remove_radix_one_levels(AddrUnsPair *address2os,
                                                  int nActiveThreads, int depth,
                                                  int *pkgLevel, int *coreLevel,
                                                  int *threadLevel) {
  int level;
  int i;
  int radix1_detected;

  for (level = depth - 1; level >= 0; --level) {
    // Always keep the package level
    if (level == *pkgLevel)
      continue;
    // Detect if this level is radix 1
    radix1_detected = 1;
    for (i = 1; i < nActiveThreads; ++i) {
      if (address2os[0].first.labels[level] !=
          address2os[i].first.labels[level]) {
        // There are differing label values for this level so it stays
        radix1_detected = 0;
        break;
      }
    }
    if (!radix1_detected)
      continue;
    // Radix 1 was detected
    if (level == *threadLevel) {
      // If only one thread per core, then just decrement
      // the depth which removes the threadlevel from address2os
      for (i = 0; i < nActiveThreads; ++i) {
        address2os[i].first.depth--;
      }
      *threadLevel = -1;
    } else if (level == *coreLevel) {
      // For core level, we move the thread labels over if they are still
      // valid (*threadLevel != -1), and also reduce the depth another level
      for (i = 0; i < nActiveThreads; ++i) {
        if (*threadLevel != -1) {
          address2os[i].first.labels[*coreLevel] =
              address2os[i].first.labels[*threadLevel];
        }
        address2os[i].first.depth--;
      }
      *coreLevel = -1;
    }
  }
  return address2os[0].first.depth;
}
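
// Example: on a machine with one hardware thread per core, every address
// carries the same thread label, so the thread level is radix 1; each
// address's depth drops from 3 to 2 and *threadLevel becomes -1. The package
// level is never removed, even on a single-package machine.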

// Returns the number of objects of type 'type' below 'obj' within the topology
// tree structure. e.g., if obj is a HWLOC_OBJ_PACKAGE object, and type is
// HWLOC_OBJ_PU, then this will return the number of PU's under the package
// object.
static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj,
                                           hwloc_obj_type_t type) {
  int retval = 0;
  hwloc_obj_t first;
  for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type,
                                           obj->logical_index, type, 0);
       first != NULL &&
       hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) ==
           obj;
       first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type,
                                          first)) {
    ++retval;
  }
  return retval;
}

static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
                                           kmp_i18n_id_t *const msg_id) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  __kmp_get_system_affinity(oldMask, TRUE);

  int depth = 3;
  int pkgLevel = 0;
  int coreLevel = 1;
  int threadLevel = 2;

  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from hwloc on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity_type == affinity_none);

    nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(
        hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, 0),
        HWLOC_OBJ_CORE);
    __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(
        hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0),
        HWLOC_OBJ_PU);
    __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    KMP_CPU_FREE(oldMask);
    return 0;
  }

  // Allocate the data structure to be returned.
  AddrUnsPair *retval =
      (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore,
  // nCoresPerPkg, & nPackages. Make sure all these vars are set
  // correctly, and return if affinity is not enabled.

  hwloc_obj_t pu;
  hwloc_obj_t core;
  hwloc_obj_t socket;
  int nActiveThreads = 0;
  int socket_identifier = 0;
  // re-calculate globals to count only accessible resources
  __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
  for (socket =
           hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, 0);
       socket != NULL; socket = hwloc_get_next_obj_by_type(
                           __kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, socket),
      socket_identifier++) {
    int core_identifier = 0;
    int num_active_cores = 0;
    for (core = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, socket->type,
                                            socket->logical_index,
                                            HWLOC_OBJ_CORE, 0);
         core != NULL &&
         hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, socket->type,
                                        core) == socket;
         core = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE,
                                           core),
        core_identifier++) {
      int pu_identifier = 0;
      int num_active_threads = 0;
      for (pu = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, core->type,
                                            core->logical_index, HWLOC_OBJ_PU,
                                            0);
           pu != NULL &&
           hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, core->type,
                                          pu) == core;
           pu = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU,
                                           pu),
          pu_identifier++) {
        Address addr(3);
        if (!KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
          continue; // skip inactive (inaccessible) unit
        KA_TRACE(20,
                 ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
                  socket->os_index, socket->logical_index, core->os_index,
                  core->logical_index, pu->os_index, pu->logical_index));
        addr.labels[0] = socket_identifier; // package
        addr.labels[1] = core_identifier; // core
        addr.labels[2] = pu_identifier; // pu
        retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
        __kmp_pu_os_idx[nActiveThreads] =
            pu->os_index; // keep os index for each active pu
        nActiveThreads++;
        ++num_active_threads; // count active threads per core
      }
      if (num_active_threads) { // were there any active threads on the core?
        ++__kmp_ncores; // count total active cores
        ++num_active_cores; // count active cores per socket
        if (num_active_threads > __kmp_nThreadsPerCore)
          __kmp_nThreadsPerCore = num_active_threads; // calc maximum
      }
    }
    if (num_active_cores) { // were there any active cores on the socket?
      ++nPackages; // count total active packages
      if (num_active_cores > nCoresPerPkg)
        nCoresPerPkg = num_active_cores; // calc maximum
    }
  }

  // If there's only one thread context to bind to, return now.
  KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
  KMP_ASSERT(nActiveThreads > 0);
  if (nActiveThreads == 1) {
    __kmp_ncores = nPackages = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

      KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(retval);
      KMP_CPU_FREE(oldMask);
      return 0;
    }

    // Form an Address object which only includes the package level.
    Address addr(1);
    addr.labels[0] = retval[0].first.labels[pkgLevel];
    retval[0].first = addr;

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
    }

    *address2os = retval;
    KMP_CPU_FREE(oldMask);
    return 1;
  }

  // Sort the table by physical Id.
  qsort(retval, nActiveThreads, sizeof(*retval),
        __kmp_affinity_cmp_Address_labels);

  // Check to see if the machine topology is uniform
  unsigned uniform =
      (nPackages * nCoresPerPkg * __kmp_nThreadsPerCore == nActiveThreads);

  // Print the machine topology summary.
  if (__kmp_affinity_verbose) {
    char mask[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

    KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (uniform) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }

    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);

    __kmp_str_buf_print(&buf, "%d", nPackages);
    // for (level = 1; level <= pkgLevel; level++) {
    //   __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
    // }
    KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);

    __kmp_str_buf_free(&buf);
  }

  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(retval);
    KMP_CPU_FREE(oldMask);
    return 0;
  }

  // Find any levels with radix 1, and remove them from the map
  // (except for the package level).
  depth = __kmp_affinity_remove_radix_one_levels(
      retval, nActiveThreads, depth, &pkgLevel, &coreLevel, &threadLevel);

  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled
    // in the machine topology map.
    __kmp_affinity_gran_levels = 0;
    if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
      __kmp_affinity_gran_levels++;
    }
    if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
      __kmp_affinity_gran_levels++;
    }
    if (__kmp_affinity_gran > affinity_gran_package) {
      __kmp_affinity_gran_levels++;
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(retval, nActiveThreads, depth, pkgLevel,
                                  coreLevel, threadLevel);
  }

  KMP_CPU_FREE(oldMask);
  *address2os = retval;
  return depth;
}
#endif // KMP_USE_HWLOC

// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread id's <-> processor id's.
static int __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
                                          kmp_i18n_id_t *const msg_id) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Even if __kmp_affinity_type == affinity_none, this routine might still
  // be called to set __kmp_ncores, as well as
  // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  if (!KMP_AFFINITY_CAPABLE()) {
    KMP_ASSERT(__kmp_affinity_type == affinity_none);
    __kmp_ncores = nPackages = __kmp_xproc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    return 0;
  }

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  // Make sure all these vars are set correctly, and return now if affinity is
  // not enabled.
  __kmp_ncores = nPackages = __kmp_avail_proc;
  __kmp_nThreadsPerCore = nCoresPerPkg = 1;
  if (__kmp_affinity_verbose) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              __kmp_affin_fullMask);

    KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    KMP_INFORM(Uniform, "KMP_AFFINITY");
    KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
  }
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  if (__kmp_affinity_type == affinity_none) {
    int avail_ct = 0;
    int i;
    KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
      if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask))
        continue;
      __kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat
    }
    return 0;
  }

  // Construct the data structure to be returned.
  *address2os =
      (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
  int avail_ct = 0;
  unsigned int i;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
    Address addr(1);
    addr.labels[0] = i;
    (*address2os)[avail_ct++] = AddrUnsPair(addr, i);
  }
  if (__kmp_affinity_verbose) {
    KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Only the package level is modeled in the machine topology map,
    // so the #levels of granularity is either 0 or 1.
    if (__kmp_affinity_gran > affinity_gran_package) {
      __kmp_affinity_gran_levels = 1;
    } else {
      __kmp_affinity_gran_levels = 0;
    }
  }
  return 1;
}
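
// Example: with an initial mask of {0,1,2,3}, the flat map treats each OS
// proc as its own package: the returned table is {0}->0, {1}->1, {2}->2,
// {3}->3 with depth 1, and __kmp_ncores == nPackages == 4.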

#if KMP_GROUP_AFFINITY

// If multiple Windows* OS processor groups exist, we can create a 2-level
// topology map with the groups at level 0 and the individual procs at level 1.
// This facilitates letting the threads float among all procs in a group,
// if granularity=group (the default when there are multiple groups).
static int __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
                                                kmp_i18n_id_t *const msg_id) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // If we aren't affinity capable, then return now.
  // The flat mapping will be used.
  if (!KMP_AFFINITY_CAPABLE()) {
    // FIXME set *msg_id
    return -1;
  }

  // Construct the data structure to be returned.
  *address2os =
      (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  int avail_ct = 0;
  int i;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
    Address addr(2);
    addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
    addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
    (*address2os)[avail_ct++] = AddrUnsPair(addr, i);

    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
                 addr.labels[1]);
    }
  }

  if (__kmp_affinity_gran_levels < 0) {
    if (__kmp_affinity_gran == affinity_gran_group) {
      __kmp_affinity_gran_levels = 1;
    } else if ((__kmp_affinity_gran == affinity_gran_fine) ||
               (__kmp_affinity_gran == affinity_gran_thread)) {
      __kmp_affinity_gran_levels = 0;
    } else {
      const char *gran_str = NULL;
      if (__kmp_affinity_gran == affinity_gran_core) {
        gran_str = "core";
      } else if (__kmp_affinity_gran == affinity_gran_package) {
        gran_str = "package";
      } else if (__kmp_affinity_gran == affinity_gran_node) {
        gran_str = "node";
      } else {
        KMP_ASSERT(0);
      }

      // Warning: can't use affinity granularity \"gran\" with group topology
      // method, using "thread"
      __kmp_affinity_gran_levels = 0;
    }
  }
  return 2;
}

#endif /* KMP_GROUP_AFFINITY */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

static int __kmp_cpuid_mask_width(int count) {
  int r = 0;

  while ((1 << r) < count)
    ++r;
  return r;
}
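
// Example: __kmp_cpuid_mask_width(6) == 3, since 1 << 3 == 8 is the smallest
// power of two >= 6; __kmp_cpuid_mask_width(8) == 3 as well. This is the
// number of APIC id bits needed to encode 'count' distinct values.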

class apicThreadInfo {
public:
  unsigned osId; // param to __kmp_affinity_bind_thread
  unsigned apicId; // from cpuid after binding
  unsigned maxCoresPerPkg; // ""
  unsigned maxThreadsPerPkg; // ""
  unsigned pkgId; // inferred from above values
  unsigned coreId; // ""
  unsigned threadId; // ""
};

static int __kmp_affinity_cmp_apicThreadInfo_os_id(const void *a,
                                                   const void *b) {
  const apicThreadInfo *aa = (const apicThreadInfo *)a;
  const apicThreadInfo *bb = (const apicThreadInfo *)b;
  if (aa->osId < bb->osId)
    return -1;
  if (aa->osId > bb->osId)
    return 1;
  return 0;
}

static int __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a,
                                                     const void *b) {
  const apicThreadInfo *aa = (const apicThreadInfo *)a;
  const apicThreadInfo *bb = (const apicThreadInfo *)b;
  if (aa->pkgId < bb->pkgId)
    return -1;
  if (aa->pkgId > bb->pkgId)
    return 1;
  if (aa->coreId < bb->coreId)
    return -1;
  if (aa->coreId > bb->coreId)
    return 1;
  if (aa->threadId < bb->threadId)
    return -1;
  if (aa->threadId > bb->threadId)
    return 1;
  return 0;
}

// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
// an algorithm which cycles through the available os threads, setting
// the current thread's affinity mask to that thread, and then retrieves
// the Apic Id for each thread context using the cpuid instruction.
static int __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
                                            kmp_i18n_id_t *const msg_id) {
  kmp_cpuid buf;
  int rc;
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Check if cpuid leaf 4 is supported.
  __kmp_x86_cpuid(0, 0, &buf);
  if (buf.eax < 4) {
    *msg_id = kmp_i18n_str_NoLeaf4Support;
    return -1;
  }

  // The algorithm used starts by setting the affinity to each available thread
  // and retrieving info from the cpuid instruction, so if we are not capable of
  // calling __kmp_get_system_affinity() and __kmp_set_system_affinity(), then
  // we need to do something else - use the defaults that we calculated from
  // issuing cpuid without binding to each proc.
  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from cpuid on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity_type == affinity_none);

    // Get an upper bound on the number of threads per package using cpuid(1).
    // On some OS/chip combinations where HT is supported by the chip but is
    // disabled, this value will be 2 on a single core chip. Usually, it will be
    // 2 if HT is enabled and 1 if HT is disabled.
    __kmp_x86_cpuid(1, 0, &buf);
    int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
    if (maxThreadsPerPkg == 0) {
      maxThreadsPerPkg = 1;
    }

    // The num cores per pkg comes from cpuid(4). 1 must be added to the
    // encoded value.
    //
    // The author of cpu_count.cpp treated this as only an upper bound on the
    // number of cores, but I haven't seen any cases where it was greater than
    // the actual number of cores, so we will treat it as exact in this block
    // of code.
    //
    // First, we need to check if cpuid(4) is supported on this chip. To see if
    // cpuid(n) is supported, issue cpuid(0) and check if eax has the value n or
    // greater.
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax >= 4) {
      __kmp_x86_cpuid(4, 0, &buf);
      nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
    } else {
      nCoresPerPkg = 1;
    }

    // There is no way to reliably tell if HT is enabled without issuing the
    // cpuid instruction from every thread and correlating the cpuid info, so
    // if the machine is not affinity capable, we assume that HT is off. We
    // have seen quite a few machines where maxThreadsPerPkg is 2, yet the
    // machine does not support HT.
    //
    // - Older OSes are usually found on machines with older chips, which do
    //   not support HT.
    // - The performance penalty for mistakenly identifying a machine as HT
    //   when it isn't (which results in blocktime being incorrectly set to 0)
    //   is greater than the penalty for mistakenly identifying a machine as
    //   being 1 thread/core when it is really HT enabled (which results in
    //   blocktime being incorrectly set to a positive value).
    __kmp_ncores = __kmp_xproc;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    __kmp_nThreadsPerCore = 1;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    return 0;
  }

  // From here on, we can assume that it is safe to call
  // __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
  // __kmp_affinity_type = affinity_none.

  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  KMP_ASSERT(oldMask != NULL);
  __kmp_get_system_affinity(oldMask, TRUE);

  // Run through each of the available contexts, binding the current thread
  // to it, and obtaining the pertinent information using the cpuid instr.
  //
  // The relevant information is:
  // - Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
  //   has a unique Apic Id, which is of the form pkg# : core# : thread#.
  // - Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The value
  //   of this field determines the width of the core# + thread# fields in the
  //   Apic Id. It is also an upper bound on the number of threads per
  //   package, but it has been verified that situations happen where it is not
  //   exact. In particular, on certain OS/chip combinations where Intel(R)
  //   Hyper-Threading Technology is supported by the chip but has been
  //   disabled, the value of this field will be 2 (for a single core chip).
  //   On other OS/chip combinations supporting Intel(R) Hyper-Threading
  //   Technology, the value of this field will be 1 when Intel(R)
  //   Hyper-Threading Technology is disabled and 2 when it is enabled.
  // - Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The value
  //   of this field (+1) determines the width of the core# field in the Apic
  //   Id. The comments in "cpucount.cpp" say that this value is an upper
  //   bound, but the IA-32 architecture manual says that it is exactly the
  //   number of cores per package, and I haven't seen any case where it
  //   wasn't.
  //
  // From this information, deduce the package Id, core Id, and thread Id,
  // and set the corresponding fields in the apicThreadInfo struct.
  unsigned i;
  apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
  unsigned nApics = 0;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

    __kmp_affinity_dispatch->bind_thread(i);
    threadInfo[nApics].osId = i;

    // The apic id and max threads per pkg come from cpuid(1).
    __kmp_x86_cpuid(1, 0, &buf);
    if (((buf.edx >> 9) & 1) == 0) {
      __kmp_set_system_affinity(oldMask, TRUE);
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_ApicNotPresent;
      return -1;
    }
    threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
    threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
    if (threadInfo[nApics].maxThreadsPerPkg == 0) {
      threadInfo[nApics].maxThreadsPerPkg = 1;
    }

    // Max cores per pkg comes from cpuid(4). 1 must be added to the encoded
    // value.
    //
    // First, we need to check if cpuid(4) is supported on this chip. To see if
    // cpuid(n) is supported, issue cpuid(0) and check if eax has the value n
    // or greater.
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax >= 4) {
      __kmp_x86_cpuid(4, 0, &buf);
      threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
    } else {
      threadInfo[nApics].maxCoresPerPkg = 1;
    }

    // Infer the pkgId / coreId / threadId using only the info obtained locally.
    int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg);
    threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

    int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg);
    int widthT = widthCT - widthC;
    if (widthT < 0) {
      // I've never seen this one happen, but I suppose it could, if the cpuid
      // instruction on a chip was really screwed up. Make sure to restore the
      // affinity mask before the tail call.
      __kmp_set_system_affinity(oldMask, TRUE);
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_InvalidCpuidInfo;
      return -1;
    }

    int maskC = (1 << widthC) - 1;
    threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC;

    int maskT = (1 << widthT) - 1;
    threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

    nApics++;
  }
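
  // Worked example of the decomposition above: with maxThreadsPerPkg == 16
  // (widthCT == 4) and maxCoresPerPkg == 8 (widthC == 3, so widthT == 1), an
  // apicId of 0x2B (binary 101011) yields pkgId == 0x2B >> 4 == 2,
  // coreId == (0x2B >> 1) & 7 == 5, and threadId == 0x2B & 1 == 1.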

  // We've collected all the info we need.
  // Restore the old affinity mask for this thread.
  __kmp_set_system_affinity(oldMask, TRUE);

  // If there's only one thread context to bind to, form an Address object
  // with depth 1 and return immediately (or, if affinity is off, set
  // address2os to NULL and return).
  //
  // If it is configured to omit the package level when there is only a single
  // package, the logic at the end of this routine won't work if there is only
  // a single thread - it would try to form an Address object with depth 0.
  KMP_ASSERT(nApics > 0);
  if (nApics == 1) {
    __kmp_ncores = nPackages = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

      KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      return 0;
    }

    *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair));
    Address addr(1);
    addr.labels[0] = threadInfo[0].pkgId;
    (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return 1;
  }

  // Sort the threadInfo table by physical Id.
  qsort(threadInfo, nApics, sizeof(*threadInfo),
        __kmp_affinity_cmp_apicThreadInfo_phys_id);

  // The table is now sorted by pkgId / coreId / threadId, but we really don't
  // know the radix of any of the fields. pkgId's may be sparsely assigned among
  // the chips on a system. Although coreId's are usually assigned
  // [0 .. coresPerPkg-1] and threadId's are usually assigned
  // [0..threadsPerCore-1], we don't want to make any such assumptions.
  //
  // For that matter, we don't know what coresPerPkg and threadsPerCore (or the
  // total # packages) are at this point - we want to determine that now. We
  // only have an upper bound on the first two figures.
  //
  // We also perform a consistency check at this point: the values returned by
  // the cpuid instruction for any thread bound to a given package had better
  // return the same info for maxThreadsPerPkg and maxCoresPerPkg.
  nPackages = 1;
  nCoresPerPkg = 1;
  __kmp_nThreadsPerCore = 1;
  unsigned nCores = 1;

  unsigned pkgCt = 1; // to determine radii
  unsigned lastPkgId = threadInfo[0].pkgId;
  unsigned coreCt = 1;
  unsigned lastCoreId = threadInfo[0].coreId;
  unsigned threadCt = 1;
  unsigned lastThreadId = threadInfo[0].threadId;

  // intra-pkg consist checks
  unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
  unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

  for (i = 1; i < nApics; i++) {
    if (threadInfo[i].pkgId != lastPkgId) {
      nCores++;
      pkgCt++;
      lastPkgId = threadInfo[i].pkgId;
      if ((int)coreCt > nCoresPerPkg)
        nCoresPerPkg = coreCt;
      coreCt = 1;
      lastCoreId = threadInfo[i].coreId;
      if ((int)threadCt > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = threadCt;
      threadCt = 1;
      lastThreadId = threadInfo[i].threadId;

      // This is a different package, so go on to the next iteration without
      // doing any consistency checks. Reset the consistency check vars, though.
      prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
      prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
      continue;
    }

    if (threadInfo[i].coreId != lastCoreId) {
      nCores++;
      coreCt++;
      lastCoreId = threadInfo[i].coreId;
      if ((int)threadCt > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = threadCt;
      threadCt = 1;
      lastThreadId = threadInfo[i].threadId;
    } else if (threadInfo[i].threadId != lastThreadId) {
      threadCt++;
      lastThreadId = threadInfo[i].threadId;
    } else {
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
      return -1;
    }

    // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
    // fields agree between all the threads bound to a given package.
    if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) ||
        (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
      return -1;
    }
  }
  nPackages = pkgCt;
  if ((int)coreCt > nCoresPerPkg)
    nCoresPerPkg = coreCt;
  if ((int)threadCt > __kmp_nThreadsPerCore)
    __kmp_nThreadsPerCore = threadCt;

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  // Make sure all these vars are set correctly, and return now if affinity is
  // not enabled.
  __kmp_ncores = nCores;
  if (__kmp_affinity_verbose) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

    KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (__kmp_affinity_uniform_topology()) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }
    KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
  }
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  for (i = 0; i < nApics; ++i) {
    __kmp_pu_os_idx[i] = threadInfo[i].osId;
  }
  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return 0;
  }

  // Now that we've determined the number of packages, the number of cores per
  // package, and the number of threads per core, we can construct the data
  // structure that is to be returned.
  int pkgLevel = 0;
  int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
  int threadLevel =
      (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
  unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

  KMP_ASSERT(depth > 0);
  *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

  for (i = 0; i < nApics; ++i) {
    Address addr(depth);
    unsigned os = threadInfo[i].osId;
    int d = 0;

    if (pkgLevel >= 0) {
      addr.labels[d++] = threadInfo[i].pkgId;
    }
    if (coreLevel >= 0) {
      addr.labels[d++] = threadInfo[i].coreId;
    }
    if (threadLevel >= 0) {
      addr.labels[d++] = threadInfo[i].threadId;
    }
    (*address2os)[i] = AddrUnsPair(addr, os);
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled in the
    // machine topology map.
    __kmp_affinity_gran_levels = 0;
    if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
      __kmp_affinity_gran_levels++;
    }
    if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
      __kmp_affinity_gran_levels++;
    }
    if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
      __kmp_affinity_gran_levels++;
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
                                  coreLevel, threadLevel);
  }

  __kmp_free(threadInfo);
  KMP_CPU_FREE(oldMask);
  return depth;
}
1220
Jim Cownie5e8470a2013-09-27 10:38:44 +00001221// Intel(R) microarchitecture code name Nehalem, Dunnington and later
1222// architectures support a newer interface for specifying the x2APIC Ids,
1223// based on cpuid leaf 11.
Jonathan Peyton30419822017-05-12 18:01:32 +00001224static int __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
1225 kmp_i18n_id_t *const msg_id) {
1226 kmp_cpuid buf;
1227 *address2os = NULL;
1228 *msg_id = kmp_i18n_null;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001229
Jonathan Peyton30419822017-05-12 18:01:32 +00001230 // Check to see if cpuid leaf 11 is supported.
1231 __kmp_x86_cpuid(0, 0, &buf);
1232 if (buf.eax < 11) {
1233 *msg_id = kmp_i18n_str_NoLeaf11Support;
1234 return -1;
1235 }
1236 __kmp_x86_cpuid(11, 0, &buf);
1237 if (buf.ebx == 0) {
1238 *msg_id = kmp_i18n_str_NoLeaf11Support;
1239 return -1;
1240 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001241
  // Find the number of levels in the machine topology. While we're at it, get
  // the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will try
  // to get more accurate values later by explicitly counting them, but get
  // reasonable defaults now, in case we return early.
  int level;
  int threadLevel = -1;
  int coreLevel = -1;
  int pkgLevel = -1;
  __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

  for (level = 0;; level++) {
    if (level > 31) {
      // FIXME: Hack for DPD200163180
      //
      // If level is big then something went wrong -> exiting
      //
      // There could actually be 32 valid levels in the machine topology, but
      // so far, the only machine we have seen which does not exit this loop
      // before iteration 32 has fubar x2APIC settings.
      //
      // For now, just reject this case based upon loop trip count.
      *msg_id = kmp_i18n_str_InvalidCpuidInfo;
      return -1;
    }
    __kmp_x86_cpuid(11, level, &buf);
    if (buf.ebx == 0) {
      if (pkgLevel < 0) {
        // Will infer nPackages from __kmp_xproc
        pkgLevel = level;
        level++;
      }
      break;
    }
    int kind = (buf.ecx >> 8) & 0xff;
    if (kind == 1) {
      // SMT level
      threadLevel = level;
      coreLevel = -1;
      pkgLevel = -1;
      __kmp_nThreadsPerCore = buf.ebx & 0xffff;
      if (__kmp_nThreadsPerCore == 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
    } else if (kind == 2) {
      // core level
      coreLevel = level;
      pkgLevel = -1;
      nCoresPerPkg = buf.ebx & 0xffff;
      if (nCoresPerPkg == 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
    } else {
      if (level <= 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
      if (pkgLevel >= 0) {
        continue;
      }
      pkgLevel = level;
      nPackages = buf.ebx & 0xffff;
      if (nPackages == 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
    }
  }
  int depth = level;

  // In the above loop, "level" was counted from the finest level (usually
  // thread) to the coarsest. The caller expects that we will place the labels
  // in (*address2os)[].first.labels[] in the inverse order, so we need to
  // invert the vars saying which level means what.
  if (threadLevel >= 0) {
    threadLevel = depth - threadLevel - 1;
  }
  if (coreLevel >= 0) {
    coreLevel = depth - coreLevel - 1;
  }
  KMP_DEBUG_ASSERT(pkgLevel >= 0);
  pkgLevel = depth - pkgLevel - 1;
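  // For example (hypothetical machine): if the walk above found the thread
  // level at 0, the core level at 1, and the package level at 2 (depth == 3),
  // the inversion yields threadLevel == 2, coreLevel == 1, and pkgLevel == 0,
  // so labels[0] holds the package id, as the sorting code expects.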

  // The algorithm used starts by setting the affinity to each available thread
  // and retrieving info from the cpuid instruction, so if we are not capable
  // of calling __kmp_get_system_affinity() and __kmp_set_system_affinity(),
  // then we need to do something else - use the defaults that we calculated
  // from issuing cpuid without binding to each proc.
  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from cpuid on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity_type == affinity_none);

    __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    return 0;
  }

  // From here on, we can assume that it is safe to call
  // __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
  // __kmp_affinity_type = affinity_none.

  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  __kmp_get_system_affinity(oldMask, TRUE);

  // Allocate the data structure to be returned.
  AddrUnsPair *retval =
      (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);

  // Run through each of the available contexts, binding the current thread
  // to it, and obtaining the pertinent information using the cpuid instr.
  unsigned int proc;
  int nApics = 0;
  KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
      continue;
    }
    KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);

    __kmp_affinity_dispatch->bind_thread(proc);

    // Extract labels for each level in the machine topology map from Apic ID.
    Address addr(depth);
    int prev_shift = 0;

    for (level = 0; level < depth; level++) {
      __kmp_x86_cpuid(11, level, &buf);
      unsigned apicId = buf.edx;
      if (buf.ebx == 0) {
        if (level != depth - 1) {
          KMP_CPU_FREE(oldMask);
          *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
          return -1;
        }
        addr.labels[depth - level - 1] = apicId >> prev_shift;
        level++;
        break;
      }
      int shift = buf.eax & 0x1f;
      int mask = (1 << shift) - 1;
      addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
      prev_shift = shift;
    }
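    // Example (hypothetical ids): with an SMT shift of 1 and a core shift of
    // 4, an x2APIC id of 0x13 decodes as thread 1 (0x13 & 0x1), core 1
    // ((0x13 & 0xf) >> 1), and package 1 (0x13 >> 4).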
    if (level != depth) {
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
      return -1;
    }

    retval[nApics] = AddrUnsPair(addr, proc);
    nApics++;
  }

  // We've collected all the info we need.
  // Restore the old affinity mask for this thread.
  __kmp_set_system_affinity(oldMask, TRUE);

  // If there's only one thread context to bind to, return now.
  KMP_ASSERT(nApics > 0);
  if (nApics == 1) {
    __kmp_ncores = nPackages = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

      KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(retval);
      KMP_CPU_FREE(oldMask);
      return 0;
    }

    // Form an Address object which only includes the package level.
    Address addr(1);
    addr.labels[0] = retval[0].first.labels[pkgLevel];
    retval[0].first = addr;

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
    }

    *address2os = retval;
    KMP_CPU_FREE(oldMask);
    return 1;
  }

  // Sort the table by physical Id.
  qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

  // Find the radix at each of the levels.
  unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
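  // totals[l] counts all distinct nodes seen at level l, counts[l] counts the
  // nodes under the current parent, maxCt[l] tracks the widest parent seen so
  // far, and last[l] holds the previous label, used to detect transitions.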
  for (level = 0; level < depth; level++) {
    totals[level] = 1;
    maxCt[level] = 1;
    counts[level] = 1;
    last[level] = retval[0].first.labels[level];
  }

  // From here on, the iteration variable "level" runs from the finest level to
  // the coarsest, i.e. we iterate forward through
  // (*address2os)[].first.labels[] - in the previous loops, we iterated
  // backwards.
  for (proc = 1; (int)proc < nApics; proc++) {
    int level;
    for (level = 0; level < depth; level++) {
      if (retval[proc].first.labels[level] != last[level]) {
        int j;
        for (j = level + 1; j < depth; j++) {
          totals[j]++;
          counts[j] = 1;
          // The line below causes printing of incorrect topology information
          // when the max value for some level (maxCt[level]) is encountered
          // earlier than a smaller value while going through the array. For
          // example, if pkg0 has 4 cores and pkg1 has 2 cores, then
          // maxCt[1] == 2, whereas it must be 4.
          // TODO!!! Check if it can be commented safely
          // maxCt[j] = 1;
          last[j] = retval[proc].first.labels[j];
        }
        totals[level]++;
        counts[level]++;
        if (counts[level] > maxCt[level]) {
          maxCt[level] = counts[level];
        }
        last[level] = retval[proc].first.labels[level];
        break;
      } else if (level == depth - 1) {
        __kmp_free(last);
        __kmp_free(maxCt);
        __kmp_free(counts);
        __kmp_free(totals);
        __kmp_free(retval);
        KMP_CPU_FREE(oldMask);
        *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
        return -1;
      }
    }
  }

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  // Make sure all these vars are set correctly, and return if affinity is not
  // enabled.
  if (threadLevel >= 0) {
    __kmp_nThreadsPerCore = maxCt[threadLevel];
  } else {
    __kmp_nThreadsPerCore = 1;
  }
  nPackages = totals[pkgLevel];

  if (coreLevel >= 0) {
    __kmp_ncores = totals[coreLevel];
    nCoresPerPkg = maxCt[coreLevel];
  } else {
    __kmp_ncores = nPackages;
    nCoresPerPkg = 1;
  }

  // Check to see if the machine topology is uniform
  unsigned prod = maxCt[0];
  for (level = 1; level < depth; level++) {
    prod *= maxCt[level];
  }
  bool uniform = (prod == totals[level - 1]);
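  // E.g. (hypothetical) 2 packages x 4 cores x 2 threads is uniform: the
  // product of the per-level maxima (16) equals the leaf count. If one
  // package had 4 cores and the other only 2, the product would exceed the
  // leaf count and the topology would be reported as non-uniform.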

  // Print the machine topology summary.
  if (__kmp_affinity_verbose) {
    char mask[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

    KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (uniform) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }

    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);

    __kmp_str_buf_print(&buf, "%d", totals[0]);
    for (level = 1; level <= pkgLevel; level++) {
      __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
    }
    KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);

    __kmp_str_buf_free(&buf);
  }
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  for (proc = 0; (int)proc < nApics; ++proc) {
    __kmp_pu_os_idx[proc] = retval[proc].second;
  }
  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(last);
    __kmp_free(maxCt);
    __kmp_free(counts);
    __kmp_free(totals);
    __kmp_free(retval);
    KMP_CPU_FREE(oldMask);
    return 0;
  }

  // Find any levels with radix 1, and remove them from the map
  // (except for the package level).
  int new_depth = 0;
  for (level = 0; level < depth; level++) {
    if ((maxCt[level] == 1) && (level != pkgLevel)) {
      continue;
    }
    new_depth++;
  }

  // If we are removing any levels, allocate a new vector to return,
  // and copy the relevant information to it.
  if (new_depth != depth) {
    AddrUnsPair *new_retval =
        (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
    for (proc = 0; (int)proc < nApics; proc++) {
      Address addr(new_depth);
      new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
    }
    int new_level = 0;
    int newPkgLevel = -1;
    int newCoreLevel = -1;
    int newThreadLevel = -1;
    int i;
    for (level = 0; level < depth; level++) {
      if ((maxCt[level] == 1) && (level != pkgLevel)) {
        // Remove this level. Never remove the package level
        continue;
      }
      if (level == pkgLevel) {
        newPkgLevel = level;
      }
      if (level == coreLevel) {
        newCoreLevel = level;
      }
      if (level == threadLevel) {
        newThreadLevel = level;
      }
      for (proc = 0; (int)proc < nApics; proc++) {
        new_retval[proc].first.labels[new_level] =
            retval[proc].first.labels[level];
      }
      new_level++;
    }

    __kmp_free(retval);
    retval = new_retval;
    depth = new_depth;
    pkgLevel = newPkgLevel;
    coreLevel = newCoreLevel;
    threadLevel = newThreadLevel;
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled
    // in the machine topology map.
    __kmp_affinity_gran_levels = 0;
    if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
      __kmp_affinity_gran_levels++;
    }
    if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
      __kmp_affinity_gran_levels++;
    }
    if (__kmp_affinity_gran > affinity_gran_package) {
      __kmp_affinity_gran_levels++;
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel, coreLevel,
                                  threadLevel);
  }

  __kmp_free(last);
  __kmp_free(maxCt);
  __kmp_free(counts);
  __kmp_free(totals);
  KMP_CPU_FREE(oldMask);
  *address2os = retval;
  return depth;
}

#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#define osIdIndex 0
#define threadIdIndex 1
#define coreIdIndex 2
#define pkgIdIndex 3
#define nodeIdIndex 4

typedef unsigned *ProcCpuInfo;
static unsigned maxIndex = pkgIdIndex;

static int __kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b) {
  const unsigned *aa = (const unsigned *)a;
  const unsigned *bb = (const unsigned *)b;
  if (aa[osIdIndex] < bb[osIdIndex])
    return -1;
  if (aa[osIdIndex] > bb[osIdIndex])
    return 1;
  return 0;
}

static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a,
                                                  const void *b) {
  unsigned i;
  const unsigned *aa = *(RCAST(unsigned **, CCAST(void *, a)));
  const unsigned *bb = *(RCAST(unsigned **, CCAST(void *, b)));
  for (i = maxIndex;; i--) {
    if (aa[i] < bb[i])
      return -1;
    if (aa[i] > bb[i])
      return 1;
    if (i == osIdIndex)
      break;
  }
  return 0;
}

// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
// affinity map.
static int __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os,
                                             int *line,
                                             kmp_i18n_id_t *const msg_id,
                                             FILE *f) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

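  // A /proc/cpuinfo record looks like (values hypothetical):
  //   processor       : 0
  //   physical id     : 0
  //   core id         : 0
  // with records separated by blank lines, so counting "processor" fields
  // tells us how many per-proc rows to allocate.
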
  // Scan the file once, counting the number of "processor" (osId) fields
  // and finding the highest value of <n> for a node_<n> field.
  char buf[256];
  unsigned num_records = 0;
  while (!feof(f)) {
    buf[sizeof(buf) - 1] = 1;
    if (!fgets(buf, sizeof(buf), f)) {
      // Read errors presumably because of EOF
      break;
    }

    char s1[] = "processor";
    if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
      num_records++;
      continue;
    }

    // FIXME - this will match "node_<n> <garbage>"
    unsigned level;
    if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
      if (nodeIdIndex + level >= maxIndex) {
        maxIndex = nodeIdIndex + level;
      }
      continue;
    }
  }

  // Check for empty file / no valid processor records, or too many. The number
  // of records can't exceed the number of valid bits in the affinity mask.
  if (num_records == 0) {
    *line = 0;
    *msg_id = kmp_i18n_str_NoProcRecords;
    return -1;
  }
  if (num_records > (unsigned)__kmp_xproc) {
    *line = 0;
    *msg_id = kmp_i18n_str_TooManyProcRecords;
    return -1;
  }

  // Set the file pointer back to the beginning, so that we can scan the file
  // again, this time performing a full parse of the data. Allocate a vector of
  // ProcCpuInfo objects, where we will place the data. Adding an extra element
  // at the end allows us to remove a lot of extra checks for termination
  // conditions.
  if (fseek(f, 0, SEEK_SET) != 0) {
    *line = 0;
    *msg_id = kmp_i18n_str_CantRewindCpuinfo;
    return -1;
  }

  // Allocate the array of records to store the proc info in. The dummy
  // element at the end makes the logic in filling them out easier to code.
  unsigned **threadInfo =
      (unsigned **)__kmp_allocate((num_records + 1) * sizeof(unsigned *));
  unsigned i;
  for (i = 0; i <= num_records; i++) {
    threadInfo[i] =
        (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  }

#define CLEANUP_THREAD_INFO                                                    \
  for (i = 0; i <= num_records; i++) {                                         \
    __kmp_free(threadInfo[i]);                                                 \
  }                                                                            \
  __kmp_free(threadInfo);

  // A value of UINT_MAX means that we didn't find the field
  unsigned __index;

#define INIT_PROC_INFO(p)                                                      \
  for (__index = 0; __index <= maxIndex; __index++) {                          \
    (p)[__index] = UINT_MAX;                                                   \
  }

  for (i = 0; i <= num_records; i++) {
    INIT_PROC_INFO(threadInfo[i]);
  }

  unsigned num_avail = 0;
  *line = 0;
  while (!feof(f)) {
    // Create an inner scoping level, so that all the goto targets at the end
    // of the loop appear in an outer scoping level. This avoids warnings about
    // jumping past an initialization to a target in the same block.
    {
      buf[sizeof(buf) - 1] = 1;
      bool long_line = false;
      if (!fgets(buf, sizeof(buf), f)) {
        // Read errors presumably because of EOF
        // If there is valid data in threadInfo[num_avail], then fake
        // a blank line to ensure that the last address gets parsed.
        bool valid = false;
        for (i = 0; i <= maxIndex; i++) {
          if (threadInfo[num_avail][i] != UINT_MAX) {
            valid = true;
          }
        }
        if (!valid) {
          break;
        }
        buf[0] = 0;
      } else if (!buf[sizeof(buf) - 1]) {
        // The line is longer than the buffer. Set a flag and don't
        // emit an error if we were going to ignore the line, anyway.
        long_line = true;

#define CHECK_LINE                                                             \
  if (long_line) {                                                             \
    CLEANUP_THREAD_INFO;                                                       \
    *msg_id = kmp_i18n_str_LongLineCpuinfo;                                    \
    return -1;                                                                 \
  }
      }
      (*line)++;

      char s1[] = "processor";
      if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s1) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][osIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][osIdIndex] = val;
#if KMP_OS_LINUX && USE_SYSFS_INFO
        char path[256];
        KMP_SNPRINTF(
            path, sizeof(path),
            "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
            threadInfo[num_avail][osIdIndex]);
        __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);

        KMP_SNPRINTF(path, sizeof(path),
                     "/sys/devices/system/cpu/cpu%u/topology/core_id",
                     threadInfo[num_avail][osIdIndex]);
        __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
        continue;
#else
      }
      char s2[] = "physical id";
      if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s2) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][pkgIdIndex] = val;
        continue;
      }
      char s3[] = "core id";
      if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s3) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][coreIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][coreIdIndex] = val;
        continue;
#endif // KMP_OS_LINUX && USE_SYSFS_INFO
      }
      char s4[] = "thread id";
      if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s4) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][threadIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][threadIdIndex] = val;
        continue;
      }
      unsigned level;
      if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s4) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        KMP_ASSERT(nodeIdIndex + level <= maxIndex);
        if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][nodeIdIndex + level] = val;
        continue;
      }

      // We didn't recognize the leading token on the line. There are lots of
      // leading tokens that we don't recognize - if the line isn't empty, go
      // on to the next line.
      if ((*buf != 0) && (*buf != '\n')) {
        // If the line is longer than the buffer, read characters
        // until we find a newline.
        if (long_line) {
          int ch;
          while (((ch = fgetc(f)) != EOF) && (ch != '\n'))
            ;
        }
        continue;
      }

      // A newline has signalled the end of the processor record.
      // Check that there aren't too many procs specified.
      if ((int)num_avail == __kmp_xproc) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_TooManyEntries;
        return -1;
      }

      // Check for missing fields. The osId field must be there, and we
      // currently require that the physical id field is specified, also.
      if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_MissingProcField;
        return -1;
      }
      if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_MissingPhysicalIDField;
        return -1;
      }

      // Skip this proc if it is not included in the machine model.
      if (!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex],
                         __kmp_affin_fullMask)) {
        INIT_PROC_INFO(threadInfo[num_avail]);
        continue;
      }

      // We have a successful parse of this proc's info.
      // Increment the counter, and prepare for the next proc.
      num_avail++;
      KMP_ASSERT(num_avail <= num_records);
      INIT_PROC_INFO(threadInfo[num_avail]);
    }
    continue;

  no_val:
    CLEANUP_THREAD_INFO;
    *msg_id = kmp_i18n_str_MissingValCpuinfo;
    return -1;

  dup_field:
    CLEANUP_THREAD_INFO;
    *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
    return -1;
  }
  *line = 0;

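  // At this point threadInfo holds one row per usable OS proc; a row might
  // (hypothetically) read {osId = 3, threadId = 1, coreId = 1, pkgId = 0},
  // with UINT_MAX left in any field that was absent from the file.
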
#if KMP_MIC && REDUCE_TEAM_SIZE
  unsigned teamSize = 0;
#endif // KMP_MIC && REDUCE_TEAM_SIZE

  // check for num_records == __kmp_xproc ???

  // If there's only one thread context to bind to, form an Address object with
  // depth 1 and return immediately (or, if affinity is off, set address2os to
  // NULL and return).
  //
  // If it is configured to omit the package level when there is only a single
  // package, the logic at the end of this routine won't work if there is only
  // a single thread - it would try to form an Address object with depth 0.
  KMP_ASSERT(num_avail > 0);
  KMP_ASSERT(num_avail <= num_records);
  if (num_avail == 1) {
    __kmp_ncores = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
    if (__kmp_affinity_verbose) {
      if (!KMP_AFFINITY_CAPABLE()) {
        KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                  __kmp_affin_fullMask);
        KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
          KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
          KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      }
      int index;
      kmp_str_buf_t buf;
      __kmp_str_buf_init(&buf);
      __kmp_str_buf_print(&buf, "1");
      for (index = maxIndex - 1; index > pkgIdIndex; index--) {
        __kmp_str_buf_print(&buf, " x 1");
      }
      KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
      __kmp_str_buf_free(&buf);
    }

    if (__kmp_affinity_type == affinity_none) {
      CLEANUP_THREAD_INFO;
      return 0;
    }

    *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair));
    Address addr(1);
    addr.labels[0] = threadInfo[0][pkgIdIndex];
    (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
    }

    CLEANUP_THREAD_INFO;
    return 1;
  }

  // Sort the threadInfo table by physical Id.
  qsort(threadInfo, num_avail, sizeof(*threadInfo),
        __kmp_affinity_cmp_ProcCpuInfo_phys_id);

  // The table is now sorted by pkgId / coreId / threadId, but we really don't
  // know the radix of any of the fields. pkgId's may be sparsely assigned among
  // the chips on a system. Although coreId's are usually assigned
  // [0 .. coresPerPkg-1] and threadId's are usually assigned
  // [0..threadsPerCore-1], we don't want to make any such assumptions.
  //
  // For that matter, we don't know what coresPerPkg and threadsPerCore (or the
  // total # packages) are at this point - we want to determine that now. We
  // only have an upper bound on the first two figures.
  unsigned *counts =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *maxCt =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *totals =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *lastId =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));

  bool assign_thread_ids = false;
  unsigned threadIdCt;
  unsigned index;

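  // The radix check below walks the sorted records and, at each index level,
  // tracks how many distinct ids appear under the current parent (counts),
  // the widest parent seen so far (maxCt), and the grand total (totals). If
  // duplicate rows show up because the optional "thread id" field was
  // missing, we restart once with assign_thread_ids == true and synthesize
  // the thread ids ourselves.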
restart_radix_check:
  threadIdCt = 0;

  // Initialize the counter arrays with data from threadInfo[0].
  if (assign_thread_ids) {
    if (threadInfo[0][threadIdIndex] == UINT_MAX) {
      threadInfo[0][threadIdIndex] = threadIdCt++;
    } else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
      threadIdCt = threadInfo[0][threadIdIndex] + 1;
    }
  }
  for (index = 0; index <= maxIndex; index++) {
    counts[index] = 1;
    maxCt[index] = 1;
    totals[index] = 1;
    lastId[index] = threadInfo[0][index];
  }

  // Run through the rest of the OS procs.
  for (i = 1; i < num_avail; i++) {
    // Find the most significant index whose id differs from the id for the
    // previous OS proc.
    for (index = maxIndex; index >= threadIdIndex; index--) {
      if (assign_thread_ids && (index == threadIdIndex)) {
        // Auto-assign the thread id field if it wasn't specified.
        if (threadInfo[i][threadIdIndex] == UINT_MAX) {
          threadInfo[i][threadIdIndex] = threadIdCt++;
        }
        // Apparently the thread id field was specified for some entries and
        // not others. Start the thread id counter off at the next higher
        // thread id.
        else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
          threadIdCt = threadInfo[i][threadIdIndex] + 1;
        }
      }
      if (threadInfo[i][index] != lastId[index]) {
        // Run through all indices which are less significant, and reset the
        // counts to 1. At all levels up to and including index, we need to
        // increment the totals and record the last id.
        unsigned index2;
        for (index2 = threadIdIndex; index2 < index; index2++) {
          totals[index2]++;
          if (counts[index2] > maxCt[index2]) {
            maxCt[index2] = counts[index2];
          }
          counts[index2] = 1;
          lastId[index2] = threadInfo[i][index2];
        }
        counts[index]++;
        totals[index]++;
        lastId[index] = threadInfo[i][index];

        if (assign_thread_ids && (index > threadIdIndex)) {

#if KMP_MIC && REDUCE_TEAM_SIZE
          // The default team size is the total #threads in the machine
          // minus 1 thread for every core that has 3 or more threads.
          teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
#endif // KMP_MIC && REDUCE_TEAM_SIZE

          // Restart the thread counter, as we are on a new core.
          threadIdCt = 0;

          // Auto-assign the thread id field if it wasn't specified.
          if (threadInfo[i][threadIdIndex] == UINT_MAX) {
            threadInfo[i][threadIdIndex] = threadIdCt++;
          }

          // Apparently the thread id field was specified for some entries
          // and not others. Start the thread id counter off at the next
          // higher thread id.
          else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
            threadIdCt = threadInfo[i][threadIdIndex] + 1;
          }
        }
        break;
      }
    }
    if (index < threadIdIndex) {
      // If thread ids were specified, it is an error if they are not unique.
      // Also, check that we haven't already restarted the loop (to be safe -
      // shouldn't need to).
      if ((threadInfo[i][threadIdIndex] != UINT_MAX) || assign_thread_ids) {
        __kmp_free(lastId);
        __kmp_free(totals);
        __kmp_free(maxCt);
        __kmp_free(counts);
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
        return -1;
      }

      // If the thread ids were not specified and we see entries that are
      // duplicates, start the loop over and assign the thread ids manually.
      assign_thread_ids = true;
      goto restart_radix_check;
    }
  }

#if KMP_MIC && REDUCE_TEAM_SIZE
  // The default team size is the total #threads in the machine
  // minus 1 thread for every core that has 3 or more threads.
  teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
#endif // KMP_MIC && REDUCE_TEAM_SIZE

  for (index = threadIdIndex; index <= maxIndex; index++) {
    if (counts[index] > maxCt[index]) {
      maxCt[index] = counts[index];
    }
  }

  __kmp_nThreadsPerCore = maxCt[threadIdIndex];
  nCoresPerPkg = maxCt[coreIdIndex];
  nPackages = totals[pkgIdIndex];

  // Check to see if the machine topology is uniform
  unsigned prod = totals[maxIndex];
  for (index = threadIdIndex; index < maxIndex; index++) {
    prod *= maxCt[index];
  }
  bool uniform = (prod == totals[threadIdIndex]);

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  // Make sure all these vars are set correctly, and return now if affinity is
  // not enabled.
  __kmp_ncores = totals[coreIdIndex];

  if (__kmp_affinity_verbose) {
    if (!KMP_AFFINITY_CAPABLE()) {
      KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (uniform) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
    } else {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                __kmp_affin_fullMask);
      KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (uniform) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
    }
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);

    __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
    for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
      __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
    }
    KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
               maxCt[threadIdIndex], __kmp_ncores);

    __kmp_str_buf_free(&buf);
  }

#if KMP_MIC && REDUCE_TEAM_SIZE
  // Set the default team size.
  if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
    __kmp_dflt_team_nth = teamSize;
    KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting "
                  "__kmp_dflt_team_nth = %d\n",
                  __kmp_dflt_team_nth));
  }
#endif // KMP_MIC && REDUCE_TEAM_SIZE

  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(num_avail == __kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  for (i = 0; i < num_avail; ++i) { // fill the os indices
    __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
  }

  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(lastId);
    __kmp_free(totals);
    __kmp_free(maxCt);
    __kmp_free(counts);
    CLEANUP_THREAD_INFO;
    return 0;
  }

  // Count the number of levels which have more nodes at that level than at the
  // parent's level (with there being an implicit root node of the top level).
  // This is equivalent to saying that there is at least one node at this level
  // which has a sibling. These levels are in the map, and the package level is
  // always in the map.
  bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
  int level = 0;
  for (index = threadIdIndex; index < maxIndex; index++) {
    KMP_ASSERT(totals[index] >= totals[index + 1]);
    inMap[index] = (totals[index] > totals[index + 1]);
  }
  inMap[maxIndex] = (totals[maxIndex] > 1);
  inMap[pkgIdIndex] = true;
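  // Example (hypothetical): a single package with 4 cores and no
  // hyperthreading gives totals[threadIdIndex] == totals[coreIdIndex] == 4
  // and totals[pkgIdIndex] == 1, so only the core and (forced) package
  // levels survive into the map, for a depth of 2.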

  int depth = 0;
  for (index = threadIdIndex; index <= maxIndex; index++) {
    if (inMap[index]) {
      depth++;
    }
  }
  KMP_ASSERT(depth > 0);

  // Construct the data structure that is to be returned.
  *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * num_avail);
  int pkgLevel = -1;
  int coreLevel = -1;
  int threadLevel = -1;

  for (i = 0; i < num_avail; ++i) {
    Address addr(depth);
    unsigned os = threadInfo[i][osIdIndex];
    int src_index;
    int dst_index = 0;

    for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
      if (!inMap[src_index]) {
        continue;
      }
      addr.labels[dst_index] = threadInfo[i][src_index];
      if (src_index == pkgIdIndex) {
        pkgLevel = dst_index;
      } else if (src_index == coreIdIndex) {
        coreLevel = dst_index;
      } else if (src_index == threadIdIndex) {
        threadLevel = dst_index;
      }
      dst_index++;
    }
    (*address2os)[i] = AddrUnsPair(addr, os);
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled
    // in the machine topology map.
    unsigned src_index;
    __kmp_affinity_gran_levels = 0;
    for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
      if (!inMap[src_index]) {
        continue;
      }
      switch (src_index) {
      case threadIdIndex:
        if (__kmp_affinity_gran > affinity_gran_thread) {
          __kmp_affinity_gran_levels++;
        }
        break;
      case coreIdIndex:
        if (__kmp_affinity_gran > affinity_gran_core) {
          __kmp_affinity_gran_levels++;
        }
        break;
      case pkgIdIndex:
        if (__kmp_affinity_gran > affinity_gran_package) {
          __kmp_affinity_gran_levels++;
        }
        break;
      }
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
                                  coreLevel, threadLevel);
  }

  __kmp_free(inMap);
  __kmp_free(lastId);
  __kmp_free(totals);
  __kmp_free(maxCt);
  __kmp_free(counts);
  CLEANUP_THREAD_INFO;
  return depth;
}

// Create and return a table of affinity masks, indexed by OS thread ID.
// This routine handles OR'ing together all the affinity masks of threads
// that are sufficiently close, if granularity > fine.
static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex,
                                            unsigned *numUnique,
                                            AddrUnsPair *address2os,
                                            unsigned numAddrs) {
  // First form a table of affinity masks in order of OS thread id.
  unsigned depth;
  unsigned maxOsId;
  unsigned i;

  KMP_ASSERT(numAddrs > 0);
  depth = address2os[0].first.depth;

  maxOsId = 0;
  for (i = 0; i < numAddrs; i++) {
    unsigned osId = address2os[i].second;
    if (osId > maxOsId) {
      maxOsId = osId;
    }
  }
  kmp_affin_mask_t *osId2Mask;
  KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId + 1));

  // Sort the address2os table according to physical order. Doing so will put
  // all threads on the same core/package/node in consecutive locations.
  qsort(address2os, numAddrs, sizeof(*address2os),
        __kmp_affinity_cmp_Address_labels);

  KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
  if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
    KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
  }
  if (__kmp_affinity_gran_levels >= (int)depth) {
    if (__kmp_affinity_verbose ||
        (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
      KMP_WARNING(AffThreadsMayMigrate);
    }
  }

  // Run through the table, forming the masks for all threads on each core.
  // Threads on the same core will have identical "Address" objects, not
  // considering the last level, which must be the thread id. All threads on a
  // core will appear consecutively.
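  // E.g. with granularity=core (one granularity level), two SMT threads on
  // the same core differ only in their last label, so isClose() below groups
  // them and their OS proc bits get OR'ed into a single shared mask.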
  unsigned unique = 0;
  unsigned j = 0; // index of 1st thread on core
  unsigned leader = 0;
  Address *leaderAddr = &(address2os[0].first);
  kmp_affin_mask_t *sum;
  KMP_CPU_ALLOC_ON_STACK(sum);
  KMP_CPU_ZERO(sum);
  KMP_CPU_SET(address2os[0].second, sum);
  for (i = 1; i < numAddrs; i++) {
    // If this thread is sufficiently close to the leader (within the
    // granularity setting), then set the bit for this os thread in the
    // affinity mask for this group, and go on to the next thread.
    if (leaderAddr->isClose(address2os[i].first, __kmp_affinity_gran_levels)) {
      KMP_CPU_SET(address2os[i].second, sum);
      continue;
    }

    // For every thread in this group, copy the mask to the thread's entry in
    // the osId2Mask table. Mark the first address as a leader.
    for (; j < i; j++) {
      unsigned osId = address2os[j].second;
      KMP_DEBUG_ASSERT(osId <= maxOsId);
      kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
      KMP_CPU_COPY(mask, sum);
      address2os[j].first.leader = (j == leader);
    }
    unique++;

    // Start a new mask.
    leader = i;
    leaderAddr = &(address2os[i].first);
    KMP_CPU_ZERO(sum);
    KMP_CPU_SET(address2os[i].second, sum);
  }

  // For every thread in last group, copy the mask to the thread's
  // entry in the osId2Mask table.
  for (; j < i; j++) {
    unsigned osId = address2os[j].second;
    KMP_DEBUG_ASSERT(osId <= maxOsId);
    kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
    KMP_CPU_COPY(mask, sum);
    address2os[j].first.leader = (j == leader);
  }
  unique++;
  KMP_CPU_FREE_FROM_STACK(sum);

  *maxIndex = maxOsId;
  *numUnique = unique;
  return osId2Mask;
}

// Stuff for the affinity proclist parsers. It's easier to declare these vars
// as file-static than to try and pass them through the calling sequence of
// the recursive-descent OMP_PLACES parser.
static kmp_affin_mask_t *newMasks;
static int numNewMasks;
static int nextNewMask;

#define ADD_MASK(_mask)                                                        \
  {                                                                            \
    if (nextNewMask >= numNewMasks) {                                          \
      int i;                                                                   \
      numNewMasks *= 2;                                                        \
      kmp_affin_mask_t *temp;                                                  \
      KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks);                         \
      for (i = 0; i < numNewMasks / 2; i++) {                                  \
        kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);                    \
        kmp_affin_mask_t *dest = KMP_CPU_INDEX(temp, i);                       \
        KMP_CPU_COPY(dest, src);                                               \
      }                                                                        \
      KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks / 2);                  \
      newMasks = temp;                                                         \
    }                                                                          \
    KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask));               \
    nextNewMask++;                                                             \
  }

#define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId)                             \
  {                                                                            \
    if (((_osId) > _maxOsId) ||                                                \
        (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) {     \
      if (__kmp_affinity_verbose ||                                            \
          (__kmp_affinity_warnings &&                                          \
           (__kmp_affinity_type != affinity_none))) {                          \
        KMP_WARNING(AffIgnoreInvalidProcID, _osId);                            \
      }                                                                        \
    } else {                                                                   \
      ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId)));                            \
    }                                                                          \
  }

// Re-parse the proclist (for the explicit affinity type), and form the list
// of affinity newMasks indexed by gtid.
static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
                                            unsigned int *out_numMasks,
                                            const char *proclist,
                                            kmp_affin_mask_t *osId2Mask,
                                            int maxOsId) {
  int i;
  const char *scan = proclist;
  const char *next = proclist;

  // Use the internal allocator for the temporary mask vector; ADD_MASK
  // doubles the array on demand, playing the role realloc() would.
  numNewMasks = 2;
  KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
  nextNewMask = 0;
  kmp_affin_mask_t *sumMask;
  KMP_CPU_ALLOC(sumMask);
  int setSize = 0;

2512 int start, end, stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002513
Jonathan Peyton30419822017-05-12 18:01:32 +00002514 SKIP_WS(scan);
2515 next = scan;
2516 if (*next == '\0') {
2517 break;
2518 }
2519
2520 if (*next == '{') {
2521 int num;
2522 setSize = 0;
2523 next++; // skip '{'
2524 SKIP_WS(next);
2525 scan = next;
2526
2527 // Read the first integer in the set.
2528 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad proclist");
2529 SKIP_DIGITS(next);
2530 num = __kmp_str_to_int(scan, *next);
2531 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2532
2533 // Copy the mask for that osId to the sum (union) mask.
2534 if ((num > maxOsId) ||
2535 (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2536 if (__kmp_affinity_verbose ||
2537 (__kmp_affinity_warnings &&
2538 (__kmp_affinity_type != affinity_none))) {
2539 KMP_WARNING(AffIgnoreInvalidProcID, num);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002540 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002541 KMP_CPU_ZERO(sumMask);
2542 } else {
2543 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2544 setSize = 1;
2545 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002546
Jonathan Peyton30419822017-05-12 18:01:32 +00002547 for (;;) {
2548 // Check for end of set.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002549 SKIP_WS(next);
Jonathan Peyton30419822017-05-12 18:01:32 +00002550 if (*next == '}') {
2551 next++; // skip '}'
2552 break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002553 }
2554
Jim Cownie5e8470a2013-09-27 10:38:44 +00002555 // Skip optional comma.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002556 if (*next == ',') {
Jonathan Peyton30419822017-05-12 18:01:32 +00002557 next++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002558 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002559 SKIP_WS(next);
2560
2561 // Read the next integer in the set.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002562 scan = next;
Jonathan Peyton30419822017-05-12 18:01:32 +00002563 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2564
2565 SKIP_DIGITS(next);
2566 num = __kmp_str_to_int(scan, *next);
2567 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2568
2569 // Add the mask for that osId to the sum mask.
2570 if ((num > maxOsId) ||
2571 (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2572 if (__kmp_affinity_verbose ||
2573 (__kmp_affinity_warnings &&
2574 (__kmp_affinity_type != affinity_none))) {
2575 KMP_WARNING(AffIgnoreInvalidProcID, num);
2576 }
2577 } else {
2578 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2579 setSize++;
2580 }
2581 }
2582 if (setSize > 0) {
2583 ADD_MASK(sumMask);
2584 }
2585
2586 SKIP_WS(next);
2587 if (*next == ',') {
2588 next++;
2589 }
2590 scan = next;
2591 continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002592 }
2593
Jonathan Peyton30419822017-05-12 18:01:32 +00002594 // Read the first integer.
2595 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2596 SKIP_DIGITS(next);
2597 start = __kmp_str_to_int(scan, *next);
2598 KMP_ASSERT2(start >= 0, "bad explicit proc list");
2599 SKIP_WS(next);
2600
2601 // If this isn't a range, then add a mask to the list and go on.
2602 if (*next != '-') {
2603 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2604
2605 // Skip optional comma.
2606 if (*next == ',') {
2607 next++;
2608 }
2609 scan = next;
2610 continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002611 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002612
2613 // This is a range. Skip over the '-' and read in the 2nd int.
2614 next++; // skip '-'
2615 SKIP_WS(next);
2616 scan = next;
2617 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2618 SKIP_DIGITS(next);
2619 end = __kmp_str_to_int(scan, *next);
2620 KMP_ASSERT2(end >= 0, "bad explicit proc list");
2621
2622 // Check for a stride parameter
2623 stride = 1;
2624 SKIP_WS(next);
2625 if (*next == ':') {
2626 // A stride is specified. Skip over the ':" and read the 3rd int.
2627 int sign = +1;
2628 next++; // skip ':'
2629 SKIP_WS(next);
2630 scan = next;
2631 if (*next == '-') {
2632 sign = -1;
2633 next++;
2634 SKIP_WS(next);
2635 scan = next;
2636 }
2637 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2638 SKIP_DIGITS(next);
2639 stride = __kmp_str_to_int(scan, *next);
2640 KMP_ASSERT2(stride >= 0, "bad explicit proc list");
2641 stride *= sign;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002642 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002643
2644 // Do some range checks.
2645 KMP_ASSERT2(stride != 0, "bad explicit proc list");
2646 if (stride > 0) {
2647 KMP_ASSERT2(start <= end, "bad explicit proc list");
2648 } else {
2649 KMP_ASSERT2(start >= end, "bad explicit proc list");
2650 }
2651 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
2652
2653 // Add the mask for each OS proc # to the list.
2654 if (stride > 0) {
2655 do {
2656 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2657 start += stride;
2658 } while (start <= end);
2659 } else {
2660 do {
2661 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2662 start += stride;
2663 } while (start >= end);
2664 }
2665
2666 // Skip optional comma.
2667 SKIP_WS(next);
2668 if (*next == ',') {
2669 next++;
2670 }
2671 scan = next;
2672 }
2673
2674 *out_numMasks = nextNewMask;
2675 if (nextNewMask == 0) {
2676 *out_masks = NULL;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002677 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
Jonathan Peyton30419822017-05-12 18:01:32 +00002678 return;
2679 }
2680 KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
2681 for (i = 0; i < nextNewMask; i++) {
2682 kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
2683 kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
2684 KMP_CPU_COPY(dest, src);
2685 }
2686 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
2687 KMP_CPU_FREE(sumMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002688}
2689
Jonathan Peyton30419822017-05-12 18:01:32 +00002690#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002691
2692/*-----------------------------------------------------------------------------
Jim Cownie5e8470a2013-09-27 10:38:44 +00002693Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
2694places. Again, Here is the grammar:
2695
2696place_list := place
2697place_list := place , place_list
2698place := num
2699place := place : num
2700place := place : num : signed
2701place := { subplacelist }
2702place := ! place // (lowest priority)
2703subplace_list := subplace
2704subplace_list := subplace , subplace_list
2705subplace := num
2706subplace := num : num
2707subplace := num : num : signed
2708signed := num
2709signed := + signed
2710signed := - signed
Jim Cownie5e8470a2013-09-27 10:38:44 +00002711-----------------------------------------------------------------------------*/
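
// For example (hypothetical): "{0,1},{2,3}" describes two places of two OS
// procs each, while "{0:4}:2:4" expands the base place {0,1,2,3} into two
// places, the second one shifted up by 4 (i.e. {4,5,6,7}).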

static void __kmp_process_subplace_list(const char **scan,
                                        kmp_affin_mask_t *osId2Mask,
                                        int maxOsId, kmp_affin_mask_t *tempMask,
                                        int *setSize) {
  const char *next;

  for (;;) {
    int start, count, stride, i;

    // Read in the starting proc id
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    start = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(start >= 0);
    *scan = next;

    // valid follow sets are ',' ':' and '}'
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      if ((start > maxOsId) ||
          (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none))) {
          KMP_WARNING(AffIgnoreInvalidProcID, start);
        }
      } else {
        KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
        (*setSize)++;
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }
    KMP_ASSERT2(**scan == ':', "bad explicit places list");
    (*scan)++; // skip ':'

    // Read count parameter
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    count = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(count >= 0);
    *scan = next;

    // valid follow sets are ',' ':' and '}'
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      for (i = 0; i < count; i++) {
        if ((start > maxOsId) ||
            (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
          if (__kmp_affinity_verbose ||
              (__kmp_affinity_warnings &&
               (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffIgnoreInvalidProcID, start);
          }
          break; // don't proliferate warnings for large count
        } else {
          KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
          start++;
          (*setSize)++;
        }
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }
    KMP_ASSERT2(**scan == ':', "bad explicit places list");
    (*scan)++; // skip ':'

    // Read stride parameter
    int sign = +1;
    for (;;) {
      SKIP_WS(*scan);
      if (**scan == '+') {
        (*scan)++; // skip '+'
        continue;
      }
      if (**scan == '-') {
        sign *= -1;
        (*scan)++; // skip '-'
        continue;
      }
      break;
    }
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    stride = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(stride >= 0);
    *scan = next;
    stride *= sign;

    // valid follow sets are ',' and '}'
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      for (i = 0; i < count; i++) {
        if ((start > maxOsId) ||
            (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
          if (__kmp_affinity_verbose ||
              (__kmp_affinity_warnings &&
               (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffIgnoreInvalidProcID, start);
          }
          break; // don't proliferate warnings for large count
        } else {
          KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
          start += stride;
          (*setSize)++;
        }
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }

    KMP_ASSERT2(0, "bad explicit places list");
  }
}
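
// For example (illustrative): the subplace "0:4:2" denotes start 0, count 4,
// stride 2, so this routine adds OS procs 0, 2, 4 and 6 to tempMask (warning
// about, and stopping at, any id that is not in the machine model).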

static void __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
                                int maxOsId, kmp_affin_mask_t *tempMask,
                                int *setSize) {
  const char *next;

  // valid follow sets are '{' '!' and num
  SKIP_WS(*scan);
  if (**scan == '{') {
    (*scan)++; // skip '{'
    __kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask, setSize);
    KMP_ASSERT2(**scan == '}', "bad explicit places list");
    (*scan)++; // skip '}'
  } else if (**scan == '!') {
    (*scan)++; // skip '!'
    __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
    KMP_CPU_COMPLEMENT(maxOsId, tempMask);
  } else if ((**scan >= '0') && (**scan <= '9')) {
    next = *scan;
    SKIP_DIGITS(next);
    int num = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(num >= 0);
    if ((num > maxOsId) ||
        (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
      if (__kmp_affinity_verbose ||
          (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
        KMP_WARNING(AffIgnoreInvalidProcID, num);
      }
    } else {
      KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
      (*setSize)++;
    }
    *scan = next; // skip num
  } else {
    KMP_ASSERT2(0, "bad explicit places list");
  }
}

// static void
void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
                                      unsigned int *out_numMasks,
                                      const char *placelist,
                                      kmp_affin_mask_t *osId2Mask,
                                      int maxOsId) {
  int i, j, count, stride, sign;
  const char *scan = placelist;
  const char *next = placelist;

  numNewMasks = 2;
  KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
  nextNewMask = 0;

  // tempMask is modified based on the previous or initial
  // place to form the current place.
  // previousMask contains the previous place.
  kmp_affin_mask_t *tempMask;
  kmp_affin_mask_t *previousMask;
  KMP_CPU_ALLOC(tempMask);
  KMP_CPU_ZERO(tempMask);
  KMP_CPU_ALLOC(previousMask);
  KMP_CPU_ZERO(previousMask);
  int setSize = 0;

  for (;;) {
    __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);

    // valid follow sets are ',' ':' and EOL
    SKIP_WS(scan);
    if (*scan == '\0' || *scan == ',') {
      if (setSize > 0) {
        ADD_MASK(tempMask);
      }
      KMP_CPU_ZERO(tempMask);
      setSize = 0;
      if (*scan == '\0') {
        break;
      }
      scan++; // skip ','
      continue;
    }

    KMP_ASSERT2(*scan == ':', "bad explicit places list");
    scan++; // skip ':'

    // Read count parameter
    SKIP_WS(scan);
    KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
    next = scan;
    SKIP_DIGITS(next);
    count = __kmp_str_to_int(scan, *next);
    KMP_ASSERT(count >= 0);
    scan = next;

    // valid follow sets are ',' ':' and EOL
    SKIP_WS(scan);
    if (*scan == '\0' || *scan == ',') {
      stride = +1;
    } else {
      KMP_ASSERT2(*scan == ':', "bad explicit places list");
      scan++; // skip ':'

      // Read stride parameter
      sign = +1;
      for (;;) {
        SKIP_WS(scan);
        if (*scan == '+') {
          scan++; // skip '+'
          continue;
        }
        if (*scan == '-') {
          sign *= -1;
          scan++; // skip '-'
          continue;
        }
        break;
      }
      SKIP_WS(scan);
      KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
      next = scan;
      SKIP_DIGITS(next);
      stride = __kmp_str_to_int(scan, *next);
      KMP_DEBUG_ASSERT(stride >= 0);
      scan = next;
      stride *= sign;
    }

    // Add places determined by initial_place : count : stride
    for (i = 0; i < count; i++) {
      if (setSize == 0) {
        break;
      }
      // Add the current place, then build the next place (tempMask) from that
      KMP_CPU_COPY(previousMask, tempMask);
      ADD_MASK(previousMask);
      KMP_CPU_ZERO(tempMask);
      setSize = 0;
      KMP_CPU_SET_ITERATE(j, previousMask) {
        if (!KMP_CPU_ISSET(j, previousMask)) {
          continue;
        }
        if ((j + stride > maxOsId) || (j + stride < 0) ||
            (!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) ||
            (!KMP_CPU_ISSET(j + stride,
                            KMP_CPU_INDEX(osId2Mask, j + stride)))) {
          if ((__kmp_affinity_verbose ||
               (__kmp_affinity_warnings &&
                (__kmp_affinity_type != affinity_none))) &&
              i < count - 1) {
            KMP_WARNING(AffIgnoreInvalidProcID, j + stride);
          }
          continue;
        }
        KMP_CPU_SET(j + stride, tempMask);
        setSize++;
      }
    }
    KMP_CPU_ZERO(tempMask);
    setSize = 0;

    // valid follow sets are ',' and EOL
    SKIP_WS(scan);
    if (*scan == '\0') {
      break;
    }
    if (*scan == ',') {
      scan++; // skip ','
      continue;
    }

    KMP_ASSERT2(0, "bad explicit places list");
  }

  *out_numMasks = nextNewMask;
  if (nextNewMask == 0) {
    *out_masks = NULL;
    KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
    KMP_CPU_FREE(tempMask); // release the scratch masks on the early return too
    KMP_CPU_FREE(previousMask);
    return;
  }
  KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
  KMP_CPU_FREE(tempMask);
  KMP_CPU_FREE(previousMask);
  for (i = 0; i < nextNewMask; i++) {
    kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
    kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
    KMP_CPU_COPY(dest, src);
  }
  KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
}
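
// For example (illustrative): the place list "{0,1}:3:2" yields the three
// places {0,1}, {2,3} and {4,5}; each iteration of the count loop above adds
// the current place and then builds the next one by shifting every proc in
// it by the stride.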

#endif /* OMP_40_ENABLED */

#undef ADD_MASK
#undef ADD_MASK_OSID

#if KMP_USE_HWLOC
static int __kmp_hwloc_count_children_by_type(hwloc_topology_t t, hwloc_obj_t o,
                                              hwloc_obj_type_t type,
                                              hwloc_obj_t *f) {
  if (!hwloc_compare_types(o->type, type)) {
    if (*f == NULL)
      *f = o; // output first descendant found
    return 1;
  }
  int sum = 0;
  for (unsigned i = 0; i < o->arity; i++)
    sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f);
  return sum; // will be 0 if none found (as PU arity is 0)
}
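
// Example use (illustrative, with hypothetical tp/package_obj variables):
//   hwloc_obj_t first_pu = NULL;
//   int npus = __kmp_hwloc_count_children_by_type(tp, package_obj,
//                                                 HWLOC_OBJ_PU, &first_pu);
// counts the PUs below package_obj and leaves the first one found in
// first_pu, which callers then walk with hwloc_get_next_obj_by_type().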

static int __kmp_hwloc_count_children_by_depth(hwloc_topology_t t,
                                               hwloc_obj_t o, unsigned depth,
                                               hwloc_obj_t *f) {
  if (o->depth == depth) {
    if (*f == NULL)
      *f = o; // output first descendant found
    return 1;
  }
  int sum = 0;
  for (unsigned i = 0; i < o->arity; i++)
    sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f);
  return sum; // will be 0 if none found (as PU arity is 0)
}

static int __kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o) {
  // skip PU descendants of the object o
  int skipped = 0;
  hwloc_obj_t hT = NULL;
  int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
  for (int i = 0; i < N; ++i) {
    KMP_DEBUG_ASSERT(hT);
    unsigned idx = hT->os_index;
    if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
      KMP_CPU_CLR(idx, __kmp_affin_fullMask);
      KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
      ++skipped;
    }
    hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
  }
  return skipped; // count of skipped units
}

static int __kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o) {
  // check if obj has PUs present in fullMask
  hwloc_obj_t hT = NULL;
  int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
  for (int i = 0; i < N; ++i) {
    KMP_DEBUG_ASSERT(hT);
    unsigned idx = hT->os_index;
    if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask))
      return 1; // found PU
    hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
  }
  return 0; // no PUs found
}
#endif // KMP_USE_HWLOC

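// A sketch of the KMP_HW_SUBSET values this routine applies (illustrative
// examples only; see the library documentation for the exact accepted
// syntax):
//   KMP_HW_SUBSET=2s,8c,2t   -- keep 2 sockets, 8 cores per socket,
//                               2 threads per core
//   KMP_HW_SUBSET=1s@1,4c    -- skip one socket, then keep the next socket
//                               and 4 cores per socket
// The parsed values arrive here in __kmp_hws_socket, __kmp_hws_node,
// __kmp_hws_tile, __kmp_hws_core and __kmp_hws_proc (num/offset pairs).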
static void __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) {
  AddrUnsPair *newAddr;
  if (__kmp_hws_requested == 0)
    goto _exit; // no topology limiting actions requested, exit
#if KMP_USE_HWLOC
  if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
    // Number of subobjects calculated dynamically, this works fine for
    // any non-uniform topology.
    // L2 cache objects are determined by depth, other objects - by type.
    hwloc_topology_t tp = __kmp_hwloc_topology;
    int nS = 0, nN = 0, nL = 0, nC = 0,
        nT = 0; // logical index including skipped
    int nCr = 0, nTr = 0; // number of requested units
    int nPkg = 0, nCo = 0, n_new = 0, n_old = 0, nCpP = 0, nTpC = 0; // counters
    hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
    int L2depth, idx;

    // check support of extensions ----------------------------------
    int numa_support = 0, tile_support = 0;
    if (__kmp_pu_os_idx)
      hT = hwloc_get_pu_obj_by_os_index(tp,
                                        __kmp_pu_os_idx[__kmp_avail_proc - 1]);
    else
      hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1);
    if (hT == NULL) { // something's gone wrong
      KMP_WARNING(AffHWSubsetUnsupported);
      goto _exit;
    }
    // check NUMA node
    hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
    hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
    if (hN != NULL && hN->depth > hS->depth) {
      numa_support = 1; // 1 in case socket includes node(s)
    } else if (__kmp_hws_node.num > 0) {
      // don't support sockets inside NUMA node (no such HW found for testing)
      KMP_WARNING(AffHWSubsetUnsupported);
      goto _exit;
    }
    // check L2 cache, get object by depth because of multiple caches
    L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
    hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT);
    if (hL != NULL &&
        __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1) {
      tile_support = 1; // no sense to count L2 if it includes single core
    } else if (__kmp_hws_tile.num > 0) {
      if (__kmp_hws_core.num == 0) {
        __kmp_hws_core = __kmp_hws_tile; // replace L2 with core
        __kmp_hws_tile.num = 0;
      } else {
        // L2 and core are both requested, but represent same object
        KMP_WARNING(AffHWSubsetInvalid);
        goto _exit;
      }
    }
    // end of check of extensions -----------------------------------

    // fill in unset items, validate settings -----------------------
    if (__kmp_hws_socket.num == 0)
      __kmp_hws_socket.num = nPackages; // use all available sockets
    if (__kmp_hws_socket.offset >= nPackages) {
      KMP_WARNING(AffHWSubsetManySockets);
      goto _exit;
    }
    if (numa_support) {
      int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE,
                                                  &hN); // num nodes in socket
      if (__kmp_hws_node.num == 0)
        __kmp_hws_node.num = NN; // use all available nodes
      if (__kmp_hws_node.offset >= NN) {
        KMP_WARNING(AffHWSubsetManyNodes);
        goto _exit;
      }
      if (tile_support) {
        // get num tiles in node
        int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
        if (__kmp_hws_tile.num == 0) {
          __kmp_hws_tile.num = NL + 1;
        } // use all available tiles, some node may have more tiles, thus +1
        if (__kmp_hws_tile.offset >= NL) {
          KMP_WARNING(AffHWSubsetManyTiles);
          goto _exit;
        }
        int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
                                                    &hC); // num cores in tile
        if (__kmp_hws_core.num == 0)
          __kmp_hws_core.num = NC; // use all available cores
        if (__kmp_hws_core.offset >= NC) {
          KMP_WARNING(AffHWSubsetManyCores);
          goto _exit;
        }
      } else { // tile_support
        int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE,
                                                    &hC); // num cores in node
        if (__kmp_hws_core.num == 0)
          __kmp_hws_core.num = NC; // use all available cores
        if (__kmp_hws_core.offset >= NC) {
          KMP_WARNING(AffHWSubsetManyCores);
          goto _exit;
        }
      } // tile_support
    } else { // numa_support
      if (tile_support) {
        // get num tiles in socket
        int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
        if (__kmp_hws_tile.num == 0)
          __kmp_hws_tile.num = NL; // use all available tiles
        if (__kmp_hws_tile.offset >= NL) {
          KMP_WARNING(AffHWSubsetManyTiles);
          goto _exit;
        }
        int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
                                                    &hC); // num cores in tile
        if (__kmp_hws_core.num == 0)
          __kmp_hws_core.num = NC; // use all available cores
        if (__kmp_hws_core.offset >= NC) {
          KMP_WARNING(AffHWSubsetManyCores);
          goto _exit;
        }
      } else { // tile_support
        int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE,
                                                    &hC); // num cores in socket
        if (__kmp_hws_core.num == 0)
          __kmp_hws_core.num = NC; // use all available cores
        if (__kmp_hws_core.offset >= NC) {
          KMP_WARNING(AffHWSubsetManyCores);
          goto _exit;
        }
      } // tile_support
    }
    if (__kmp_hws_proc.num == 0)
      __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all available procs
    if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) {
      KMP_WARNING(AffHWSubsetManyProcs);
      goto _exit;
    }
    // end of validation --------------------------------------------

    if (pAddr) // pAddr is NULL in case of affinity_none
      newAddr = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) *
                                              __kmp_avail_proc); // max size
    // main loop to form HW subset ----------------------------------
    hS = NULL;
    int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE);
    for (int s = 0; s < NP; ++s) {
      // Check Socket -----------------------------------------------
      hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS);
      if (!__kmp_hwloc_obj_has_PUs(tp, hS))
        continue; // skip socket if all PUs are out of fullMask
      ++nS; // only count objects that have PUs in affinity mask
      if (nS <= __kmp_hws_socket.offset ||
          nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) {
        n_old += __kmp_hwloc_skip_PUs_obj(tp, hS); // skip socket
        continue; // move to next socket
      }
      nCr = 0; // count number of cores per socket
      // socket requested, go down the topology tree
      // check 4 cases: (+NUMA+Tile), (+NUMA-Tile), (-NUMA+Tile), (-NUMA-Tile)
      if (numa_support) {
        nN = 0;
        hN = NULL;
        // num nodes in current socket
        int NN =
            __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE, &hN);
        for (int n = 0; n < NN; ++n) {
          // Check NUMA Node ----------------------------------------
          if (!__kmp_hwloc_obj_has_PUs(tp, hN)) {
            hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
            continue; // skip node if all PUs are out of fullMask
          }
          ++nN;
          if (nN <= __kmp_hws_node.offset ||
              nN > __kmp_hws_node.num + __kmp_hws_node.offset) {
            // skip node as not requested
            n_old += __kmp_hwloc_skip_PUs_obj(tp, hN); // skip node
            hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
            continue; // move to next node
          }
          // node requested, go down the topology tree
          if (tile_support) {
            nL = 0;
            hL = NULL;
            int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
            for (int l = 0; l < NL; ++l) {
              // Check L2 (tile) ------------------------------------
              if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
                hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
                continue; // skip tile if all PUs are out of fullMask
              }
              ++nL;
              if (nL <= __kmp_hws_tile.offset ||
                  nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
                // skip tile as not requested
                n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
                hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
                continue; // move to next tile
              }
              // tile requested, go down the topology tree
              nC = 0;
              hC = NULL;
              // num cores in current tile
              int NC = __kmp_hwloc_count_children_by_type(tp, hL,
                                                          HWLOC_OBJ_CORE, &hC);
              for (int c = 0; c < NC; ++c) {
                // Check Core ---------------------------------------
                if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
                  hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                  continue; // skip core if all PUs are out of fullMask
                }
                ++nC;
                if (nC <= __kmp_hws_core.offset ||
                    nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
                  // skip core as not requested
                  n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
                  hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                  continue; // move to next core
                }
                // core requested, go down to PUs
                nT = 0;
                nTr = 0;
                hT = NULL;
                // num procs in current core
                int NT = __kmp_hwloc_count_children_by_type(tp, hC,
                                                            HWLOC_OBJ_PU, &hT);
                for (int t = 0; t < NT; ++t) {
                  // Check PU ---------------------------------------
                  idx = hT->os_index;
                  if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
                    hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                    continue; // skip PU if not in fullMask
                  }
                  ++nT;
                  if (nT <= __kmp_hws_proc.offset ||
                      nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
                    // skip PU
                    KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                    ++n_old;
                    KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
                    hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                    continue; // move to next PU
                  }
                  ++nTr;
                  if (pAddr) // collect requested thread's data
                    newAddr[n_new] = (*pAddr)[n_old];
                  ++n_new;
                  ++n_old;
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                } // threads loop
                if (nTr > 0) {
                  ++nCr; // num cores per socket
                  ++nCo; // total num cores
                  if (nTr > nTpC)
                    nTpC = nTr; // calc max threads per core
                }
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
              } // cores loop
              hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
            } // tiles loop
          } else { // tile_support
            // no tiles, check cores
            nC = 0;
            hC = NULL;
            // num cores in current node
            int NC =
                __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE, &hC);
            for (int c = 0; c < NC; ++c) {
              // Check Core ---------------------------------------
              if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                continue; // skip core if all PUs are out of fullMask
              }
              ++nC;
              if (nC <= __kmp_hws_core.offset ||
                  nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
                // skip core as not requested
                n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                continue; // move to next core
              }
              // core requested, go down to PUs
              nT = 0;
              nTr = 0;
              hT = NULL;
              int NT =
                  __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
              for (int t = 0; t < NT; ++t) {
                // Check PU ---------------------------------------
                idx = hT->os_index;
                if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                  continue; // skip PU if not in fullMask
                }
                ++nT;
                if (nT <= __kmp_hws_proc.offset ||
                    nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
                  // skip PU
                  KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                  ++n_old;
                  KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                  continue; // move to next PU
                }
                ++nTr;
                if (pAddr) // collect requested thread's data
                  newAddr[n_new] = (*pAddr)[n_old];
                ++n_new;
                ++n_old;
                hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
              } // threads loop
              if (nTr > 0) {
                ++nCr; // num cores per socket
                ++nCo; // total num cores
                if (nTr > nTpC)
                  nTpC = nTr; // calc max threads per core
              }
              hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
            } // cores loop
          } // tiles support
          hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
        } // nodes loop
      } else { // numa_support
        // no NUMA support
        if (tile_support) {
          nL = 0;
          hL = NULL;
          // num tiles in current socket
          int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
          for (int l = 0; l < NL; ++l) {
            // Check L2 (tile) ------------------------------------
            if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
              hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
              continue; // skip tile if all PUs are out of fullMask
            }
            ++nL;
            if (nL <= __kmp_hws_tile.offset ||
                nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
              // skip tile as not requested
              n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
              hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
              continue; // move to next tile
            }
            // tile requested, go down the topology tree
            nC = 0;
            hC = NULL;
            // num cores per tile
            int NC =
                __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC);
            for (int c = 0; c < NC; ++c) {
              // Check Core ---------------------------------------
              if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                continue; // skip core if all PUs are out of fullMask
              }
              ++nC;
              if (nC <= __kmp_hws_core.offset ||
                  nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
                // skip core as not requested
                n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                continue; // move to next core
              }
              // core requested, go down to PUs
              nT = 0;
              nTr = 0;
              hT = NULL;
              // num procs per core
              int NT =
                  __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
              for (int t = 0; t < NT; ++t) {
                // Check PU ---------------------------------------
                idx = hT->os_index;
                if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                  continue; // skip PU if not in fullMask
                }
                ++nT;
                if (nT <= __kmp_hws_proc.offset ||
                    nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
                  // skip PU
                  KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                  ++n_old;
                  KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                  continue; // move to next PU
                }
                ++nTr;
                if (pAddr) // collect requested thread's data
                  newAddr[n_new] = (*pAddr)[n_old];
                ++n_new;
                ++n_old;
                hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
              } // threads loop
              if (nTr > 0) {
                ++nCr; // num cores per socket
                ++nCo; // total num cores
                if (nTr > nTpC)
                  nTpC = nTr; // calc max threads per core
              }
              hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
            } // cores loop
            hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
          } // tiles loop
        } else { // tile_support
          // no tiles, check cores
          nC = 0;
          hC = NULL;
          // num cores in socket
          int NC =
              __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE, &hC);
          for (int c = 0; c < NC; ++c) {
            // Check Core -------------------------------------------
            if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
              hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
              continue; // skip core if all PUs are out of fullMask
            }
            ++nC;
            if (nC <= __kmp_hws_core.offset ||
                nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
              // skip core as not requested
              n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
              hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
              continue; // move to next core
            }
            // core requested, go down to PUs
            nT = 0;
            nTr = 0;
            hT = NULL;
            // num procs per core
            int NT =
                __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
            for (int t = 0; t < NT; ++t) {
              // Check PU ---------------------------------------
              idx = hT->os_index;
              if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
                hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                continue; // skip PU if not in fullMask
              }
              ++nT;
              if (nT <= __kmp_hws_proc.offset ||
                  nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
                // skip PU
                KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                ++n_old;
                KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
                hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                continue; // move to next PU
              }
              ++nTr;
              if (pAddr) // collect requested thread's data
                newAddr[n_new] = (*pAddr)[n_old];
              ++n_new;
              ++n_old;
              hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
            } // threads loop
            if (nTr > 0) {
              ++nCr; // num cores per socket
              ++nCo; // total num cores
              if (nTr > nTpC)
                nTpC = nTr; // calc max threads per core
            }
            hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
          } // cores loop
        } // tiles support
      } // numa_support
      if (nCr > 0) { // found cores?
        ++nPkg; // num sockets
        if (nCr > nCpP)
          nCpP = nCr; // calc max cores per socket
      }
    } // sockets loop

    // check the subset is valid
    KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc);
    KMP_DEBUG_ASSERT(nPkg > 0);
    KMP_DEBUG_ASSERT(nCpP > 0);
    KMP_DEBUG_ASSERT(nTpC > 0);
    KMP_DEBUG_ASSERT(nCo > 0);
    KMP_DEBUG_ASSERT(nPkg <= nPackages);
    KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg);
    KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore);
    KMP_DEBUG_ASSERT(nCo <= __kmp_ncores);

    nPackages = nPkg; // correct num sockets
    nCoresPerPkg = nCpP; // correct num cores per socket
    __kmp_nThreadsPerCore = nTpC; // correct num threads per core
    __kmp_avail_proc = n_new; // correct num procs
    __kmp_ncores = nCo; // correct num cores
    // hwloc topology method end
  } else
#endif // KMP_USE_HWLOC
  {
    int n_old = 0, n_new = 0, proc_num = 0;
    if (__kmp_hws_node.num > 0 || __kmp_hws_tile.num > 0) {
      KMP_WARNING(AffHWSubsetNoHWLOC);
      goto _exit;
    }
    if (__kmp_hws_socket.num == 0)
      __kmp_hws_socket.num = nPackages; // use all available sockets
    if (__kmp_hws_core.num == 0)
      __kmp_hws_core.num = nCoresPerPkg; // use all available cores
    if (__kmp_hws_proc.num == 0 || __kmp_hws_proc.num > __kmp_nThreadsPerCore)
      __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all HW contexts
    if (!__kmp_affinity_uniform_topology()) {
      KMP_WARNING(AffHWSubsetNonUniform);
      goto _exit; // don't support non-uniform topology
    }
    if (depth > 3) {
      KMP_WARNING(AffHWSubsetNonThreeLevel);
      goto _exit; // don't support more than 3-level topology
    }
    if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) {
      KMP_WARNING(AffHWSubsetManySockets);
      goto _exit;
    }
    if (__kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg) {
      KMP_WARNING(AffHWSubsetManyCores);
      goto _exit;
    }
    // Form the requested subset
    if (pAddr) // pAddr is NULL in case of affinity_none
      newAddr = (AddrUnsPair *)__kmp_allocate(
          sizeof(AddrUnsPair) * __kmp_hws_socket.num * __kmp_hws_core.num *
          __kmp_hws_proc.num);
    for (int i = 0; i < nPackages; ++i) {
      if (i < __kmp_hws_socket.offset ||
          i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) {
        // skip not-requested socket
        n_old += nCoresPerPkg * __kmp_nThreadsPerCore;
        if (__kmp_pu_os_idx != NULL) {
          // walk through skipped socket
          for (int j = 0; j < nCoresPerPkg; ++j) {
            for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
              KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
              ++proc_num;
            }
          }
        }
      } else {
        // walk through requested socket
        for (int j = 0; j < nCoresPerPkg; ++j) {
          if (j < __kmp_hws_core.offset ||
              j >= __kmp_hws_core.offset +
                       __kmp_hws_core.num) { // skip not-requested core
            n_old += __kmp_nThreadsPerCore;
            if (__kmp_pu_os_idx != NULL) {
              for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
                KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
                ++proc_num;
              }
            }
          } else {
            // walk through requested core
            for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
              if (k < __kmp_hws_proc.num) {
                if (pAddr) // collect requested thread's data
                  newAddr[n_new] = (*pAddr)[n_old];
                n_new++;
              } else {
                if (__kmp_pu_os_idx != NULL)
                  KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
              }
              n_old++;
              ++proc_num;
            }
          }
        }
      }
    }
    KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
    KMP_DEBUG_ASSERT(n_new ==
                     __kmp_hws_socket.num * __kmp_hws_core.num *
                         __kmp_hws_proc.num);
    nPackages = __kmp_hws_socket.num; // correct nPackages
    nCoresPerPkg = __kmp_hws_core.num; // correct nCoresPerPkg
    __kmp_nThreadsPerCore = __kmp_hws_proc.num; // correct __kmp_nThreadsPerCore
    __kmp_avail_proc = n_new; // correct avail_proc
    __kmp_ncores = nPackages * __kmp_hws_core.num; // correct ncores
  } // non-hwloc topology method
  if (pAddr) {
    __kmp_free(*pAddr);
    *pAddr = newAddr; // replace old topology with new one
  }
  if (__kmp_affinity_verbose) {
    char m[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(m, KMP_AFFIN_MASK_PRINT_LEN,
                              __kmp_affin_fullMask);
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_HW_SUBSET", m);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_HW_SUBSET", m);
    }
    KMP_INFORM(AvailableOSProc, "KMP_HW_SUBSET", __kmp_avail_proc);
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_str_buf_print(&buf, "%d", nPackages);
    KMP_INFORM(TopologyExtra, "KMP_HW_SUBSET", buf.str, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
    __kmp_str_buf_free(&buf);
  }
_exit:
  if (__kmp_pu_os_idx != NULL) {
    __kmp_free(__kmp_pu_os_idx);
    __kmp_pu_os_idx = NULL;
  }
}

// This function figures out the deepest level at which there is at least one
// cluster/core with more than one processing unit bound to it.
static int __kmp_affinity_find_core_level(const AddrUnsPair *address2os,
                                          int nprocs, int bottom_level) {
  int core_level = 0;

  for (int i = 0; i < nprocs; i++) {
    for (int j = bottom_level; j > 0; j--) {
      if (address2os[i].first.labels[j] > 0) {
        if (core_level < (j - 1)) {
          core_level = j - 1;
        }
      }
    }
  }
  return core_level;
}
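
// For example (illustrative): with labels (package, core, thread) and
// bottom_level == 2, a topology where some core carries a nonzero thread
// label (more than one PU per core) yields core_level == 1; if every thread
// label is 0 but some core label is nonzero, the result is core_level == 0.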

// This function counts the number of clusters/cores at the given level.
static int __kmp_affinity_compute_ncores(const AddrUnsPair *address2os,
                                         int nprocs, int bottom_level,
                                         int core_level) {
  int ncores = 0;
  int i, j;

  j = bottom_level;
  for (i = 0; i < nprocs; i++) {
    for (j = bottom_level; j > core_level; j--) {
      if ((i + 1) < nprocs) {
        if (address2os[i + 1].first.labels[j] > 0) {
          break;
        }
      }
    }
    if (j == core_level) {
      ncores++;
    }
  }
  if (j > core_level) {
    // In case of ( nprocs < __kmp_avail_proc ) we may end too deep and miss
    // one core. May occur when called from __kmp_affinity_find_core().
    ncores++;
  }
  return ncores;
}

// This function finds to which cluster/core the given processing unit is
// bound.
static int __kmp_affinity_find_core(const AddrUnsPair *address2os, int proc,
                                    int bottom_level, int core_level) {
  return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level,
                                       core_level) -
         1;
}

// This function finds the maximal number of processing units bound to a
// cluster/core at the given level.
static int __kmp_affinity_max_proc_per_core(const AddrUnsPair *address2os,
                                            int nprocs, int bottom_level,
                                            int core_level) {
  int maxprocpercore = 0;

  if (core_level < bottom_level) {
    for (int i = 0; i < nprocs; i++) {
      int percore = address2os[i].first.labels[core_level + 1] + 1;

      if (percore > maxprocpercore) {
        maxprocpercore = percore;
      }
    }
  } else {
    maxprocpercore = 1;
  }
  return maxprocpercore;
}
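
// Taken together (an illustrative reading, not a spec): with bottom_level
// pointing at the PU level and core_level from
// __kmp_affinity_find_core_level(), __kmp_affinity_compute_ncores() counts
// the distinct cores, __kmp_affinity_find_core() maps a PU's index in
// address2os to the ordinal of its core, and
// __kmp_affinity_max_proc_per_core() reports the widest SMT degree seen.
// The balanced affinity code below uses all three.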

static AddrUnsPair *address2os = NULL;
static int *procarr = NULL;
static int __kmp_aff_depth = 0;

#define KMP_EXIT_AFF_NONE                                                      \
  KMP_ASSERT(__kmp_affinity_type == affinity_none);                           \
  KMP_ASSERT(address2os == NULL);                                             \
  __kmp_apply_thread_places(NULL, 0);                                         \
  return;

static int __kmp_affinity_cmp_Address_child_num(const void *a, const void *b) {
  const Address *aa =
      (const Address *)&(((AddrUnsPair *)CCAST(void *, a))->first);
  const Address *bb =
      (const Address *)&(((AddrUnsPair *)CCAST(void *, b))->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
  KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
  for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
    int j = depth - i - 1;
    if (aa->childNums[j] < bb->childNums[j])
      return -1;
    if (aa->childNums[j] > bb->childNums[j])
      return 1;
  }
  for (; i < depth; i++) {
    int j = i - __kmp_affinity_compact;
    if (aa->childNums[j] < bb->childNums[j])
      return -1;
    if (aa->childNums[j] > bb->childNums[j])
      return 1;
  }
  return 0;
}
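
// For example (derived from the loops above): with depth == 3 (levels
// package=0, core=1, thread=2) and __kmp_affinity_compact == 1, the
// comparison order is childNums[2] (thread), then childNums[0] (package),
// then childNums[1] (core); i.e., the innermost __kmp_affinity_compact
// levels become the most significant sort keys.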

static void __kmp_aux_affinity_initialize(void) {
  if (__kmp_affinity_masks != NULL) {
    KMP_ASSERT(__kmp_affin_fullMask != NULL);
    return;
  }

  // Create the "full" mask - this defines all of the processors that we
  // consider to be in the machine model. If respect is set, then it is the
  // initialization thread's affinity mask. Otherwise, it is all processors
  // that we know about on the machine.
  if (__kmp_affin_fullMask == NULL) {
    KMP_CPU_ALLOC(__kmp_affin_fullMask);
  }
  if (KMP_AFFINITY_CAPABLE()) {
    if (__kmp_affinity_respect_mask) {
      __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);

      // Count the number of available processors.
      unsigned i;
      __kmp_avail_proc = 0;
      KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
        if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
          continue;
        }
        __kmp_avail_proc++;
      }
      if (__kmp_avail_proc > __kmp_xproc) {
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none))) {
          KMP_WARNING(ErrorInitializeAffinity);
        }
        __kmp_affinity_type = affinity_none;
        KMP_AFFINITY_DISABLE();
        return;
      }
    } else {
      __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
      __kmp_avail_proc = __kmp_xproc;
    }
  }

  int depth = -1;
  kmp_i18n_id_t msg_id = kmp_i18n_null;

  // For backward compatibility, setting KMP_CPUINFO_FILE =>
  // KMP_TOPOLOGY_METHOD=cpuinfo
  if ((__kmp_cpuinfo_file != NULL) &&
      (__kmp_affinity_top_method == affinity_top_method_all)) {
    __kmp_affinity_top_method = affinity_top_method_cpuinfo;
  }

  if (__kmp_affinity_top_method == affinity_top_method_all) {
    // In the default code path, errors are not fatal - we just try using
    // another method. We only emit a warning message if affinity is on, or
    // the verbose flag is set, and the nowarnings flag was not set.
    const char *file_name = NULL;
    int line = 0;
#if KMP_USE_HWLOC
    if (depth < 0 &&
        __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
      if (__kmp_affinity_verbose) {
        KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
      }
      if (!__kmp_hwloc_error) {
        depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
        if (depth == 0) {
          KMP_EXIT_AFF_NONE;
        } else if (depth < 0 && __kmp_affinity_verbose) {
          KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
        }
      } else if (__kmp_affinity_verbose) {
        KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
      }
    }
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

    if (depth < 0) {
      if (__kmp_affinity_verbose) {
        KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
      }

      file_name = NULL;
      depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
      if (depth == 0) {
        KMP_EXIT_AFF_NONE;
      }

      if (depth < 0) {
        if (__kmp_affinity_verbose) {
          if (msg_id != kmp_i18n_null) {
            KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY",
                       __kmp_i18n_catgets(msg_id),
                       KMP_I18N_STR(DecodingLegacyAPIC));
          } else {
            KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
                       KMP_I18N_STR(DecodingLegacyAPIC));
          }
        }

        file_name = NULL;
        depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
        if (depth == 0) {
          KMP_EXIT_AFF_NONE;
        }
      }
    }

#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#if KMP_OS_LINUX

    if (depth < 0) {
      if (__kmp_affinity_verbose) {
        if (msg_id != kmp_i18n_null) {
          KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY",
                     __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
        } else {
          KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
        }
      }

      FILE *f = fopen("/proc/cpuinfo", "r");
      if (f == NULL) {
        msg_id = kmp_i18n_str_CantOpenCpuinfo;
      } else {
        file_name = "/proc/cpuinfo";
        depth =
            __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
        fclose(f);
        if (depth == 0) {
          KMP_EXIT_AFF_NONE;
        }
      }
    }

#endif /* KMP_OS_LINUX */

#if KMP_GROUP_AFFINITY

    if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
      if (__kmp_affinity_verbose) {
        KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
      }

      depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
      KMP_ASSERT(depth != 0);
    }

#endif /* KMP_GROUP_AFFINITY */

    if (depth < 0) {
      if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
        if (file_name == NULL) {
          KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
        } else if (line == 0) {
          KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
        } else {
          KMP_INFORM(UsingFlatOSFileLine, file_name, line,
                     __kmp_i18n_catgets(msg_id));
        }
      }
      // FIXME - print msg if msg_id = kmp_i18n_null ???

      file_name = "";
      depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
      if (depth == 0) {
        KMP_EXIT_AFF_NONE;
      }
      KMP_ASSERT(depth > 0);
      KMP_ASSERT(address2os != NULL);
    }
  }

// If the user has specified that a particular topology discovery method is to
// be used, then we abort if that method fails. The exception is group
// affinity, which might have been implicitly set.

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

  else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
    }

    depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
    if (depth == 0) {
      KMP_EXIT_AFF_NONE;
    }
    if (depth < 0) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
    }
  } else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
    }

    depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
    if (depth == 0) {
      KMP_EXIT_AFF_NONE;
    }
    if (depth < 0) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
    }
  }

#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
    const char *filename;
    if (__kmp_cpuinfo_file != NULL) {
      filename = __kmp_cpuinfo_file;
    } else {
      filename = "/proc/cpuinfo";
    }

    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
    }

    FILE *f = fopen(filename, "r");
    if (f == NULL) {
      int code = errno;
      if (__kmp_cpuinfo_file != NULL) {
        __kmp_msg(kmp_ms_fatal, KMP_MSG(CantOpenFileForReading, filename),
                  KMP_ERR(code), KMP_HNT(NameComesFrom_CPUINFO_FILE),
                  __kmp_msg_null);
      } else {
        __kmp_msg(kmp_ms_fatal, KMP_MSG(CantOpenFileForReading, filename),
                  KMP_ERR(code), __kmp_msg_null);
      }
    }
    int line = 0;
    depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
    fclose(f);
    if (depth < 0) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      if (line > 0) {
        KMP_FATAL(FileLineMsgExiting, filename, line,
                  __kmp_i18n_catgets(msg_id));
      } else {
        KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
      }
    }
    if (__kmp_affinity_type == affinity_none) {
      KMP_ASSERT(depth == 0);
      KMP_EXIT_AFF_NONE;
    }
  }

#if KMP_GROUP_AFFINITY

  else if (__kmp_affinity_top_method == affinity_top_method_group) {
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
    }

    depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
    KMP_ASSERT(depth != 0);
    if (depth < 0) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
    }
  }

#endif /* KMP_GROUP_AFFINITY */

  else if (__kmp_affinity_top_method == affinity_top_method_flat) {
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
    }

    depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
    if (depth == 0) {
      KMP_EXIT_AFF_NONE;
    }
    // should not fail
    KMP_ASSERT(depth > 0);
    KMP_ASSERT(address2os != NULL);
  }

#if KMP_USE_HWLOC
  else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
    KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
    }
    depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
    if (depth == 0) {
      KMP_EXIT_AFF_NONE;
    }
  }
#endif // KMP_USE_HWLOC

  if (address2os == NULL) {
    if (KMP_AFFINITY_CAPABLE() &&
        (__kmp_affinity_verbose ||
         (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none)))) {
      KMP_WARNING(ErrorInitializeAffinity);
    }
    __kmp_affinity_type = affinity_none;
    KMP_AFFINITY_DISABLE();
    return;
  }

  __kmp_apply_thread_places(&address2os, depth);

  // Create the table of masks, indexed by thread Id.
  unsigned maxIndex;
  unsigned numUnique;
  kmp_affin_mask_t *osId2Mask =
      __kmp_create_masks(&maxIndex, &numUnique, address2os, __kmp_avail_proc);
  if (__kmp_affinity_gran_levels == 0) {
    KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
  }

  // Set the childNums vector in all Address objects. This must be done before
  // we can sort using __kmp_affinity_cmp_Address_child_num(), which takes into
  // account the setting of __kmp_affinity_compact.
  __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);

4140 switch (__kmp_affinity_type) {
4141
4142 case affinity_explicit:
4143 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
4144#if OMP_40_ENABLED
4145 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
4146#endif
4147 {
4148 __kmp_affinity_process_proclist(
4149 &__kmp_affinity_masks, &__kmp_affinity_num_masks,
4150 __kmp_affinity_proclist, osId2Mask, maxIndex);
4151 }
4152#if OMP_40_ENABLED
4153 else {
4154 __kmp_affinity_process_placelist(
4155 &__kmp_affinity_masks, &__kmp_affinity_num_masks,
4156 __kmp_affinity_proclist, osId2Mask, maxIndex);
4157 }
4158#endif
4159 if (__kmp_affinity_num_masks == 0) {
4160 if (__kmp_affinity_verbose ||
4161 (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
4162 KMP_WARNING(AffNoValidProcID);
4163 }
4164 __kmp_affinity_type = affinity_none;
4165 return;
4166 }
4167 break;
4168
4169 // The other affinity types rely on sorting the Addresses according to some
4170 // permutation of the machine topology tree. Set __kmp_affinity_compact and
4171 // __kmp_affinity_offset appropriately, then jump to a common code fragment
4172 // to do the sort and create the array of affinity masks.
4173
4174 case affinity_logical:
4175 __kmp_affinity_compact = 0;
4176 if (__kmp_affinity_offset) {
4177 __kmp_affinity_offset =
4178 __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
4179 }
4180 goto sortAddresses;
4181
4182 case affinity_physical:
4183 if (__kmp_nThreadsPerCore > 1) {
4184 __kmp_affinity_compact = 1;
4185 if (__kmp_affinity_compact >= depth) {
4186 __kmp_affinity_compact = 0;
4187 }
4188 } else {
4189 __kmp_affinity_compact = 0;
4190 }
4191 if (__kmp_affinity_offset) {
4192 __kmp_affinity_offset =
4193 __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
4194 }
4195 goto sortAddresses;
4196
4197 case affinity_scatter:
4198 if (__kmp_affinity_compact >= depth) {
4199 __kmp_affinity_compact = 0;
4200 } else {
4201 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
4202 }
4203 goto sortAddresses;

  case affinity_compact:
    if (__kmp_affinity_compact >= depth) {
      __kmp_affinity_compact = depth - 1;
    }
    goto sortAddresses;

  case affinity_balanced:
    if (depth <= 1) {
      if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
        KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
      }
      __kmp_affinity_type = affinity_none;
      return;
    } else if (__kmp_affinity_uniform_topology()) {
      break;
    } else { // Non-uniform topology

      // Save the depth for further usage
      __kmp_aff_depth = depth;

      int core_level = __kmp_affinity_find_core_level(
          address2os, __kmp_avail_proc, depth - 1);
      int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
                                                 depth - 1, core_level);
      int maxprocpercore = __kmp_affinity_max_proc_per_core(
          address2os, __kmp_avail_proc, depth - 1, core_level);

      int nproc = ncores * maxprocpercore;
      if ((nproc < 2) || (nproc < __kmp_avail_proc)) {
        if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
          KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
        }
        __kmp_affinity_type = affinity_none;
        return;
      }

      procarr = (int *)__kmp_allocate(sizeof(int) * nproc);
      for (int i = 0; i < nproc; i++) {
        procarr[i] = -1;
      }

      int lastcore = -1;
      int inlastcore = 0;
      for (int i = 0; i < __kmp_avail_proc; i++) {
        int proc = address2os[i].second;
        int core =
            __kmp_affinity_find_core(address2os, i, depth - 1, core_level);

        if (core == lastcore) {
          inlastcore++;
        } else {
          inlastcore = 0;
        }
        lastcore = core;

        procarr[core * maxprocpercore + inlastcore] = proc;
      }
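
      // Layout of procarr after the loop above (illustrative numbers): with
      // ncores == 3 and maxprocpercore == 2, a machine whose middle core has
      // only one usable proc yields
      //   procarr = { p0, p1,  p2, -1,  p3, p4 }
      // i.e. a dense ncores x maxprocpercore grid indexed as
      // [core * maxprocpercore + slot], with -1 marking empty hardware-thread
      // slots on the smaller cores.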

      break;
    }

  sortAddresses:
    // Allocate the gtid->affinity mask table.
    if (__kmp_affinity_dups) {
      __kmp_affinity_num_masks = __kmp_avail_proc;
    } else {
      __kmp_affinity_num_masks = numUnique;
    }

#if OMP_40_ENABLED
    if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
        (__kmp_affinity_num_places > 0) &&
        ((unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks)) {
      __kmp_affinity_num_masks = __kmp_affinity_num_places;
    }
#endif

    KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);

    // Sort the address2os table according to the current setting of
    // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
    qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
          __kmp_affinity_cmp_Address_child_num);
    {
      int i;
      unsigned j;
      for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
        if ((!__kmp_affinity_dups) && (!address2os[i].first.leader)) {
          continue;
        }
        unsigned osId = address2os[i].second;
        kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
        kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j);
        KMP_ASSERT(KMP_CPU_ISSET(osId, src));
        KMP_CPU_COPY(dest, src);
        if (++j >= __kmp_affinity_num_masks) {
          break;
        }
      }
      KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
    }
    break;

  default:
    KMP_ASSERT2(0, "Unexpected affinity setting");
  }

  KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex + 1);
  machine_hierarchy.init(address2os, __kmp_avail_proc);
}
#undef KMP_EXIT_AFF_NONE

void __kmp_affinity_initialize(void) {
  // Much of the code above was written assuming that if a machine was not
  // affinity capable, then __kmp_affinity_type == affinity_none. We now
  // explicitly represent this as __kmp_affinity_type == affinity_disabled.
  // There are too many checks for __kmp_affinity_type == affinity_none
  // in this code. Instead of trying to change them all, check if
  // __kmp_affinity_type == affinity_disabled, and if so, slam it with
  // affinity_none, call the real initialization routine, then restore
  // __kmp_affinity_type to affinity_disabled.
  int disabled = (__kmp_affinity_type == affinity_disabled);
  if (!KMP_AFFINITY_CAPABLE()) {
    KMP_ASSERT(disabled);
  }
  if (disabled) {
    __kmp_affinity_type = affinity_none;
  }
  __kmp_aux_affinity_initialize();
  if (disabled) {
    __kmp_affinity_type = affinity_disabled;
  }
}

void __kmp_affinity_uninitialize(void) {
  if (__kmp_affinity_masks != NULL) {
    KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
    __kmp_affinity_masks = NULL;
  }
  if (__kmp_affin_fullMask != NULL) {
    KMP_CPU_FREE(__kmp_affin_fullMask);
    __kmp_affin_fullMask = NULL;
  }
  __kmp_affinity_num_masks = 0;
  __kmp_affinity_type = affinity_default;
#if OMP_40_ENABLED
  __kmp_affinity_num_places = 0;
#endif
  if (__kmp_affinity_proclist != NULL) {
    __kmp_free(__kmp_affinity_proclist);
    __kmp_affinity_proclist = NULL;
  }
  if (address2os != NULL) {
    __kmp_free(address2os);
    address2os = NULL;
  }
  if (procarr != NULL) {
    __kmp_free(procarr);
    procarr = NULL;
  }
#if KMP_USE_HWLOC
  if (__kmp_hwloc_topology != NULL) {
    hwloc_topology_destroy(__kmp_hwloc_topology);
    __kmp_hwloc_topology = NULL;
  }
#endif
  KMPAffinity::destroy_api();
}

void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return;
  }

  kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
  if (th->th.th_affin_mask == NULL) {
    KMP_CPU_ALLOC(th->th.th_affin_mask);
  } else {
    KMP_CPU_ZERO(th->th.th_affin_mask);
  }

  // Copy the thread mask to the kmp_info_t structure. If
  // __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one that
  // has all of the OS proc ids set, or if __kmp_affinity_respect_mask is set,
  // then the full mask is the same as the mask of the initialization thread.
  kmp_affin_mask_t *mask;
  int i;

#if OMP_40_ENABLED
  if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
#endif
  {
    if ((__kmp_affinity_type == affinity_none) ||
        (__kmp_affinity_type == affinity_balanced)) {
#if KMP_GROUP_AFFINITY
      if (__kmp_num_proc_groups > 1) {
        return;
      }
#endif
      KMP_ASSERT(__kmp_affin_fullMask != NULL);
      i = KMP_PLACE_ALL;
      mask = __kmp_affin_fullMask;
    } else {
      KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
      i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
      mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
    }
  }
#if OMP_40_ENABLED
  else {
    if ((!isa_root) ||
        (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
#if KMP_GROUP_AFFINITY
      if (__kmp_num_proc_groups > 1) {
        return;
      }
#endif
      KMP_ASSERT(__kmp_affin_fullMask != NULL);
      i = KMP_PLACE_ALL;
      mask = __kmp_affin_fullMask;
    } else {
      // int i = some hash function or just a counter that doesn't
      // always start at 0. Use gtid for now.
      KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
      i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
      mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
    }
  }
#endif

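  // In both branches above, when a specific place is used the index is
  // i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks, a simple
  // round-robin over the mask table. Illustrative numbers: with 4 masks and
  // offset 1, gtids 0,1,2,3,4 are bound to places 1,2,3,0,1.
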
#if OMP_40_ENABLED
  th->th.th_current_place = i;
  if (isa_root) {
    th->th.th_new_place = i;
    th->th.th_first_place = 0;
    th->th.th_last_place = __kmp_affinity_num_masks - 1;
  }

  if (i == KMP_PLACE_ALL) {
    KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
                   gtid));
  } else {
    KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
                   gtid, i));
  }
#else
  if (i == -1) {
    KA_TRACE(
        100,
        ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n",
         gtid));
  } else {
    KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
                   gtid, i));
  }
#endif /* OMP_40_ENABLED */

  KMP_CPU_COPY(th->th.th_affin_mask, mask);

  if (__kmp_affinity_verbose) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              th->th.th_affin_mask);
    KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
               __kmp_gettid(), gtid, buf);
  }

#if KMP_OS_WINDOWS
  // On Windows* OS, the process affinity mask might have changed. If the user
  // didn't request affinity and this call fails, just continue silently.
  // See CQ171393.
  if (__kmp_affinity_type == affinity_none) {
    __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
  } else
#endif
    __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}

#if OMP_40_ENABLED

void __kmp_affinity_set_place(int gtid) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return;
  }

  kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);

  KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current "
                 "place = %d)\n",
                 gtid, th->th.th_new_place, th->th.th_current_place));

  // Check that the new place is within this thread's partition.
  KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
  KMP_ASSERT(th->th.th_new_place >= 0);
  KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
  if (th->th.th_first_place <= th->th.th_last_place) {
    KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&
               (th->th.th_new_place <= th->th.th_last_place));
  } else {
    KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) ||
               (th->th.th_new_place >= th->th.th_last_place));
  }
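
  // The else branch handles a place partition that wraps around the end of
  // the place list (th_first_place > th_last_place). E.g., assuming 8 places,
  // first_place == 6 and last_place == 1 describes the partition {6, 7, 0, 1},
  // which is why the containment test differs between the two branches.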

  // Copy the thread mask to the kmp_info_t structure,
  // and set this thread's affinity.
  kmp_affin_mask_t *mask =
      KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place);
  KMP_CPU_COPY(th->th.th_affin_mask, mask);
  th->th.th_current_place = th->th.th_new_place;

  if (__kmp_affinity_verbose) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              th->th.th_affin_mask);
    KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
               __kmp_gettid(), gtid, buf);
  }
  __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}

#endif /* OMP_40_ENABLED */

int __kmp_aux_set_affinity(void **mask) {
  int gtid;
  kmp_info_t *th;
  int retval;

  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  gtid = __kmp_entry_gtid();
  KA_TRACE(1000, ; {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_debug_printf(
        "kmp_set_affinity: setting affinity mask for thread %d = %s\n", gtid,
        buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
    } else {
      unsigned proc;
      int num_procs = 0;

      KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t *)(*mask))) {
        if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
          KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
        }
        if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
          continue;
        }
        num_procs++;
      }
      if (num_procs == 0) {
        KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
      }

#if KMP_GROUP_AFFINITY
      if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
        KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
      }
#endif /* KMP_GROUP_AFFINITY */
    }
  }

  th = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
  retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
  if (retval == 0) {
    KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
  }

#if OMP_40_ENABLED
  th->th.th_current_place = KMP_PLACE_UNDEFINED;
  th->th.th_new_place = KMP_PLACE_UNDEFINED;
  th->th.th_first_place = 0;
  th->th.th_last_place = __kmp_affinity_num_masks - 1;

  // Turn off 4.0 affinity for the current thread at this parallel level.
  th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
#endif

  return retval;
}

int __kmp_aux_get_affinity(void **mask) {
  int gtid;
  int retval;
  kmp_info_t *th;

  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  gtid = __kmp_entry_gtid();
  th = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);

  KA_TRACE(1000, ; {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              th->th.th_affin_mask);
    __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n",
                 gtid, buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
    }
  }

#if !KMP_OS_WINDOWS

  retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
  KA_TRACE(1000, ; {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n",
                 gtid, buf);
  });
  return retval;

#else

  KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
  return 0;

#endif /* KMP_OS_WINDOWS */
}

int __kmp_aux_get_affinity_max_proc() {
  if (!KMP_AFFINITY_CAPABLE()) {
    return 0;
  }
#if KMP_GROUP_AFFINITY
  if (__kmp_num_proc_groups > 1) {
    return (int)(__kmp_num_proc_groups * sizeof(DWORD_PTR) * CHAR_BIT);
  }
#endif
  return __kmp_xproc;
}

int __kmp_aux_set_affinity_mask_proc(int proc, void **mask) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  KA_TRACE(1000, ; {
    int gtid = __kmp_entry_gtid();
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in "
                       "affinity mask for thread %d = %s\n",
                       proc, gtid, buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
    }
  }

  if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
    return -1;
  }
  if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
    return -2;
  }

  KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
  return 0;
}

int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  KA_TRACE(1000, ; {
    int gtid = __kmp_entry_gtid();
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in "
                       "affinity mask for thread %d = %s\n",
                       proc, gtid, buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
    }
  }

  if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
    return -1;
  }
  if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
    return -2;
  }

  KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
  return 0;
}

int __kmp_aux_get_affinity_mask_proc(int proc, void **mask) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  KA_TRACE(1000, ; {
    int gtid = __kmp_entry_gtid();
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in "
                       "affinity mask for thread %d = %s\n",
                       proc, gtid, buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
    }
  }

  if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
    return -1;
  }
  if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
    return 0;
  }

  return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
}
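
// Hedged usage sketch of the user-facing wrappers built on the
// __kmp_aux_*_affinity* routines above. kmp_create_affinity_mask,
// kmp_set_affinity_mask_proc, kmp_set_affinity and kmp_destroy_affinity_mask
// are the documented KMP API spellings; the surrounding error handling is
// illustrative only:
//
//   kmp_affinity_mask_t mask;
//   kmp_create_affinity_mask(&mask);        // start from an empty mask
//   kmp_set_affinity_mask_proc(2, &mask);   // nonzero return => bad proc id
//   kmp_set_affinity_mask_proc(3, &mask);
//   if (kmp_set_affinity(&mask) != 0) {
//     /* mask invalid, or affinity not capable on this system */
//   }
//   kmp_destroy_affinity_mask(&mask);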

// Dynamic affinity settings - Affinity balanced
void __kmp_balanced_affinity(int tid, int nthreads) {
  bool fine_gran = true;

  switch (__kmp_affinity_gran) {
  case affinity_gran_fine:
  case affinity_gran_thread:
    break;
  case affinity_gran_core:
    if (__kmp_nThreadsPerCore > 1) {
      fine_gran = false;
    }
    break;
  case affinity_gran_package:
    if (nCoresPerPkg > 1) {
      fine_gran = false;
    }
    break;
  default:
    fine_gran = false;
  }

  if (__kmp_affinity_uniform_topology()) {
    int coreID;
    int threadID;
    // Number of hyper threads per core in HT machine
    int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
    // Number of cores
    int ncores = __kmp_ncores;
    if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) {
      __kmp_nth_per_core = __kmp_avail_proc / nPackages;
      ncores = nPackages;
    }
    // How many threads will be bound to each core
    int chunk = nthreads / ncores;
    // How many cores will have an additional thread bound to them - the
    // "big cores"
    int big_cores = nthreads % ncores;
    // Number of threads on the big cores
    int big_nth = (chunk + 1) * big_cores;
    if (tid < big_nth) {
      coreID = tid / (chunk + 1);
      threadID = (tid % (chunk + 1)) % __kmp_nth_per_core;
    } else { // tid >= big_nth
      coreID = (tid - big_cores) / chunk;
      threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core;
    }
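
    // Worked example (assumed numbers): nthreads == 10 on ncores == 4 gives
    // chunk == 2, big_cores == 2, big_nth == 6. Tids 0-5 land three per core
    // on cores 0-1 (the "big" cores) and tids 6-9 land two per core on cores
    // 2-3, which is as balanced as 10 threads over 4 cores can be.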

    KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
                      "Illegal set affinity operation when not capable");

    kmp_affin_mask_t *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);

    if (fine_gran) {
      int osID = address2os[coreID * __kmp_nth_per_core + threadID].second;
      KMP_CPU_SET(osID, mask);
    } else {
      for (int i = 0; i < __kmp_nth_per_core; i++) {
        int osID;
        osID = address2os[coreID * __kmp_nth_per_core + i].second;
        KMP_CPU_SET(osID, mask);
      }
    }
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
      KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
                 __kmp_gettid(), tid, buf);
    }
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  } else { // Non-uniform topology

    kmp_affin_mask_t *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);

    int core_level = __kmp_affinity_find_core_level(
        address2os, __kmp_avail_proc, __kmp_aff_depth - 1);
    int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
                                               __kmp_aff_depth - 1, core_level);
    int nth_per_core = __kmp_affinity_max_proc_per_core(
        address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level);

    // For performance gain consider the special case nthreads ==
    // __kmp_avail_proc
    if (nthreads == __kmp_avail_proc) {
      if (fine_gran) {
        int osID = address2os[tid].second;
        KMP_CPU_SET(osID, mask);
      } else {
        int core = __kmp_affinity_find_core(address2os, tid,
                                            __kmp_aff_depth - 1, core_level);
        for (int i = 0; i < __kmp_avail_proc; i++) {
          int osID = address2os[i].second;
          if (__kmp_affinity_find_core(address2os, i, __kmp_aff_depth - 1,
                                       core_level) == core) {
            KMP_CPU_SET(osID, mask);
          }
        }
      }
    } else if (nthreads <= ncores) {

      int core = 0;
      for (int i = 0; i < ncores; i++) {
        // Check if this core from procarr[] is in the mask
        int in_mask = 0;
        for (int j = 0; j < nth_per_core; j++) {
          if (procarr[i * nth_per_core + j] != -1) {
            in_mask = 1;
            break;
          }
        }
        if (in_mask) {
          if (tid == core) {
            for (int j = 0; j < nth_per_core; j++) {
              int osID = procarr[i * nth_per_core + j];
              if (osID != -1) {
                KMP_CPU_SET(osID, mask);
                // For fine granularity it is enough to set the first available
                // osID for this core
                if (fine_gran) {
                  break;
                }
              }
            }
            break;
          } else {
            core++;
          }
        }
      }
    } else { // nthreads > ncores
      // Array to save the number of processors at each core
      int *nproc_at_core = (int *)KMP_ALLOCA(sizeof(int) * ncores);
      // Array to save the number of cores with "x" available processors
      int *ncores_with_x_procs =
          (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));
      // Array to save the number of cores with # procs from x to nth_per_core
      int *ncores_with_x_to_max_procs =
          (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));

      for (int i = 0; i <= nth_per_core; i++) {
        ncores_with_x_procs[i] = 0;
        ncores_with_x_to_max_procs[i] = 0;
      }

      for (int i = 0; i < ncores; i++) {
        int cnt = 0;
        for (int j = 0; j < nth_per_core; j++) {
          if (procarr[i * nth_per_core + j] != -1) {
            cnt++;
          }
        }
        nproc_at_core[i] = cnt;
        ncores_with_x_procs[cnt]++;
      }

      for (int i = 0; i <= nth_per_core; i++) {
        for (int j = i; j <= nth_per_core; j++) {
          ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j];
        }
      }

      // Max number of processors
      int nproc = nth_per_core * ncores;
      // An array to keep the number of threads for each context
      int *newarr = (int *)__kmp_allocate(sizeof(int) * nproc);
      for (int i = 0; i < nproc; i++) {
        newarr[i] = 0;
      }

      int nth = nthreads;
      int flag = 0;
      while (nth > 0) {
        for (int j = 1; j <= nth_per_core; j++) {
          int cnt = ncores_with_x_to_max_procs[j];
          for (int i = 0; i < ncores; i++) {
            // Skip the core with 0 processors
            if (nproc_at_core[i] == 0) {
              continue;
            }
            for (int k = 0; k < nth_per_core; k++) {
              if (procarr[i * nth_per_core + k] != -1) {
                if (newarr[i * nth_per_core + k] == 0) {
                  newarr[i * nth_per_core + k] = 1;
                  cnt--;
                  nth--;
                  break;
                } else {
                  if (flag != 0) {
                    newarr[i * nth_per_core + k]++;
                    cnt--;
                    nth--;
                    break;
                  }
                }
              }
            }
            if (cnt == 0 || nth == 0) {
              break;
            }
          }
          if (nth == 0) {
            break;
          }
        }
        flag = 1;
      }
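
      // First pass of the while loop above (flag == 0) gives every occupied
      // hardware context at most one thread; later passes (flag != 0) top
      // contexts up one thread at a time, so newarr[] ends up holding an even
      // distribution of nthreads over the available contexts.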
      int sum = 0;
      for (int i = 0; i < nproc; i++) {
        sum += newarr[i];
        if (sum > tid) {
          if (fine_gran) {
            int osID = procarr[i];
            KMP_CPU_SET(osID, mask);
          } else {
            int coreID = i / nth_per_core;
            for (int ii = 0; ii < nth_per_core; ii++) {
              int osID = procarr[coreID * nth_per_core + ii];
              if (osID != -1) {
                KMP_CPU_SET(osID, mask);
              }
            }
          }
          break;
        }
      }
      __kmp_free(newarr);
    }

    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
      KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
                 __kmp_gettid(), tid, buf);
    }
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
}

#if KMP_OS_LINUX
// We don't need this entry for Windows because
// there is the GetProcessAffinityMask() api
//
// The intended usage is indicated by these steps:
// 1) The user gets the current affinity mask
// 2) Then sets the affinity by calling this function
// 3) Error check the return value
// 4) Use non-OpenMP parallelization
// 5) Reset the affinity to what was stored in step 1)
// (a caller-side sketch of these steps follows this function)
#ifdef __cplusplus
extern "C"
#endif
    int
    kmp_set_thread_affinity_mask_initial()
// the function returns 0 on success,
// -1 if we cannot bind thread
// >0 (errno) if an error happened during binding
{
  int gtid = __kmp_get_gtid();
  if (gtid < 0) {
    // Do not touch non-omp threads
    KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                  "non-omp thread, returning\n"));
    return -1;
  }
  if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) {
    KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                  "affinity not initialized, returning\n"));
    return -1;
  }
  KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                "set full mask for thread %d\n",
                gtid));
  KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
  return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
}
#endif
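
// Illustrative caller-side sketch of the steps listed above (Linux only;
// sched_getaffinity/sched_setaffinity and the cpu_set_t handling are standard
// glibc, while the surrounding code and run_non_openmp_parallel_code() are
// hypothetical):
//
//   cpu_set_t saved;
//   CPU_ZERO(&saved);
//   sched_getaffinity(0, sizeof(saved), &saved);       // step 1
//   int rc = kmp_set_thread_affinity_mask_initial();   // step 2
//   if (rc == 0) {                                     // step 3
//     run_non_openmp_parallel_code();                  // step 4
//   }
//   sched_setaffinity(0, sizeof(saved), &saved);       // step 5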

#endif // KMP_AFFINITY_SUPPORTED