/*
 * kmp_affinity.cpp -- affinity management
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_str.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_affinity.h"

// Store the real or imagined machine hierarchy here
static hierarchy_info machine_hierarchy;

void __kmp_cleanup_hierarchy() {
    machine_hierarchy.fini();
}

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
    kmp_uint32 depth;
    // The test below is true if affinity is available, but set to "none".
    // Need to init on first use of hierarchical barrier.
    if (TCR_1(machine_hierarchy.uninitialized))
        machine_hierarchy.init(NULL, nproc);

    // Adjust the hierarchy in case num threads exceeds original
    if (nproc > machine_hierarchy.base_num_threads)
        machine_hierarchy.resize(nproc);

    depth = machine_hierarchy.depth;
    KMP_DEBUG_ASSERT(depth > 0);

    thr_bar->depth = depth;
    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1;
    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}

#if KMP_AFFINITY_SUPPORTED

bool KMPAffinity::picked_api = false;

void* KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
void* KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); }
void KMPAffinity::Mask::operator delete(void* p) { __kmp_free(p); }
void KMPAffinity::Mask::operator delete[](void* p) { __kmp_free(p); }
void* KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); }
void KMPAffinity::operator delete(void* p) { __kmp_free(p); }

void KMPAffinity::pick_api() {
    KMPAffinity* affinity_dispatch;
    if (picked_api)
        return;
#if KMP_USE_HWLOC
    if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
        affinity_dispatch = new KMPHwlocAffinity();
    } else
#endif
    {
        affinity_dispatch = new KMPNativeAffinity();
    }
    __kmp_affinity_dispatch = affinity_dispatch;
    picked_api = true;
}

void KMPAffinity::destroy_api() {
    if (__kmp_affinity_dispatch != NULL) {
        delete __kmp_affinity_dispatch;
        __kmp_affinity_dispatch = NULL;
        picked_api = false;
    }
}

//
// Print the affinity mask to the character array in a pretty format.
//
char *
__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
{
    KMP_ASSERT(buf_len >= 40);
    char *scan = buf;
    char *end = buf + buf_len - 1;

    //
    // Find first element / check for empty set.
    //
    size_t i;
    i = mask->begin();
    if (i == mask->end()) {
        KMP_SNPRINTF(scan, end-scan+1, "{<empty>}");
        while (*scan != '\0') scan++;
        KMP_ASSERT(scan <= end);
        return buf;
    }

    KMP_SNPRINTF(scan, end-scan+1, "{%ld", (long)i);
    while (*scan != '\0') scan++;
    i++;
    for (; i != mask->end(); i = mask->next(i)) {
        if (! KMP_CPU_ISSET(i, mask)) {
            continue;
        }

        //
        // Check for buffer overflow. A string of the form ",<n>" will have
        // at most 10 characters, plus we want to leave room to print ",...}"
        // if the set is too large to print for a total of 15 characters.
        // We already left room for '\0' in setting end.
        //
        if (end - scan < 15) {
            break;
        }
        KMP_SNPRINTF(scan, end-scan+1, ",%-ld", (long)i);
        while (*scan != '\0') scan++;
    }
    if (i != mask->end()) {
        KMP_SNPRINTF(scan, end-scan+1, ",...");
        while (*scan != '\0') scan++;
    }
    KMP_SNPRINTF(scan, end-scan+1, "}");
    while (*scan != '\0') scan++;
    KMP_ASSERT(scan <= end);
    return buf;
}
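
//
// A minimal usage sketch of the routine above (mirroring the verbose-output
// call sites later in this file; the mask variable is illustrative):
//
//     char buf[KMP_AFFIN_MASK_PRINT_LEN];
//     __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
//
// For a mask covering OS procs 0-3 and 6, buf would read "{0,1,2,3,6}";
// sets too large for the buffer are truncated as ",...}" by the overflow
// check above.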


void
__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
{
    KMP_CPU_ZERO(mask);

# if KMP_GROUP_AFFINITY

    if (__kmp_num_proc_groups > 1) {
        int group;
        KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
        for (group = 0; group < __kmp_num_proc_groups; group++) {
            int i;
            int num = __kmp_GetActiveProcessorCount(group);
            for (i = 0; i < num; i++) {
                KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
            }
        }
    }
    else

# endif /* KMP_GROUP_AFFINITY */

    {
        int proc;
        for (proc = 0; proc < __kmp_xproc; proc++) {
            KMP_CPU_SET(proc, mask);
        }
    }
}

//
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the child_num
// vector of the address object.  This is done in case the labels used for
// the children at one node of the hierarchy differ from those used for
// another node at the same level.  Example: suppose the machine has 2 nodes
// with 2 packages each.  The first node contains packages 601 and 602, and
// the second node contains packages 603 and 604.  If we try to sort the table
// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
// because we are paying attention to the labels themselves, not the ordinal
// child numbers.  By using the child numbers in the sort, the result is
// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
//
static void
__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
  int numAddrs)
{
    KMP_DEBUG_ASSERT(numAddrs > 0);
    int depth = address2os->first.depth;
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
      * sizeof(unsigned));
    int labCt;
    for (labCt = 0; labCt < depth; labCt++) {
        address2os[0].first.childNums[labCt] = counts[labCt] = 0;
        lastLabel[labCt] = address2os[0].first.labels[labCt];
    }
    int i;
    for (i = 1; i < numAddrs; i++) {
        for (labCt = 0; labCt < depth; labCt++) {
            if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
                int labCt2;
                for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
                    counts[labCt2] = 0;
                    lastLabel[labCt2] = address2os[i].first.labels[labCt2];
                }
                counts[labCt]++;
                lastLabel[labCt] = address2os[i].first.labels[labCt];
                break;
            }
        }
        for (labCt = 0; labCt < depth; labCt++) {
            address2os[i].first.childNums[labCt] = counts[labCt];
        }
        for (; labCt < (int)Address::maxDepth; labCt++) {
            address2os[i].first.childNums[labCt] = 0;
        }
    }
    __kmp_free(lastLabel);
    __kmp_free(counts);
}


//
// All of the __kmp_affinity_create_*_map() routines should set
// __kmp_affinity_masks to a vector of affinity mask objects of length
// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
// return the number of levels in the machine topology tree (zero if
// __kmp_affinity_type == affinity_none).
//
// All of the __kmp_affinity_create_*_map() routines should set
// *__kmp_affin_fullMask to the affinity mask for the initialization thread.
// They need to save and restore the mask, and it could be needed later, so
// saving it is just an optimization to avoid calling
// kmp_get_system_affinity() again.
//
kmp_affin_mask_t *__kmp_affin_fullMask = NULL;

static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif
static int *__kmp_pu_os_idx = NULL;

//
// __kmp_affinity_uniform_topology() doesn't work when called from
// places which support arbitrarily many levels in the machine topology
// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
// and __kmp_affinity_create_x2apicid_map().
//
inline static bool
__kmp_affinity_uniform_topology()
{
    return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}
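
// A hedged illustration of the test above: on a machine modeled as
// 2 packages x 8 cores x 2 hw threads, the topology counts as uniform only
// when all 2*8*2 = 32 logical procs are available; if the initial affinity
// mask excludes some of them, __kmp_avail_proc falls below the product and
// the topology is reported as non-uniform.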


//
// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc.
//
static void
__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
  int pkgLevel, int coreLevel, int threadLevel)
{
    int proc;

    KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
    for (proc = 0; proc < len; proc++) {
        int level;
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);
        for (level = 0; level < depth; level++) {
            if (level == threadLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
            }
            else if (level == coreLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
            }
            else if (level == pkgLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
            }
            else if (level > pkgLevel) {
                __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                  level - pkgLevel - 1);
            }
            else {
                __kmp_str_buf_print(&buf, "L%d ", level);
            }
            __kmp_str_buf_print(&buf, "%d ",
              address2os[proc].first.labels[level]);
        }
        KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
          buf.str);
        __kmp_str_buf_free(&buf);
    }
}

#if KMP_USE_HWLOC

// This function removes the topology levels that are radix 1 and don't offer
// further information about the topology.  The most common example is when
// there is one thread context per core: the extra thread-context level offers
// no unique labels, so it is removed.
// Return value: the new depth of address2os.
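//
// A hedged illustration: with a package/core/thread model (depth 3) on a
// machine exposing one PU per core, every address carries thread label 0,
// so the thread level is radix 1 and the returned depth drops from 3 to 2.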
static int
__kmp_affinity_remove_radix_one_levels(AddrUnsPair *address2os, int nActiveThreads, int depth, int* pkgLevel, int* coreLevel, int* threadLevel) {
    int level;
    int i;
    int radix1_detected;

    for (level = depth-1; level >= 0; --level) {
        // Always keep the package level
        if (level == *pkgLevel)
            continue;
        // Detect if this level is radix 1
        radix1_detected = 1;
        for (i = 1; i < nActiveThreads; ++i) {
            if (address2os[0].first.labels[level] != address2os[i].first.labels[level]) {
                // There are differing label values for this level so it stays
                radix1_detected = 0;
                break;
            }
        }
        if (!radix1_detected)
            continue;
        // Radix 1 was detected
        if (level == *threadLevel) {
            // If only one thread per core, then just decrement
            // the depth which removes the threadlevel from address2os
            for (i = 0; i < nActiveThreads; ++i) {
                address2os[i].first.depth--;
            }
            *threadLevel = -1;
        } else if (level == *coreLevel) {
            // For core level, we move the thread labels over if they are still
            // valid (*threadLevel != -1), and also reduce the depth another level
            for (i = 0; i < nActiveThreads; ++i) {
                if (*threadLevel != -1) {
                    address2os[i].first.labels[*coreLevel] = address2os[i].first.labels[*threadLevel];
                }
                address2os[i].first.depth--;
            }
            *coreLevel = -1;
        }
    }
    return address2os[0].first.depth;
}

// Returns the number of objects of type 'type' below 'obj' within the
// topology tree structure.  e.g., if obj is a HWLOC_OBJ_SOCKET object, and
// type is HWLOC_OBJ_PU, then this will return the number of PU's under the
// SOCKET object.
static int
__kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj, hwloc_obj_type_t type) {
    int retval = 0;
    hwloc_obj_t first;
    for(first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type, obj->logical_index, type, 0);
        first != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) == obj;
        first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type, first))
    {
        ++retval;
    }
    return retval;
}
static int
__kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    __kmp_get_system_affinity(oldMask, TRUE);

    int depth = 3;
    int pkgLevel = 0;
    int coreLevel = 1;
    int threadLevel = 2;

    if (! KMP_AFFINITY_CAPABLE())
    {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0), HWLOC_OBJ_CORE);
        __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);
        __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Allocate the data structure to be returned.
    //
    AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
    __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return if affinity is not enabled.
    //

    hwloc_obj_t pu;
    hwloc_obj_t core;
    hwloc_obj_t socket;
    int nActiveThreads = 0;
    int socket_identifier = 0;
    // re-calculate globals to count only accessible resources
    __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
    for(socket = hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0);
        socket != NULL;
        socket = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, socket),
        socket_identifier++)
    {
        int core_identifier = 0;
        int num_active_cores = 0;
        for(core = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, socket->type, socket->logical_index, HWLOC_OBJ_CORE, 0);
            core != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, socket->type, core) == socket;
            core = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, core),
            core_identifier++)
        {
            int pu_identifier = 0;
            int num_active_threads = 0;
            for(pu = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, core->type, core->logical_index, HWLOC_OBJ_PU, 0);
                pu != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, core->type, pu) == core;
                pu = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU, pu),
                pu_identifier++)
            {
                Address addr(3);
                if(! KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
                    continue; // skip inactive (inaccessible) unit
                KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
                  socket->os_index, socket->logical_index, core->os_index, core->logical_index, pu->os_index, pu->logical_index));
                addr.labels[0] = socket_identifier; // package
                addr.labels[1] = core_identifier;   // core
                addr.labels[2] = pu_identifier;     // pu
                retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
                __kmp_pu_os_idx[nActiveThreads] = pu->os_index; // keep os index for each active pu
                nActiveThreads++;
                ++num_active_threads; // count active threads per core
            }
            if (num_active_threads) { // were there any active threads on the core?
                ++__kmp_ncores; // count total active cores
                ++num_active_cores; // count active cores per socket
                if (num_active_threads > __kmp_nThreadsPerCore)
                    __kmp_nThreadsPerCore = num_active_threads; // calc maximum
            }
        }
        if (num_active_cores) { // were there any active cores on the socket?
            ++nPackages; // count total active packages
            if (num_active_cores > nCoresPerPkg)
                nCoresPerPkg = num_active_cores; // calc maximum
        }
    }

    //
    // If there's only one thread context to bind to, return now.
    //
    KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
    KMP_ASSERT(nActiveThreads > 0);
    if (nActiveThreads == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(retval);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        //
        // Form an Address object which only includes the package level.
        //
        Address addr(1);
        addr.labels[0] = retval[0].first.labels[pkgLevel];
        retval[0].first = addr;

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
        }

        *address2os = retval;
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the table by physical Id.
    //
    qsort(retval, nActiveThreads, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

    //
    // Check to see if the machine topology is uniform.
    //
    unsigned uniform = (nPackages * nCoresPerPkg * __kmp_nThreadsPerCore == nActiveThreads);

    //
    // Print the machine topology summary.
    //
    if (__kmp_affinity_verbose) {
        char mask[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (uniform) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }

        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);

        __kmp_str_buf_print(&buf, "%d", nPackages);
        //for (level = 1; level <= pkgLevel; level++) {
        //    __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
        //}
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);

        __kmp_str_buf_free(&buf);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(retval);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Find any levels with radix 1, and remove them from the map
    // (except for the package level).
    //
    depth = __kmp_affinity_remove_radix_one_levels(retval, nActiveThreads, depth, &pkgLevel, &coreLevel, &threadLevel);

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(retval, nActiveThreads, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    KMP_CPU_FREE(oldMask);
    *address2os = retval;
    return depth;
}
#endif // KMP_USE_HWLOC

//
// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread id's <-> processor id's.
//
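// A hedged illustration: with OS procs {0,1,2,3} available, the resulting
// map is simply {0}=0, {1}=1, {2}=2, {3}=3 -- one single-level "package"
// label per OS proc, exactly as the construction loop below assigns.
//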
static int
__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Even if __kmp_affinity_type == affinity_none, this routine might still
    // be called to set __kmp_ncores, as well as
    // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(__kmp_affinity_type == affinity_none);
        __kmp_ncores = nPackages = __kmp_xproc;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nPackages = __kmp_avail_proc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, __kmp_affin_fullMask);

        KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }
    KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
    __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
    if (__kmp_affinity_type == affinity_none) {
        int avail_ct = 0;
        int i;
        KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
            if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask))
                continue;
            __kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat
        }
        return 0;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    unsigned int i;
    KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
            continue;
        }
        __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
        Address addr(1);
        addr.labels[0] = i;
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
    }
    if (__kmp_affinity_verbose) {
        KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Only the package level is modeled in the machine topology map,
        // so the #levels of granularity is either 0 or 1.
        //
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels = 1;
        }
        else {
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 1;
}

# if KMP_GROUP_AFFINITY

//
// If multiple Windows* OS processor groups exist, we can create a 2-level
// topology map with the groups at level 0 and the individual procs at
// level 1.
//
// This facilitates letting the threads float among all procs in a group,
// if granularity=group (the default when there are multiple groups).
//
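// As a hedged illustration (assuming a 64-bit DWORD_PTR, i.e. 64 procs per
// group): OS proc 70 yields the address {1,6}, since 70 / 64 = 1 selects the
// group and 70 % 64 = 6 selects the proc within it -- matching the labels
// computed in the loop below.
//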
static int
__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // If we don't have multiple processor groups, return now.
    // The flat mapping will be used.
    //
    if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(__kmp_affin_fullMask) >= 0)) {
        // FIXME set *msg_id
        return -1;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
    __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
    int avail_ct = 0;
    int i;
    KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
            continue;
        }
        __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
        Address addr(2);
        addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
        addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
              addr.labels[1]);
        }
    }

    if (__kmp_affinity_gran_levels < 0) {
        if (__kmp_affinity_gran == affinity_gran_group) {
            __kmp_affinity_gran_levels = 1;
        }
        else if ((__kmp_affinity_gran == affinity_gran_fine)
          || (__kmp_affinity_gran == affinity_gran_thread)) {
            __kmp_affinity_gran_levels = 0;
        }
        else {
            const char *gran_str = NULL;
            if (__kmp_affinity_gran == affinity_gran_core) {
                gran_str = "core";
            }
            else if (__kmp_affinity_gran == affinity_gran_package) {
                gran_str = "package";
            }
            else if (__kmp_affinity_gran == affinity_gran_node) {
                gran_str = "node";
            }
            else {
                KMP_ASSERT(0);
            }

            // Warning: can't use affinity granularity \"gran\" with group
            // topology method, using "thread"
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 2;
}

# endif /* KMP_GROUP_AFFINITY */

# if KMP_ARCH_X86 || KMP_ARCH_X86_64

static int
__kmp_cpuid_mask_width(int count) {
    int r = 0;

    while((1<<r) < count)
        ++r;
    return r;
}
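
// Worked example for the helper above: __kmp_cpuid_mask_width(6) returns 3,
// since 1<<2 = 4 < 6 but 1<<3 = 8 >= 6; i.e., it is the number of low apicId
// bits needed to encode 'count' distinct ids.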


class apicThreadInfo {
public:
    unsigned osId;              // param to __kmp_affinity_bind_thread
    unsigned apicId;            // from cpuid after binding
    unsigned maxCoresPerPkg;    //      ""
    unsigned maxThreadsPerPkg;  //      ""
    unsigned pkgId;             // inferred from above values
    unsigned coreId;            //      ""
    unsigned threadId;          //      ""
};


static int
__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->osId < bb->osId) return -1;
    if (aa->osId > bb->osId) return 1;
    return 0;
}


static int
__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->pkgId < bb->pkgId) return -1;
    if (aa->pkgId > bb->pkgId) return 1;
    if (aa->coreId < bb->coreId) return -1;
    if (aa->coreId > bb->coreId) return 1;
    if (aa->threadId < bb->threadId) return -1;
    if (aa->threadId > bb->threadId) return 1;
    return 0;
}


//
// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
// an algorithm which cycles through the available os threads, setting
// the current thread's affinity mask to that thread, and then retrieves
// the Apic Id for each thread context using the cpuid instruction.
//
static int
__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;
    int rc;
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check if cpuid leaf 4 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 4) {
        *msg_id = kmp_i18n_str_NoLeaf4Support;
        return -1;
    }

    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
    // the defaults that we calculated from issuing cpuid without binding
    // to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        //
        // Get an upper bound on the number of threads per package using
        // cpuid(1).
        //
        // On some OS/chip combinations where HT is supported by the chip
        // but is disabled, this value will be 2 on a single core chip.
        // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
        //
        __kmp_x86_cpuid(1, 0, &buf);
        int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (maxThreadsPerPkg == 0) {
            maxThreadsPerPkg = 1;
        }

        //
        // The num cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // The author of cpu_count.cpp treated this as only an upper bound
        // on the number of cores, but I haven't seen any cases where it
        // was greater than the actual number of cores, so we will treat
        // it as exact in this block of code.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            nCoresPerPkg = 1;
        }

        //
        // There is no way to reliably tell if HT is enabled without issuing
        // the cpuid instruction from every thread, and correlating the cpuid
        // info, so if the machine is not affinity capable, we assume that HT
        // is off.  We have seen quite a few machines where maxThreadsPerPkg
        // is 2, yet the machine does not support HT.
        //
        // - Older OSes are usually found on machines with older chips, which
        //   do not support HT.
        //
        // - The performance penalty for mistakenly identifying a machine as
        //   HT when it isn't (which results in blocktime being incorrectly
        //   set to 0) is greater than the penalty for mistakenly identifying
        //   a machine as being 1 thread/core when it is really HT enabled
        //   (which results in blocktime being incorrectly set to a positive
        //   value).
        //
        __kmp_ncores = __kmp_xproc;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        __kmp_nThreadsPerCore = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    KMP_ASSERT(oldMask != NULL);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    // The relevant information is:
    //
    // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
    //    has a unique Apic Id, which is of the form pkg# : core# : thread#.
    //
    // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1).  The
    //    value of this field determines the width of the core# + thread#
    //    fields in the Apic Id.  It is also an upper bound on the number
    //    of threads per package, but it has been verified that situations
    //    happen where it is not exact.  In particular, on certain OS/chip
    //    combinations where Intel(R) Hyper-Threading Technology is supported
    //    by the chip but has been disabled, the value of this field will be 2
    //    (for a single core chip).  On other OS/chip combinations supporting
    //    Intel(R) Hyper-Threading Technology, the value of this field will be
    //    1 when Intel(R) Hyper-Threading Technology is disabled and 2 when it
    //    is enabled.
    //
    // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4).  The
    //    value of this field (+1) determines the width of the core# field in
    //    the Apic Id.  The comments in "cpucount.cpp" say that this value is
    //    an upper bound, but the IA-32 architecture manual says that it is
    //    exactly the number of cores per package, and I haven't seen any
    //    case where it wasn't.
    //
    // From this information, deduce the package Id, core Id, and thread Id,
    // and set the corresponding fields in the apicThreadInfo struct.
    //
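    // Worked example of the decoding below (the values are illustrative
    // only): with maxThreadsPerPkg = 16 and maxCoresPerPkg = 8,
    //     widthCT = __kmp_cpuid_mask_width(16) = 4  =>  pkgId = apicId >> 4
    //     widthC  = __kmp_cpuid_mask_width(8)  = 3  =>  widthT = 4 - 3 = 1
    //     coreId  = (apicId >> 1) & 0x7,  threadId = apicId & 0x1
    // so apicId 0x1B decodes to pkg 1, core 5, thread 1.
    //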
    unsigned i;
    apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
    unsigned nApics = 0;
    KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

        __kmp_affinity_dispatch->bind_thread(i);
        threadInfo[nApics].osId = i;

        //
        // The apic id and max threads per pkg come from cpuid(1).
        //
        __kmp_x86_cpuid(1, 0, &buf);
        if (((buf.edx >> 9) & 1) == 0) {
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_ApicNotPresent;
            return -1;
        }
        threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
        threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (threadInfo[nApics].maxThreadsPerPkg == 0) {
            threadInfo[nApics].maxThreadsPerPkg = 1;
        }

        //
        // Max cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            threadInfo[nApics].maxCoresPerPkg = 1;
        }

        //
        // Infer the pkgId / coreId / threadId using only the info
        // obtained locally.
        //
        int widthCT = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxThreadsPerPkg);
        threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

        int widthC = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxCoresPerPkg);
        int widthT = widthCT - widthC;
        if (widthT < 0) {
            //
            // I've never seen this one happen, but I suppose it could, if
            // the cpuid instruction on a chip was really screwed up.
            // Make sure to restore the affinity mask before the tail call.
            //
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }

        int maskC = (1 << widthC) - 1;
        threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
          & maskC;

        int maskT = (1 << widthT) - 1;
        threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

        nApics++;
    }

    //
    // We've collected all the info we need.
    // Restore the old affinity mask for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, form an Address object
    // with depth 1 and return immediately (or, if affinity is off, set
    // address2os to NULL and return).
    //
    // If it is configured to omit the package level when there is only a
    // single package, the logic at the end of this routine won't work if
    // there is only a single thread - it would try to form an Address
    // object with depth 0.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
        Address addr(1);
        addr.labels[0] = threadInfo[0].pkgId;
        (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
        }

        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the threadInfo table by physical Id.
    //
    qsort(threadInfo, nApics, sizeof(*threadInfo),
      __kmp_affinity_cmp_apicThreadInfo_phys_id);

    //
    // The table is now sorted by pkgId / coreId / threadId, but we really
    // don't know the radix of any of the fields.  pkgId's may be sparsely
    // assigned among the chips on a system.  Although coreId's are usually
    // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
    // [0..threadsPerCore-1], we don't want to make any such assumptions.
    //
    // For that matter, we don't know what coresPerPkg and threadsPerCore
    // (or the total # packages) are at this point - we want to determine
    // that now.  We only have an upper bound on the first two figures.
    //
    // We also perform a consistency check at this point: the values returned
    // by the cpuid instruction for any thread bound to a given package had
    // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
    //
    nPackages = 1;
    nCoresPerPkg = 1;
    __kmp_nThreadsPerCore = 1;
    unsigned nCores = 1;

    unsigned pkgCt = 1; // to determine radii
    unsigned lastPkgId = threadInfo[0].pkgId;
    unsigned coreCt = 1;
    unsigned lastCoreId = threadInfo[0].coreId;
    unsigned threadCt = 1;
    unsigned lastThreadId = threadInfo[0].threadId;

    // intra-pkg consistency checks
    unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
    unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

    for (i = 1; i < nApics; i++) {
        if (threadInfo[i].pkgId != lastPkgId) {
            nCores++;
            pkgCt++;
            lastPkgId = threadInfo[i].pkgId;
            if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
            coreCt = 1;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;

            //
            // This is a different package, so go on to the next iteration
            // without doing any consistency checks.  Reset the consistency
            // check vars, though.
            //
            prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
            prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
            continue;
        }

        if (threadInfo[i].coreId != lastCoreId) {
            nCores++;
            coreCt++;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;
        }
        else if (threadInfo[i].threadId != lastThreadId) {
            threadCt++;
            lastThreadId = threadInfo[i].threadId;
        }
        else {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
            return -1;
        }

        //
        // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
        // fields agree between all the threads bound to a given package.
        //
        if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
          || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }
    }
    nPackages = pkgCt;
    if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
    if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nCores;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (__kmp_affinity_uniform_topology()) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);

    }
    KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
    KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
    __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
    for (i = 0; i < nApics; ++i) {
        __kmp_pu_os_idx[i] = threadInfo[i].osId;
    }
    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Now that we've determined the number of packages, the number of cores
    // per package, and the number of threads per core, we can construct the
    // data structure that is to be returned.
    //
    int pkgLevel = 0;
    int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
    int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
    unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

    KMP_ASSERT(depth > 0);
    *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

    for (i = 0; i < nApics; ++i) {
        Address addr(depth);
        unsigned os = threadInfo[i].osId;
        int d = 0;

        if (pkgLevel >= 0) {
            addr.labels[d++] = threadInfo[i].pkgId;
        }
        if (coreLevel >= 0) {
            addr.labels[d++] = threadInfo[i].coreId;
        }
        if (threadLevel >= 0) {
            addr.labels[d++] = threadInfo[i].threadId;
        }
        (*address2os)[i] = AddrUnsPair(addr, os);
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0)
          && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return depth;
}


//
// Intel(R) microarchitecture code name Nehalem, Dunnington and later
// architectures support a newer interface for specifying the x2APIC Ids,
// based on cpuid leaf 11.
//
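// As a hedged sketch of the enumeration below: each cpuid(11) sub-leaf
// reports its level kind in ecx bits 15:8 (1 = SMT, 2 = core) and, in eax
// bits 4:0, how far right to shift the x2APIC id to reach the next level's
// id.  On a typical machine, sub-leaf 0 is the SMT level (shift 1) and
// sub-leaf 1 is the core level (shift, e.g., 5), leaving apicId >> 5 as the
// package id.
//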
1346static int
1347__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
1348 kmp_i18n_id_t *const msg_id)
1349{
1350 kmp_cpuid buf;
1351
1352 *address2os = NULL;
1353 *msg_id = kmp_i18n_null;
1354
1355 //
1356 // Check to see if cpuid leaf 11 is supported.
1357 //
1358 __kmp_x86_cpuid(0, 0, &buf);
1359 if (buf.eax < 11) {
1360 *msg_id = kmp_i18n_str_NoLeaf11Support;
1361 return -1;
1362 }
1363 __kmp_x86_cpuid(11, 0, &buf);
1364 if (buf.ebx == 0) {
1365 *msg_id = kmp_i18n_str_NoLeaf11Support;
1366 return -1;
1367 }
1368
1369 //
1370 // Find the number of levels in the machine topology. While we're at it,
1371 // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will
1372 // try to get more accurate values later by explicitly counting them,
1373 // but get reasonable defaults now, in case we return early.
1374 //
1375 int level;
1376 int threadLevel = -1;
1377 int coreLevel = -1;
1378 int pkgLevel = -1;
1379 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
1380
1381 for (level = 0;; level++) {
1382 if (level > 31) {
1383 //
1384 // FIXME: Hack for DPD200163180
1385 //
1386 // If level is big then something went wrong -> exiting
1387 //
1388 // There could actually be 32 valid levels in the machine topology,
1389 // but so far, the only machine we have seen which does not exit
1390 // this loop before iteration 32 has fubar x2APIC settings.
1391 //
1392 // For now, just reject this case based upon loop trip count.
1393 //
1394 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1395 return -1;
1396 }
1397 __kmp_x86_cpuid(11, level, &buf);
1398 if (buf.ebx == 0) {
1399 if (pkgLevel < 0) {
1400 //
1401 // Will infer nPackages from __kmp_xproc
1402 //
1403 pkgLevel = level;
1404 level++;
1405 }
1406 break;
1407 }
1408 int kind = (buf.ecx >> 8) & 0xff;
1409 if (kind == 1) {
1410 //
1411 // SMT level
1412 //
1413 threadLevel = level;
1414 coreLevel = -1;
1415 pkgLevel = -1;
Andrey Churbanov5bf494e2016-08-05 15:59:11 +00001416 __kmp_nThreadsPerCore = buf.ebx & 0xffff;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001417 if (__kmp_nThreadsPerCore == 0) {
1418 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1419 return -1;
1420 }
1421 }
1422 else if (kind == 2) {
1423 //
1424 // core level
1425 //
1426 coreLevel = level;
1427 pkgLevel = -1;
Andrey Churbanov5bf494e2016-08-05 15:59:11 +00001428 nCoresPerPkg = buf.ebx & 0xffff;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001429 if (nCoresPerPkg == 0) {
1430 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1431 return -1;
1432 }
1433 }
1434 else {
1435 if (level <= 0) {
1436 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1437 return -1;
1438 }
1439 if (pkgLevel >= 0) {
1440 continue;
1441 }
1442 pkgLevel = level;
Andrey Churbanov5bf494e2016-08-05 15:59:11 +00001443 nPackages = buf.ebx & 0xffff;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001444 if (nPackages == 0) {
1445 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1446 return -1;
1447 }
1448 }
1449 }
1450 int depth = level;
1451
1452 //
1453 // In the above loop, "level" was counted from the finest level (usually
1454 // thread) to the coarsest. The caller expects that we will place the
1455 // labels in (*address2os)[].first.labels[] in the inverse order, so
1456 // we need to invert the vars saying which level means what.
1457 //
1458 if (threadLevel >= 0) {
1459 threadLevel = depth - threadLevel - 1;
1460 }
1461 if (coreLevel >= 0) {
1462 coreLevel = depth - coreLevel - 1;
1463 }
1464 KMP_DEBUG_ASSERT(pkgLevel >= 0);
1465 pkgLevel = depth - pkgLevel - 1;
1466
1467 //
1468 // The algorithm used starts by setting the affinity to each available
Andrey Churbanov1c331292015-01-27 17:03:42 +00001469 // thread and retrieving info from the cpuid instruction, so if we are
1470 // not capable of calling __kmp_get_system_affinity() and
1471 // _kmp_get_system_affinity(), then we need to do something else - use
1472 // the defaults that we calculated from issuing cpuid without binding
1473 // to each proc.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001474 //
1475 if (! KMP_AFFINITY_CAPABLE())
1476 {
1477 //
1478 // Hack to try and infer the machine topology using only the data
1479 // available from cpuid on the current thread, and __kmp_xproc.
1480 //
1481 KMP_ASSERT(__kmp_affinity_type == affinity_none);
1482
1483 __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
1484 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001485 if (__kmp_affinity_verbose) {
1486 KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
1487 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1488 if (__kmp_affinity_uniform_topology()) {
1489 KMP_INFORM(Uniform, "KMP_AFFINITY");
1490 } else {
1491 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1492 }
1493 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1494 __kmp_nThreadsPerCore, __kmp_ncores);
1495 }
1496 return 0;
1497 }
1498
1499 //
1501 // From here on, we can assume that it is safe to call
1502 // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
1503 // even if __kmp_affinity_type = affinity_none.
1504 //
1505
1506 //
1507 // Save the affinity mask for the current thread.
1508 //
1509 kmp_affin_mask_t *oldMask;
1510 KMP_CPU_ALLOC(oldMask);
1511 __kmp_get_system_affinity(oldMask, TRUE);
1512
1513 //
1514 // Allocate the data structure to be returned.
1515 //
1516 AddrUnsPair *retval = (AddrUnsPair *)
1517 __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
1518
1519 //
1520 // Run through each of the available contexts, binding the current thread
1521 // to it, and obtaining the pertinent information using the cpuid instr.
1522 //
1523 unsigned int proc;
1524 int nApics = 0;
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00001525 KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001526 //
1527 // Skip this proc if it is not included in the machine model.
1528 //
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00001529 if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001530 continue;
1531 }
1532 KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
1533
Jonathan Peyton1cdd87a2016-11-14 21:08:35 +00001534 __kmp_affinity_dispatch->bind_thread(proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001535
1536 //
1537 // Extract the labels for each level in the machine topology map
1538 // from the APIC ID.
1539 //
1540 Address addr(depth);
1541 int prev_shift = 0;
1542
1543 for (level = 0; level < depth; level++) {
1544 __kmp_x86_cpuid(11, level, &buf);
1545 unsigned apicId = buf.edx;
1546 if (buf.ebx == 0) {
1547 if (level != depth - 1) {
1548 KMP_CPU_FREE(oldMask);
1549 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1550 return -1;
1551 }
1552 addr.labels[depth - level - 1] = apicId >> prev_shift;
1553 level++;
1554 break;
1555 }
1556 int shift = buf.eax & 0x1f;
1557 int mask = (1 << shift) - 1;
1558 addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
1559 prev_shift = shift;
1560 }
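        //
        // Worked example (hypothetical machine, for illustration only): if
        // leaf 11 reports an SMT shift of 1 and a core shift of 5, an
        // x2APIC ID of 0x2D (binary 101101) decomposes as
        //   thread id = 0x2D & 0x1 = 1           -> labels[2]
        //   core id = (0x2D & 0x1F) >> 1 = 6     -> labels[1]
        //   package id = 0x2D >> 5 = 1           -> labels[0]
        //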
1561 if (level != depth) {
1562 KMP_CPU_FREE(oldMask);
1563 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1564 return -1;
1565 }
1566
1567 retval[nApics] = AddrUnsPair(addr, proc);
1568 nApics++;
1569 }
1570
1571 //
1572 // We've collected all the info we need.
1573 // Restore the old affinity mask for this thread.
1574 //
1575 __kmp_set_system_affinity(oldMask, TRUE);
1576
1577 //
1578 // If there's only one thread context to bind to, return now.
1579 //
1580 KMP_ASSERT(nApics > 0);
1581 if (nApics == 1) {
1582 __kmp_ncores = nPackages = 1;
1583 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001584 if (__kmp_affinity_verbose) {
1585 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1586 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1587
1588 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1589 if (__kmp_affinity_respect_mask) {
1590 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1591 } else {
1592 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1593 }
1594 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1595 KMP_INFORM(Uniform, "KMP_AFFINITY");
1596 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1597 __kmp_nThreadsPerCore, __kmp_ncores);
1598 }
1599
1600 if (__kmp_affinity_type == affinity_none) {
1601 __kmp_free(retval);
1602 KMP_CPU_FREE(oldMask);
1603 return 0;
1604 }
1605
1606 //
1607 // Form an Address object which only includes the package level.
1608 //
1609 Address addr(1);
1610 addr.labels[0] = retval[0].first.labels[pkgLevel];
1611 retval[0].first = addr;
1612
1613 if (__kmp_affinity_gran_levels < 0) {
1614 __kmp_affinity_gran_levels = 0;
1615 }
1616
1617 if (__kmp_affinity_verbose) {
1618 __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
1619 }
1620
1621 *address2os = retval;
1622 KMP_CPU_FREE(oldMask);
1623 return 1;
1624 }
1625
1626 //
1627 // Sort the table by physical Id.
1628 //
1629 qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
1630
1631 //
1632 // Find the radix at each of the levels.
1633 //
1634 unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1635 unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1636 unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1637 unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1638 for (level = 0; level < depth; level++) {
1639 totals[level] = 1;
1640 maxCt[level] = 1;
1641 counts[level] = 1;
1642 last[level] = retval[0].first.labels[level];
1643 }
1644
1645 //
1646 // From here on, the iteration variable "level" runs from the finest
1647 // level to the coarsest, i.e. we iterate forward through
1648 // (*address2os)[].first.labels[] - in the previous loops, we iterated
1649 // backwards.
1650 //
1651 for (proc = 1; (int)proc < nApics; proc++) {
1652 int level;
1653 for (level = 0; level < depth; level++) {
1654 if (retval[proc].first.labels[level] != last[level]) {
1655 int j;
1656 for (j = level + 1; j < depth; j++) {
1657 totals[j]++;
1658 counts[j] = 1;
1659 // Resetting maxCt[j] here (the commented-out line below) causes
1660 // incorrect topology information to be printed whenever the maximum
1661 // value for some level (maxCt[level]) is encountered earlier than
1662 // a smaller value while walking the array.
1663 // For example, if pkg0 has 4 cores and pkg1 has 2 cores, then
1664 // maxCt[1] == 2 whereas it must be 4.
1665 // TODO!!! Check whether leaving it commented out is safe.
1665 //maxCt[j] = 1;
1666 last[j] = retval[proc].first.labels[j];
1667 }
1668 totals[level]++;
1669 counts[level]++;
1670 if (counts[level] > maxCt[level]) {
1671 maxCt[level] = counts[level];
1672 }
1673 last[level] = retval[proc].first.labels[level];
1674 break;
1675 }
1676 else if (level == depth - 1) {
1677 __kmp_free(last);
1678 __kmp_free(maxCt);
1679 __kmp_free(counts);
1680 __kmp_free(totals);
1681 __kmp_free(retval);
1682 KMP_CPU_FREE(oldMask);
1683 *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
1684 return -1;
1685 }
1686 }
1687 }
1688
1689 //
1690 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00001691 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001692 // nCoresPerPkg, & nPackages. Make sure all these vars are set
1693 // correctly, and return if affinity is not enabled.
1694 //
1695 if (threadLevel >= 0) {
1696 __kmp_nThreadsPerCore = maxCt[threadLevel];
1697 }
1698 else {
1699 __kmp_nThreadsPerCore = 1;
1700 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001701 nPackages = totals[pkgLevel];
1702
1703 if (coreLevel >= 0) {
1704 __kmp_ncores = totals[coreLevel];
1705 nCoresPerPkg = maxCt[coreLevel];
1706 }
1707 else {
1708 __kmp_ncores = nPackages;
1709 nCoresPerPkg = 1;
1710 }
1711
1712 //
1713 // Check to see if the machine topology is uniform
1714 //
1715 unsigned prod = maxCt[0];
1716 for (level = 1; level < depth; level++) {
1717 prod *= maxCt[level];
1718 }
1719 bool uniform = (prod == totals[level - 1]);
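    //
    // Illustration (hypothetical, uniform machine): 2 packages x 2 cores
    // x 2 threads gives totals = {2, 4, 8} and maxCt = {2, 2, 2}, so
    // prod = 8 == totals[depth - 1] and the topology is uniform. If pkg0
    // had 4 cores and pkg1 only 2, prod would exceed the leaf count and
    // the topology would be reported as non-uniform.
    //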
1720
1721 //
1722 // Print the machine topology summary.
1723 //
1724 if (__kmp_affinity_verbose) {
1725 char mask[KMP_AFFIN_MASK_PRINT_LEN];
1726 __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1727
1728 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1729 if (__kmp_affinity_respect_mask) {
1730 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
1731 } else {
1732 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
1733 }
1734 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1735 if (uniform) {
1736 KMP_INFORM(Uniform, "KMP_AFFINITY");
1737 } else {
1738 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1739 }
1740
1741 kmp_str_buf_t buf;
1742 __kmp_str_buf_init(&buf);
1743
1744 __kmp_str_buf_print(&buf, "%d", totals[0]);
1745 for (level = 1; level <= pkgLevel; level++) {
1746 __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
1747 }
1748 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
1749 __kmp_nThreadsPerCore, __kmp_ncores);
1750
1751 __kmp_str_buf_free(&buf);
1752 }
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00001753 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
1754 KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
1755 __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
1756 for (proc = 0; (int)proc < nApics; ++proc) {
1757 __kmp_pu_os_idx[proc] = retval[proc].second;
1758 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001759 if (__kmp_affinity_type == affinity_none) {
1760 __kmp_free(last);
1761 __kmp_free(maxCt);
1762 __kmp_free(counts);
1763 __kmp_free(totals);
1764 __kmp_free(retval);
1765 KMP_CPU_FREE(oldMask);
1766 return 0;
1767 }
1768
1769 //
1770 // Find any levels with radix 1, and remove them from the map
1771 // (except for the package level).
1772 //
1773 int new_depth = 0;
1774 for (level = 0; level < depth; level++) {
1775 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1776 continue;
1777 }
1778 new_depth++;
1779 }
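    //
    // E.g. on a hypothetical machine with SMT disabled, the thread level
    // has radix 1 (maxCt[threadLevel] == 1) and is dropped here, leaving
    // a depth-2 map with just the package and core levels.
    //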
1780
1781 //
1782 // If we are removing any levels, allocate a new vector to return,
1783 // and copy the relevant information to it.
1784 //
1785 if (new_depth != depth) {
1786 AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
1787 sizeof(AddrUnsPair) * nApics);
1788 for (proc = 0; (int)proc < nApics; proc++) {
1789 Address addr(new_depth);
1790 new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
1791 }
1792 int new_level = 0;
Jonathan Peyton62f38402015-08-25 18:44:41 +00001793 int newPkgLevel = -1;
1794 int newCoreLevel = -1;
1795 int newThreadLevel = -1;
1796 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001797 for (level = 0; level < depth; level++) {
Jonathan Peyton62f38402015-08-25 18:44:41 +00001798 if ((maxCt[level] == 1)
1799 && (level != pkgLevel)) {
1800 //
1801 // Remove this level. Never remove the package level
1802 //
1803 continue;
1804 }
1805 if (level == pkgLevel) {
1806 newPkgLevel = level;
1807 }
1808 if (level == coreLevel) {
1809 newCoreLevel = level;
1810 }
1811 if (level == threadLevel) {
1812 newThreadLevel = level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001813 }
1814 for (proc = 0; (int)proc < nApics; proc++) {
1815 new_retval[proc].first.labels[new_level]
1816 = retval[proc].first.labels[level];
1817 }
1818 new_level++;
1819 }
1820
1821 __kmp_free(retval);
1822 retval = new_retval;
1823 depth = new_depth;
Jonathan Peyton62f38402015-08-25 18:44:41 +00001824 pkgLevel = newPkgLevel;
1825 coreLevel = newCoreLevel;
1826 threadLevel = newThreadLevel;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001827 }
1828
1829 if (__kmp_affinity_gran_levels < 0) {
1830 //
1831 // Set the granularity level based on what levels are modeled
1832 // in the machine topology map.
1833 //
1834 __kmp_affinity_gran_levels = 0;
1835 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
1836 __kmp_affinity_gran_levels++;
1837 }
1838 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1839 __kmp_affinity_gran_levels++;
1840 }
1841 if (__kmp_affinity_gran > affinity_gran_package) {
1842 __kmp_affinity_gran_levels++;
1843 }
1844 }
1845
1846 if (__kmp_affinity_verbose) {
1847 __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
1848 coreLevel, threadLevel);
1849 }
1850
1851 __kmp_free(last);
1852 __kmp_free(maxCt);
1853 __kmp_free(counts);
1854 __kmp_free(totals);
1855 KMP_CPU_FREE(oldMask);
1856 *address2os = retval;
1857 return depth;
1858}
1859
1860
1861# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1862
1863
1864#define osIdIndex 0
1865#define threadIdIndex 1
1866#define coreIdIndex 2
1867#define pkgIdIndex 3
1868#define nodeIdIndex 4
1869
1870typedef unsigned *ProcCpuInfo;
1871static unsigned maxIndex = pkgIdIndex;
1872
1873
1874static int
1875__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
1876{
1877 const unsigned *aa = (const unsigned *)a;
1878 const unsigned *bb = (const unsigned *)b;
1879 if (aa[osIdIndex] < bb[osIdIndex]) return -1;
1880 if (aa[osIdIndex] > bb[osIdIndex]) return 1;
1881 return 0;
1882};
1883
1884
1885static int
1886__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
1887{
1888 unsigned i;
1889 const unsigned *aa = *((const unsigned **)a);
1890 const unsigned *bb = *((const unsigned **)b);
1891 for (i = maxIndex; ; i--) {
1892 if (aa[i] < bb[i]) return -1;
1893 if (aa[i] > bb[i]) return 1;
1894 if (i == osIdIndex) break;
1895 }
1896 return 0;
1897}
1898
1899
1900//
1901// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
1902// affinity map.
1903//
1904static int
1905__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
1906 kmp_i18n_id_t *const msg_id, FILE *f)
1907{
1908 *address2os = NULL;
1909 *msg_id = kmp_i18n_null;
1910
1911 //
1912 // Scan the file to count the number of "processor" (osId) fields,
Alp Toker8f2d3f02014-02-24 10:40:15 +00001913 // and find the highest value of <n> for a node_<n> field.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001914 //
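    //
    // A sketch of one record as this parser understands it (real files
    // contain many more fields, which are ignored; "thread id" and
    // "node_<n> id" are extensions for alternate input files and do not
    // appear in standard Linux /proc/cpuinfo):
    //   processor   : 0
    //   physical id : 0
    //   core id     : 0
    //   thread id   : 0
    //   node_0 id   : 0
    // Records are separated by blank lines.
    //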
1915 char buf[256];
1916 unsigned num_records = 0;
1917 while (! feof(f)) {
1918 buf[sizeof(buf) - 1] = 1;
1919 if (! fgets(buf, sizeof(buf), f)) {
1920 //
1921 // Read errors presumably because of EOF
1922 //
1923 break;
1924 }
1925
1926 char s1[] = "processor";
1927 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1928 num_records++;
1929 continue;
1930 }
1931
1932 //
1933 // FIXME - this will match "node_<n> <garbage>"
1934 //
1935 unsigned level;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001936 if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001937 if (nodeIdIndex + level >= maxIndex) {
1938 maxIndex = nodeIdIndex + level;
1939 }
1940 continue;
1941 }
1942 }
1943
1944 //
1945 // Check for empty file / no valid processor records, or too many.
1946 // The number of records can't exceed the number of valid bits in the
1947 // affinity mask.
1948 //
1949 if (num_records == 0) {
1950 *line = 0;
1951 *msg_id = kmp_i18n_str_NoProcRecords;
1952 return -1;
1953 }
1954 if (num_records > (unsigned)__kmp_xproc) {
1955 *line = 0;
1956 *msg_id = kmp_i18n_str_TooManyProcRecords;
1957 return -1;
1958 }
1959
1960 //
1961 // Set the file pointer back to the beginning, so that we can scan the
1962 // file again, this time performing a full parse of the data.
1963 // Allocate a vector of ProcCpuInfo objects, where we will place the data.
1964 // Adding an extra element at the end allows us to remove a lot of extra
1965 // checks for termination conditions.
1966 //
1967 if (fseek(f, 0, SEEK_SET) != 0) {
1968 *line = 0;
1969 *msg_id = kmp_i18n_str_CantRewindCpuinfo;
1970 return -1;
1971 }
1972
1973 //
1974 // Allocate the array of records to store the proc info in. The dummy
1975 // element at the end makes the logic in filling them out easier to code.
1976 //
1977 unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
1978 * sizeof(unsigned *));
1979 unsigned i;
1980 for (i = 0; i <= num_records; i++) {
1981 threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
1982 * sizeof(unsigned));
1983 }
1984
1985#define CLEANUP_THREAD_INFO \
1986 for (i = 0; i <= num_records; i++) { \
1987 __kmp_free(threadInfo[i]); \
1988 } \
1989 __kmp_free(threadInfo);
1990
1991 //
1992 // A value of UINT_MAX means that we didn't find the field
1993 //
1994 unsigned __index;
1995
1996#define INIT_PROC_INFO(p) \
1997 for (__index = 0; __index <= maxIndex; __index++) { \
1998 (p)[__index] = UINT_MAX; \
1999 }
2000
2001 for (i = 0; i <= num_records; i++) {
2002 INIT_PROC_INFO(threadInfo[i]);
2003 }
2004
2005 unsigned num_avail = 0;
2006 *line = 0;
2007 while (! feof(f)) {
2008 //
2009 // Create an inner scoping level, so that all the goto targets at the
2010 // end of the loop appear in an outer scoping level. This avoids
2011 // warnings about jumping past an initialization to a target in the
2012 // same block.
2013 //
2014 {
2015 buf[sizeof(buf) - 1] = 1;
2016 bool long_line = false;
2017 if (! fgets(buf, sizeof(buf), f)) {
2018 //
2019 // Read errors presumably because of EOF
2020 //
2021 // If there is valid data in threadInfo[num_avail], then fake
2022 // a blank line to ensure that the last address gets parsed.
2023 //
2024 bool valid = false;
2025 for (i = 0; i <= maxIndex; i++) {
2026 if (threadInfo[num_avail][i] != UINT_MAX) {
2027 valid = true;
2028 }
2029 }
2030 if (! valid) {
2031 break;
2032 }
2033 buf[0] = 0;
2034 } else if (!buf[sizeof(buf) - 1]) {
2035 //
2036 // The line is longer than the buffer. Set a flag and don't
2037 // emit an error if we were going to ignore the line, anyway.
2038 //
2039 long_line = true;
2040
2041#define CHECK_LINE \
2042 if (long_line) { \
2043 CLEANUP_THREAD_INFO; \
2044 *msg_id = kmp_i18n_str_LongLineCpuinfo; \
2045 return -1; \
2046 }
2047 }
2048 (*line)++;
2049
2050 char s1[] = "processor";
2051 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
2052 CHECK_LINE;
2053 char *p = strchr(buf + sizeof(s1) - 1, ':');
2054 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002055 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002056 if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
2057 threadInfo[num_avail][osIdIndex] = val;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002058#if KMP_OS_LINUX && USE_SYSFS_INFO
2059 char path[256];
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002060 KMP_SNPRINTF(path, sizeof(path),
Jim Cownie181b4bb2013-12-23 17:28:57 +00002061 "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
2062 threadInfo[num_avail][osIdIndex]);
2063 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
2064
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002065 KMP_SNPRINTF(path, sizeof(path),
Jim Cownie181b4bb2013-12-23 17:28:57 +00002066 "/sys/devices/system/cpu/cpu%u/topology/core_id",
2067 threadInfo[num_avail][osIdIndex]);
2068 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002069 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002070#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002071 }
2072 char s2[] = "physical id";
2073 if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
2074 CHECK_LINE;
2075 char *p = strchr(buf + sizeof(s2) - 1, ':');
2076 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002077 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002078 if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
2079 threadInfo[num_avail][pkgIdIndex] = val;
2080 continue;
2081 }
2082 char s3[] = "core id";
2083 if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
2084 CHECK_LINE;
2085 char *p = strchr(buf + sizeof(s3) - 1, ':');
2086 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002087 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002088 if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
2089 threadInfo[num_avail][coreIdIndex] = val;
2090 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002091#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jim Cownie5e8470a2013-09-27 10:38:44 +00002092 }
2093 char s4[] = "thread id";
2094 if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
2095 CHECK_LINE;
2096 char *p = strchr(buf + sizeof(s4) - 1, ':');
2097 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002098 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002099 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
2100 threadInfo[num_avail][threadIdIndex] = val;
2101 continue;
2102 }
2103 unsigned level;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002104 if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002105 CHECK_LINE;
2106 char *p = strchr(buf + sizeof(s4) - 1, ':');
2107 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002108 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002109 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
2110 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
2111 threadInfo[num_avail][nodeIdIndex + level] = val;
2112 continue;
2113 }
2114
2115 //
2116 // We didn't recognize the leading token on the line.
2117 // There are lots of leading tokens that we don't recognize -
2118 // if the line isn't empty, go on to the next line.
2119 //
2120 if ((*buf != 0) && (*buf != '\n')) {
2121 //
2122 // If the line is longer than the buffer, read characters
2123 // until we find a newline.
2124 //
2125 if (long_line) {
2126 int ch;
2127 while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
2128 }
2129 continue;
2130 }
2131
2132 //
2133 // A newline has signalled the end of the processor record.
2134 // Check that there aren't too many procs specified.
2135 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002136 if ((int)num_avail == __kmp_xproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002137 CLEANUP_THREAD_INFO;
2138 *msg_id = kmp_i18n_str_TooManyEntries;
2139 return -1;
2140 }
2141
2142 //
2143 // Check for missing fields. The osId field must be there, and we
2144 // currently require that the physical id field is specified, also.
2145 //
2146 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
2147 CLEANUP_THREAD_INFO;
2148 *msg_id = kmp_i18n_str_MissingProcField;
2149 return -1;
2150 }
2151 if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
2152 CLEANUP_THREAD_INFO;
2153 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
2154 return -1;
2155 }
2156
2157 //
2158 // Skip this proc if it is not included in the machine model.
2159 //
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00002160 if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], __kmp_affin_fullMask)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002161 INIT_PROC_INFO(threadInfo[num_avail]);
2162 continue;
2163 }
2164
2165 //
2166 // We have a successful parse of this proc's info.
2167 // Increment the counter, and prepare for the next proc.
2168 //
2169 num_avail++;
2170 KMP_ASSERT(num_avail <= num_records);
2171 INIT_PROC_INFO(threadInfo[num_avail]);
2172 }
2173 continue;
2174
2175 no_val:
2176 CLEANUP_THREAD_INFO;
2177 *msg_id = kmp_i18n_str_MissingValCpuinfo;
2178 return -1;
2179
2180 dup_field:
2181 CLEANUP_THREAD_INFO;
2182 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
2183 return -1;
2184 }
2185 *line = 0;
2186
2187# if KMP_MIC && REDUCE_TEAM_SIZE
2188 unsigned teamSize = 0;
2189# endif // KMP_MIC && REDUCE_TEAM_SIZE
2190
2191 // check for num_records == __kmp_xproc ???
2192
2193 //
2194 // If there's only one thread context to bind to, form an Address object
2195 // with depth 1 and return immediately (or, if affinity is off, set
2196 // address2os to NULL and return).
2197 //
2198 // If it is configured to omit the package level when there is only a
2199 // single package, the logic at the end of this routine won't work if
2200 // there is only a single thread - it would try to form an Address
2201 // object with depth 0.
2202 //
2203 KMP_ASSERT(num_avail > 0);
2204 KMP_ASSERT(num_avail <= num_records);
2205 if (num_avail == 1) {
2206 __kmp_ncores = 1;
2207 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002208 if (__kmp_affinity_verbose) {
2209 if (! KMP_AFFINITY_CAPABLE()) {
2210 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2211 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2212 KMP_INFORM(Uniform, "KMP_AFFINITY");
2213 }
2214 else {
2215 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2216 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00002217 __kmp_affin_fullMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002218 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2219 if (__kmp_affinity_respect_mask) {
2220 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2221 } else {
2222 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2223 }
2224 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2225 KMP_INFORM(Uniform, "KMP_AFFINITY");
2226 }
2227 int index;
2228 kmp_str_buf_t buf;
2229 __kmp_str_buf_init(&buf);
2230 __kmp_str_buf_print(&buf, "1");
2231 for (index = maxIndex - 1; index > pkgIdIndex; index--) {
2232 __kmp_str_buf_print(&buf, " x 1");
2233 }
2234 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
2235 __kmp_str_buf_free(&buf);
2236 }
2237
2238 if (__kmp_affinity_type == affinity_none) {
2239 CLEANUP_THREAD_INFO;
2240 return 0;
2241 }
2242
2243 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
2244 Address addr(1);
2245 addr.labels[0] = threadInfo[0][pkgIdIndex];
2246 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
2247
2248 if (__kmp_affinity_gran_levels < 0) {
2249 __kmp_affinity_gran_levels = 0;
2250 }
2251
2252 if (__kmp_affinity_verbose) {
2253 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
2254 }
2255
2256 CLEANUP_THREAD_INFO;
2257 return 1;
2258 }
2259
2260 //
2261 // Sort the threadInfo table by physical Id.
2262 //
2263 qsort(threadInfo, num_avail, sizeof(*threadInfo),
2264 __kmp_affinity_cmp_ProcCpuInfo_phys_id);
2265
2266 //
2267 // The table is now sorted by pkgId / coreId / threadId, but we really
2268 // don't know the radix of any of the fields. pkgId's may be sparsely
2269 // assigned among the chips on a system. Although coreId's are usually
2270 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
2271 // [0..threadsPerCore-1], we don't want to make any such assumptions.
2272 //
2273 // For that matter, we don't know what coresPerPkg and threadsPerCore
2274 // (or the total # packages) are at this point - we want to determine
2275 // that now. We only have an upper bound on the first two figures.
2276 //
2277 unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
2278 * sizeof(unsigned));
2279 unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
2280 * sizeof(unsigned));
2281 unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
2282 * sizeof(unsigned));
2283 unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
2284 * sizeof(unsigned));
2285
2286 bool assign_thread_ids = false;
2287 unsigned threadIdCt;
2288 unsigned index;
2289
2290 restart_radix_check:
2291 threadIdCt = 0;
2292
2293 //
2294 // Initialize the counter arrays with data from threadInfo[0].
2295 //
2296 if (assign_thread_ids) {
2297 if (threadInfo[0][threadIdIndex] == UINT_MAX) {
2298 threadInfo[0][threadIdIndex] = threadIdCt++;
2299 }
2300 else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
2301 threadIdCt = threadInfo[0][threadIdIndex] + 1;
2302 }
2303 }
2304 for (index = 0; index <= maxIndex; index++) {
2305 counts[index] = 1;
2306 maxCt[index] = 1;
2307 totals[index] = 1;
2308 lastId[index] = threadInfo[0][index];
2309 }
2310
2311 //
2312 // Run through the rest of the OS procs.
2313 //
2314 for (i = 1; i < num_avail; i++) {
2315 //
2316 // Find the most significant index whose id differs
2317 // from the id for the previous OS proc.
2318 //
2319 for (index = maxIndex; index >= threadIdIndex; index--) {
2320 if (assign_thread_ids && (index == threadIdIndex)) {
2321 //
2322 // Auto-assign the thread id field if it wasn't specified.
2323 //
2324 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2325 threadInfo[i][threadIdIndex] = threadIdCt++;
2326 }
2327
2328 //
2329 // Apparently the thread id field was specified for some
2330 // entries and not others. Start the thread id counter
2331 // off at the next higher thread id.
2332 //
2333 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2334 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2335 }
2336 }
2337 if (threadInfo[i][index] != lastId[index]) {
2338 //
2339 // Run through all indices which are less significant,
2340 // and reset the counts to 1.
2341 //
2342 // At all levels up to and including index, we need to
2343 // increment the totals and record the last id.
2344 //
2345 unsigned index2;
2346 for (index2 = threadIdIndex; index2 < index; index2++) {
2347 totals[index2]++;
2348 if (counts[index2] > maxCt[index2]) {
2349 maxCt[index2] = counts[index2];
2350 }
2351 counts[index2] = 1;
2352 lastId[index2] = threadInfo[i][index2];
2353 }
2354 counts[index]++;
2355 totals[index]++;
2356 lastId[index] = threadInfo[i][index];
2357
2358 if (assign_thread_ids && (index > threadIdIndex)) {
2359
2360# if KMP_MIC && REDUCE_TEAM_SIZE
2361 //
2362 // The default team size is the total #threads in the machine
2363 // minus 1 thread for every core that has 3 or more threads.
2364 //
2365 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2366# endif // KMP_MIC && REDUCE_TEAM_SIZE
2367
2368 //
2369 // Restart the thread counter, as we are on a new core.
2370 //
2371 threadIdCt = 0;
2372
2373 //
2374 // Auto-assign the thread id field if it wasn't specified.
2375 //
2376 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2377 threadInfo[i][threadIdIndex] = threadIdCt++;
2378 }
2379
2380 //
2381 // Apparently the thread id field was specified for some
2382 // entries and not others. Start the thread id counter
2383 // off at the next higher thread id.
2384 //
2385 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2386 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2387 }
2388 }
2389 break;
2390 }
2391 }
2392 if (index < threadIdIndex) {
2393 //
2394 // If thread ids were specified, it is an error if they are not
2395 // unique. Also, check that we haven't already restarted the
2396 // loop (to be safe - shouldn't need to).
2397 //
2398 if ((threadInfo[i][threadIdIndex] != UINT_MAX)
2399 || assign_thread_ids) {
2400 __kmp_free(lastId);
2401 __kmp_free(totals);
2402 __kmp_free(maxCt);
2403 __kmp_free(counts);
2404 CLEANUP_THREAD_INFO;
2405 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2406 return -1;
2407 }
2408
2409 //
2410 // If the thread ids were not specified and we see entries
2411 // that are duplicates, start the loop over and
2412 // assign the thread ids manually.
2413 //
2414 assign_thread_ids = true;
2415 goto restart_radix_check;
2416 }
2417 }
2418
2419# if KMP_MIC && REDUCE_TEAM_SIZE
2420 //
2421 // The default team size is the total #threads in the machine
2422 // minus 1 thread for every core that has 3 or more threads.
2423 //
2424 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2425# endif // KMP_MIC && REDUCE_TEAM_SIZE
2426
2427 for (index = threadIdIndex; index <= maxIndex; index++) {
2428 if (counts[index] > maxCt[index]) {
2429 maxCt[index] = counts[index];
2430 }
2431 }
2432
2433 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2434 nCoresPerPkg = maxCt[coreIdIndex];
2435 nPackages = totals[pkgIdIndex];
2436
2437 //
2438 // Check to see if the machine topology is uniform
2439 //
2440 unsigned prod = totals[maxIndex];
2441 for (index = threadIdIndex; index < maxIndex; index++) {
2442 prod *= maxCt[index];
2443 }
2444 bool uniform = (prod == totals[threadIdIndex]);
2445
2446 //
2447 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00002448 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002449 // nCoresPerPkg, & nPackages. Make sure all these vars are set
2450 // correctly, and return now if affinity is not enabled.
2451 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00002452 __kmp_ncores = totals[coreIdIndex];
2453
2454 if (__kmp_affinity_verbose) {
2455 if (! KMP_AFFINITY_CAPABLE()) {
2456 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2457 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2458 if (uniform) {
2459 KMP_INFORM(Uniform, "KMP_AFFINITY");
2460 } else {
2461 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2462 }
2463 }
2464 else {
2465 char buf[KMP_AFFIN_MASK_PRINT_LEN];
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00002466 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, __kmp_affin_fullMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002467 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2468 if (__kmp_affinity_respect_mask) {
2469 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2470 } else {
2471 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2472 }
2473 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2474 if (uniform) {
2475 KMP_INFORM(Uniform, "KMP_AFFINITY");
2476 } else {
2477 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2478 }
2479 }
2480 kmp_str_buf_t buf;
2481 __kmp_str_buf_init(&buf);
2482
2483 __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
2484 for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2485 __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
2486 }
2487 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
2488 maxCt[threadIdIndex], __kmp_ncores);
2489
2490 __kmp_str_buf_free(&buf);
2491 }
2492
2493# if KMP_MIC && REDUCE_TEAM_SIZE
2494 //
2495 // Set the default team size.
2496 //
2497 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2498 __kmp_dflt_team_nth = teamSize;
2499 KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
2500 __kmp_dflt_team_nth));
2501 }
2502# endif // KMP_MIC && REDUCE_TEAM_SIZE
2503
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00002504 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
2505 KMP_DEBUG_ASSERT(num_avail == __kmp_avail_proc);
2506 __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
2507 for (i = 0; i < num_avail; ++i) { // fill the os indices
2508 __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
2509 }
2510
Jim Cownie5e8470a2013-09-27 10:38:44 +00002511 if (__kmp_affinity_type == affinity_none) {
2512 __kmp_free(lastId);
2513 __kmp_free(totals);
2514 __kmp_free(maxCt);
2515 __kmp_free(counts);
2516 CLEANUP_THREAD_INFO;
2517 return 0;
2518 }
2519
2520 //
2521 // Count the number of levels which have more nodes at that level than
2522 // at the parent's level (with an implicit root node above
2523 // the top level). This is equivalent to saying that there is at least
2524 // one node at this level which has a sibling. These levels are in the
2525 // map, and the package level is always in the map.
2526 //
2527 bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
2528 int level = 0;
2529 for (index = threadIdIndex; index < maxIndex; index++) {
2530 KMP_ASSERT(totals[index] >= totals[index + 1]);
2531 inMap[index] = (totals[index] > totals[index + 1]);
2532 }
2533 inMap[maxIndex] = (totals[maxIndex] > 1);
2534 inMap[pkgIdIndex] = true;
2535
2536 int depth = 0;
2537 for (index = threadIdIndex; index <= maxIndex; index++) {
2538 if (inMap[index]) {
2539 depth++;
2540 }
2541 }
2542 KMP_ASSERT(depth > 0);
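    //
    // Illustration (hypothetical): a single-package, 4-core machine with
    // SMT disabled gives totals[threadIdIndex] == totals[coreIdIndex] == 4
    // and totals[pkgIdIndex] == 1, so only the core and package levels
    // enter the map and depth == 2.
    //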
2543
2544 //
2545 // Construct the data structure that is to be returned.
2546 //
2547 *address2os = (AddrUnsPair*)
2548 __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
2549 int pkgLevel = -1;
2550 int coreLevel = -1;
2551 int threadLevel = -1;
2552
2553 for (i = 0; i < num_avail; ++i) {
2554 Address addr(depth);
2555 unsigned os = threadInfo[i][osIdIndex];
2556 int src_index;
2557 int dst_index = 0;
2558
2559 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
2560 if (! inMap[src_index]) {
2561 continue;
2562 }
2563 addr.labels[dst_index] = threadInfo[i][src_index];
2564 if (src_index == pkgIdIndex) {
2565 pkgLevel = dst_index;
2566 }
2567 else if (src_index == coreIdIndex) {
2568 coreLevel = dst_index;
2569 }
2570 else if (src_index == threadIdIndex) {
2571 threadLevel = dst_index;
2572 }
2573 dst_index++;
2574 }
2575 (*address2os)[i] = AddrUnsPair(addr, os);
2576 }
2577
2578 if (__kmp_affinity_gran_levels < 0) {
2579 //
2580 // Set the granularity level based on what levels are modeled
2581 // in the machine topology map.
2582 //
2583 unsigned src_index;
2584 __kmp_affinity_gran_levels = 0;
2585 for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2586 if (! inMap[src_index]) {
2587 continue;
2588 }
2589 switch (src_index) {
2590 case threadIdIndex:
2591 if (__kmp_affinity_gran > affinity_gran_thread) {
2592 __kmp_affinity_gran_levels++;
2593 }
2594
2595 break;
2596 case coreIdIndex:
2597 if (__kmp_affinity_gran > affinity_gran_core) {
2598 __kmp_affinity_gran_levels++;
2599 }
2600 break;
2601
2602 case pkgIdIndex:
2603 if (__kmp_affinity_gran > affinity_gran_package) {
2604 __kmp_affinity_gran_levels++;
2605 }
2606 break;
2607 }
2608 }
2609 }
2610
2611 if (__kmp_affinity_verbose) {
2612 __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2613 coreLevel, threadLevel);
2614 }
2615
2616 __kmp_free(inMap);
2617 __kmp_free(lastId);
2618 __kmp_free(totals);
2619 __kmp_free(maxCt);
2620 __kmp_free(counts);
2621 CLEANUP_THREAD_INFO;
2622 return depth;
2623}
2624
2625
2626//
2627// Create and return a table of affinity masks, indexed by OS thread ID.
2628// This routine handles OR'ing together all the affinity masks of threads
2629// that are sufficiently close, if granularity > fine.
2630//
2631static kmp_affin_mask_t *
2632__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
2633 AddrUnsPair *address2os, unsigned numAddrs)
2634{
2635 //
2636 // First form a table of affinity masks in order of OS thread id.
2637 //
2638 unsigned depth;
2639 unsigned maxOsId;
2640 unsigned i;
2641
2642 KMP_ASSERT(numAddrs > 0);
2643 depth = address2os[0].first.depth;
2644
2645 maxOsId = 0;
2646 for (i = 0; i < numAddrs; i++) {
2647 unsigned osId = address2os[i].second;
2648 if (osId > maxOsId) {
2649 maxOsId = osId;
2650 }
2651 }
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002652 kmp_affin_mask_t *osId2Mask;
2653 KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId+1));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002654
2655 //
2656 // Sort the address2os table according to physical order. Doing so
2657 // will put all threads on the same core/package/node in consecutive
2658 // locations.
2659 //
2660 qsort(address2os, numAddrs, sizeof(*address2os),
2661 __kmp_affinity_cmp_Address_labels);
2662
2663 KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2664 if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
2665 KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
2666 }
2667 if (__kmp_affinity_gran_levels >= (int)depth) {
2668 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2669 && (__kmp_affinity_type != affinity_none))) {
2670 KMP_WARNING(AffThreadsMayMigrate);
2671 }
2672 }
2673
2674 //
2675 // Run through the table, forming the masks for all threads on each
2676 // core. Threads on the same core will have identical "Address"
2677 // objects, not considering the last level, which must be the thread
2678 // id. All threads on a core will appear consecutively.
2679 //
2680 unsigned unique = 0;
2681 unsigned j = 0; // index of 1st thread on core
2682 unsigned leader = 0;
2683 Address *leaderAddr = &(address2os[0].first);
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002684 kmp_affin_mask_t *sum;
2685 KMP_CPU_ALLOC_ON_STACK(sum);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002686 KMP_CPU_ZERO(sum);
2687 KMP_CPU_SET(address2os[0].second, sum);
2688 for (i = 1; i < numAddrs; i++) {
2689 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002690 // If this thread is sufficiently close to the leader (within the
Jim Cownie5e8470a2013-09-27 10:38:44 +00002691 // granularity setting), then set the bit for this os thread in the
2692 // affinity mask for this group, and go on to the next thread.
2693 //
2694 if (leaderAddr->isClose(address2os[i].first,
2695 __kmp_affinity_gran_levels)) {
2696 KMP_CPU_SET(address2os[i].second, sum);
2697 continue;
2698 }
2699
2700 //
2701 // For every thread in this group, copy the mask to the thread's
2702 // entry in the osId2Mask table. Mark the first address as a
2703 // leader.
2704 //
2705 for (; j < i; j++) {
2706 unsigned osId = address2os[j].second;
2707 KMP_DEBUG_ASSERT(osId <= maxOsId);
2708 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2709 KMP_CPU_COPY(mask, sum);
2710 address2os[j].first.leader = (j == leader);
2711 }
2712 unique++;
2713
2714 //
2715 // Start a new mask.
2716 //
2717 leader = i;
2718 leaderAddr = &(address2os[i].first);
2719 KMP_CPU_ZERO(sum);
2720 KMP_CPU_SET(address2os[i].second, sum);
2721 }
2722
2723 //
2724 // For every thread in last group, copy the mask to the thread's
2725 // entry in the osId2Mask table.
2726 //
2727 for (; j < i; j++) {
2728 unsigned osId = address2os[j].second;
2729 KMP_DEBUG_ASSERT(osId <= maxOsId);
2730 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2731 KMP_CPU_COPY(mask, sum);
2732 address2os[j].first.leader = (j == leader);
2733 }
2734 unique++;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002735 KMP_CPU_FREE_FROM_STACK(sum);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002736
2737 *maxIndex = maxOsId;
2738 *numUnique = unique;
2739 return osId2Mask;
2740}
2741
2742
2743//
2744// Stuff for the affinity proclist parsers. It's easier to declare these vars
2745// as file-static than to try and pass them through the calling sequence of
2746// the recursive-descent OMP_PLACES parser.
2747//
2748static kmp_affin_mask_t *newMasks;
2749static int numNewMasks;
2750static int nextNewMask;
2751
2752#define ADD_MASK(_mask) \
2753 { \
2754 if (nextNewMask >= numNewMasks) { \
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002755 int i; \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002756 numNewMasks *= 2; \
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002757 kmp_affin_mask_t* temp; \
2758 KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks); \
2759 for(i=0;i<numNewMasks/2;i++) { \
2760 kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i); \
2761 kmp_affin_mask_t* dest = KMP_CPU_INDEX(temp, i); \
2762 KMP_CPU_COPY(dest, src); \
2763 } \
2764 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks/2); \
2765 newMasks = temp; \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002766 } \
2767 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
2768 nextNewMask++; \
2769 }
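//
// Note: ADD_MASK grows the newMasks vector geometrically - when it is
// full, a vector of twice the size is allocated and the existing masks
// are copied over, so appending n masks costs O(n) copies overall.
//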
2770
2771#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
2772 { \
2773 if (((_osId) > _maxOsId) || \
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002774 (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002775 if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
2776 && (__kmp_affinity_type != affinity_none))) { \
2777 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
2778 } \
2779 } \
2780 else { \
2781 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
2782 } \
2783 }
2784
2785
2786//
2787// Re-parse the proclist (for the explicit affinity type), and form the list
2788// of affinity newMasks indexed by gtid.
2789//
2790static void
2791__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
2792 unsigned int *out_numMasks, const char *proclist,
2793 kmp_affin_mask_t *osId2Mask, int maxOsId)
2794{
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002795 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002796 const char *scan = proclist;
2797 const char *next = proclist;
2798
2799 //
2800 // Allocate the temporary mask vector with the internal allocator;
2801 // the ADD_MASK macro grows it by doubling when it fills up.
2802 //
2803 numNewMasks = 2;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002804 KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002805 nextNewMask = 0;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002806 kmp_affin_mask_t *sumMask;
2807 KMP_CPU_ALLOC(sumMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002808 int setSize = 0;
2809
2810 for (;;) {
2811 int start, end, stride;
2812
2813 SKIP_WS(scan);
2814 next = scan;
2815 if (*next == '\0') {
2816 break;
2817 }
2818
2819 if (*next == '{') {
2820 int num;
2821 setSize = 0;
2822 next++; // skip '{'
2823 SKIP_WS(next);
2824 scan = next;
2825
2826 //
2827 // Read the first integer in the set.
2828 //
2829 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2830 "bad proclist");
2831 SKIP_DIGITS(next);
2832 num = __kmp_str_to_int(scan, *next);
2833 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2834
2835 //
2836 // Copy the mask for that osId to the sum (union) mask.
2837 //
2838 if ((num > maxOsId) ||
2839 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2840 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2841 && (__kmp_affinity_type != affinity_none))) {
2842 KMP_WARNING(AffIgnoreInvalidProcID, num);
2843 }
2844 KMP_CPU_ZERO(sumMask);
2845 }
2846 else {
2847 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2848 setSize = 1;
2849 }
2850
2851 for (;;) {
2852 //
2853 // Check for end of set.
2854 //
2855 SKIP_WS(next);
2856 if (*next == '}') {
2857 next++; // skip '}'
2858 break;
2859 }
2860
2861 //
2862 // Skip optional comma.
2863 //
2864 if (*next == ',') {
2865 next++;
2866 }
2867 SKIP_WS(next);
2868
2869 //
2870 // Read the next integer in the set.
2871 //
2872 scan = next;
2873 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2874 "bad explicit proc list");
2875
2876 SKIP_DIGITS(next);
2877 num = __kmp_str_to_int(scan, *next);
2878 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2879
2880 //
2881 // Add the mask for that osId to the sum mask.
2882 //
2883 if ((num > maxOsId) ||
2884 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2885 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2886 && (__kmp_affinity_type != affinity_none))) {
2887 KMP_WARNING(AffIgnoreInvalidProcID, num);
2888 }
2889 }
2890 else {
2891 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2892 setSize++;
2893 }
2894 }
2895 if (setSize > 0) {
2896 ADD_MASK(sumMask);
2897 }
2898
2899 SKIP_WS(next);
2900 if (*next == ',') {
2901 next++;
2902 }
2903 scan = next;
2904 continue;
2905 }
2906
2907 //
2908 // Read the first integer.
2909 //
2910 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2911 SKIP_DIGITS(next);
2912 start = __kmp_str_to_int(scan, *next);
2913 KMP_ASSERT2(start >= 0, "bad explicit proc list");
2914 SKIP_WS(next);
2915
2916 //
2917 // If this isn't a range, then add a mask to the list and go on.
2918 //
2919 if (*next != '-') {
2920 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2921
2922 //
2923 // Skip optional comma.
2924 //
2925 if (*next == ',') {
2926 next++;
2927 }
2928 scan = next;
2929 continue;
2930 }
2931
2932 //
2933 // This is a range. Skip over the '-' and read in the 2nd int.
2934 //
2935 next++; // skip '-'
2936 SKIP_WS(next);
2937 scan = next;
2938 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2939 SKIP_DIGITS(next);
2940 end = __kmp_str_to_int(scan, *next);
2941 KMP_ASSERT2(end >= 0, "bad explicit proc list");
2942
2943 //
2944 // Check for a stride parameter
2945 //
2946 stride = 1;
2947 SKIP_WS(next);
2948 if (*next == ':') {
2949 //
2950 // A stride is specified. Skip over the ':' and read the 3rd int.
2951 //
2952 int sign = +1;
2953 next++; // skip ':'
2954 SKIP_WS(next);
2955 scan = next;
2956 if (*next == '-') {
2957 sign = -1;
2958 next++;
2959 SKIP_WS(next);
2960 scan = next;
2961 }
2962 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2963 "bad explicit proc list");
2964 SKIP_DIGITS(next);
2965 stride = __kmp_str_to_int(scan, *next);
2966 KMP_ASSERT2(stride >= 0, "bad explicit proc list");
2967 stride *= sign;
2968 }
2969
2970 //
2971 // Do some range checks.
2972 //
2973 KMP_ASSERT2(stride != 0, "bad explicit proc list");
2974 if (stride > 0) {
2975 KMP_ASSERT2(start <= end, "bad explicit proc list");
2976 }
2977 else {
2978 KMP_ASSERT2(start >= end, "bad explicit proc list");
2979 }
2980 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
2981
2982 //
2983 // Add the mask for each OS proc # to the list.
2984 //
2985 if (stride > 0) {
2986 do {
2987 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2988 start += stride;
2989 } while (start <= end);
2990 }
2991 else {
2992 do {
2993 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2994 start += stride;
2995 } while (start >= end);
2996 }
2997
2998 //
2999 // Skip optional comma.
3000 //
3001 SKIP_WS(next);
3002 if (*next == ',') {
3003 next++;
3004 }
3005 scan = next;
3006 }
3007
3008 *out_numMasks = nextNewMask;
3009 if (nextNewMask == 0) {
3010 *out_masks = NULL;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003011 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003012 return;
3013 }
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003014 KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
3015 for(i = 0; i < nextNewMask; i++) {
3016 kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i);
3017 kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i);
3018 KMP_CPU_COPY(dest, src);
3019 }
3020 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
3021 KMP_CPU_FREE(sumMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003022}
3023
3024
3025# if OMP_40_ENABLED
3026
3027/*-----------------------------------------------------------------------------
3028
3029Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
3030 places. Again, here is the grammar:
3031
3032place_list := place
3033place_list := place , place_list
3034place := num
3035place := place : num
3036place := place : num : signed
3037 place := { subplace_list }
3038place := ! place // (lowest priority)
3039subplace_list := subplace
3040subplace_list := subplace , subplace_list
3041subplace := num
3042subplace := num : num
3043subplace := num : num : signed
3044signed := num
3045signed := + signed
3046signed := - signed
3047
3048-----------------------------------------------------------------------------*/
3049
3050static void
3051__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
3052 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3053{
3054 const char *next;
3055
3056 for (;;) {
3057 int start, count, stride, i;
3058
3059 //
3060 // Read in the starting proc id
3061 //
3062 SKIP_WS(*scan);
3063 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3064 "bad explicit places list");
3065 next = *scan;
3066 SKIP_DIGITS(next);
3067 start = __kmp_str_to_int(*scan, *next);
3068 KMP_ASSERT(start >= 0);
3069 *scan = next;
3070
3071 //
3072 // valid follow sets are ',' ':' and '}'
3073 //
3074 SKIP_WS(*scan);
3075 if (**scan == '}' || **scan == ',') {
3076 if ((start > maxOsId) ||
3077 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3078 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3079 && (__kmp_affinity_type != affinity_none))) {
3080 KMP_WARNING(AffIgnoreInvalidProcID, start);
3081 }
3082 }
3083 else {
3084 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3085 (*setSize)++;
3086 }
3087 if (**scan == '}') {
3088 break;
3089 }
3090 (*scan)++; // skip ','
3091 continue;
3092 }
3093 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3094 (*scan)++; // skip ':'
3095
3096 //
3097 // Read count parameter
3098 //
3099 SKIP_WS(*scan);
3100 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3101 "bad explicit places list");
3102 next = *scan;
3103 SKIP_DIGITS(next);
3104 count = __kmp_str_to_int(*scan, *next);
3105 KMP_ASSERT(count >= 0);
3106 *scan = next;
3107
3108 //
3109 // valid follow sets are ',' ':' and '}'
3110 //
3111 SKIP_WS(*scan);
3112 if (**scan == '}' || **scan == ',') {
3113 for (i = 0; i < count; i++) {
3114 if ((start > maxOsId) ||
3115 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3116 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3117 && (__kmp_affinity_type != affinity_none))) {
3118 KMP_WARNING(AffIgnoreInvalidProcID, start);
3119 }
3120 break; // don't proliferate warnings for large count
3121 }
3122 else {
3123 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3124 start++;
3125 (*setSize)++;
3126 }
3127 }
3128 if (**scan == '}') {
3129 break;
3130 }
3131 (*scan)++; // skip ','
3132 continue;
3133 }
3134 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3135 (*scan)++; // skip ':'
3136
3137 //
3138 // Read stride parameter
3139 //
3140 int sign = +1;
3141 for (;;) {
3142 SKIP_WS(*scan);
3143 if (**scan == '+') {
3144 (*scan)++; // skip '+'
3145 continue;
3146 }
3147 if (**scan == '-') {
3148 sign *= -1;
3149 (*scan)++; // skip '-'
3150 continue;
3151 }
3152 break;
3153 }
3154 SKIP_WS(*scan);
3155 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3156 "bad explicit places list");
3157 next = *scan;
3158 SKIP_DIGITS(next);
3159 stride = __kmp_str_to_int(*scan, *next);
3160 KMP_ASSERT(stride >= 0);
3161 *scan = next;
3162 stride *= sign;
3163
3164 //
3165 // valid follow sets are ',' and '}'
3166 //
3167 SKIP_WS(*scan);
3168 if (**scan == '}' || **scan == ',') {
3169 for (i = 0; i < count; i++) {
3170 if ((start > maxOsId) ||
3171 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3172 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3173 && (__kmp_affinity_type != affinity_none))) {
3174 KMP_WARNING(AffIgnoreInvalidProcID, start);
3175 }
3176 break; // don't proliferate warnings for large count
3177 }
3178 else {
3179 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3180 start += stride;
3181 (*setSize)++;
3182 }
3183 }
3184 if (**scan == '}') {
3185 break;
3186 }
3187 (*scan)++; // skip ','
3188 continue;
3189 }
3190
3191 KMP_ASSERT2(0, "bad explicit places list");
3192 }
3193}
3194
3195
3196static void
3197__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
3198 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3199{
3200 const char *next;
3201
3202 //
3203 // valid follow sets are '{' '!' and num
3204 //
3205 SKIP_WS(*scan);
3206 if (**scan == '{') {
3207 (*scan)++; // skip '{'
3208 __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
3209 setSize);
3210 KMP_ASSERT2(**scan == '}', "bad explicit places list");
3211 (*scan)++; // skip '}'
3212 }
3213 else if (**scan == '!') {
Jonathan Peyton6778c732015-10-19 19:43:01 +00003214 (*scan)++; // skip '!'
Jim Cownie5e8470a2013-09-27 10:38:44 +00003215 __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003216 KMP_CPU_COMPLEMENT(maxOsId, tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003217 }
3218 else if ((**scan >= '0') && (**scan <= '9')) {
3219 next = *scan;
3220 SKIP_DIGITS(next);
3221 int num = __kmp_str_to_int(*scan, *next);
3222 KMP_ASSERT(num >= 0);
3223 if ((num > maxOsId) ||
3224 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3225 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3226 && (__kmp_affinity_type != affinity_none))) {
3227 KMP_WARNING(AffIgnoreInvalidProcID, num);
3228 }
3229 }
3230 else {
3231 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
3232 (*setSize)++;
3233 }
3234 *scan = next; // skip num
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003235 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003236 else {
3237 KMP_ASSERT2(0, "bad explicit places list");
3238 }
3239}
3240
3241
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003242//static void
3243void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003244__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
3245 unsigned int *out_numMasks, const char *placelist,
3246 kmp_affin_mask_t *osId2Mask, int maxOsId)
3247{
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003248 int i,j,count,stride,sign;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003249 const char *scan = placelist;
3250 const char *next = placelist;
3251
3252 numNewMasks = 2;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003253 KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003254 nextNewMask = 0;
3255
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003256 // tempMask is modified based on the previous or initial
3257 // place to form the current place
3258 // previousMask contains the previous place
3259 kmp_affin_mask_t *tempMask;
3260 kmp_affin_mask_t *previousMask;
3261 KMP_CPU_ALLOC(tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003262 KMP_CPU_ZERO(tempMask);
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003263 KMP_CPU_ALLOC(previousMask);
3264 KMP_CPU_ZERO(previousMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003265 int setSize = 0;
3266
3267 for (;;) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003268 __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
3269
3270 //
3271 // valid follow sets are ',' ':' and EOL
3272 //
3273 SKIP_WS(scan);
3274 if (*scan == '\0' || *scan == ',') {
3275 if (setSize > 0) {
3276 ADD_MASK(tempMask);
3277 }
3278 KMP_CPU_ZERO(tempMask);
3279 setSize = 0;
3280 if (*scan == '\0') {
3281 break;
3282 }
3283 scan++; // skip ','
3284 continue;
3285 }
3286
3287 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3288 scan++; // skip ':'
3289
3290 //
3291 // Read count parameter
3292 //
3293 SKIP_WS(scan);
3294 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3295 "bad explicit places list");
3296 next = scan;
3297 SKIP_DIGITS(next);
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003298 count = __kmp_str_to_int(scan, *next);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003299 KMP_ASSERT(count >= 0);
3300 scan = next;
3301
3302 //
3303 // valid follow sets are ',' ':' and EOL
3304 //
3305 SKIP_WS(scan);
3306 if (*scan == '\0' || *scan == ',') {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003307 stride = +1;
3308 }
3309 else {
3310 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3311 scan++; // skip ':'
Jim Cownie5e8470a2013-09-27 10:38:44 +00003312
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003313 //
3314 // Read stride parameter
3315 //
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003316 sign = +1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003317 for (;;) {
3318 SKIP_WS(scan);
3319 if (*scan == '+') {
3320 scan++; // skip '+'
3321 continue;
3322 }
3323 if (*scan == '-') {
3324 sign *= -1;
3325 scan++; // skip '-'
3326 continue;
3327 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003328 break;
3329 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003330 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003331 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3332 "bad explicit places list");
3333 next = scan;
3334 SKIP_DIGITS(next);
3335 stride = __kmp_str_to_int(scan, *next);
3336 KMP_DEBUG_ASSERT(stride >= 0);
3337 scan = next;
3338 stride *= sign;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003339 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003340
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003341 // Add places determined by initial_place : count : stride
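    // For example (assuming OS procs 0..7 are all available), "{0,1}:3:2"
    // expands to the three places {0,1}, {2,3} and {4,5}: each new place is
    // the previous one shifted by the stride.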
3342 for (i = 0; i < count; i++) {
3343 if (setSize == 0) {
3344 break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003345 }
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003346 // Add the current place, then build the next place (tempMask) from that
3347 KMP_CPU_COPY(previousMask, tempMask);
3348 ADD_MASK(previousMask);
3349 KMP_CPU_ZERO(tempMask);
3350 setSize = 0;
3351 KMP_CPU_SET_ITERATE(j, previousMask) {
3352 if (! KMP_CPU_ISSET(j, previousMask)) {
3353 continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003354 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00003355 if ((j+stride > maxOsId) || (j+stride < 0) ||
3356 (! KMP_CPU_ISSET(j, __kmp_affin_fullMask)) ||
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003357 (! KMP_CPU_ISSET(j+stride, KMP_CPU_INDEX(osId2Mask, j+stride)))) {
3358 if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
3359 && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
3360 KMP_WARNING(AffIgnoreInvalidProcID, j+stride);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003361 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00003362 continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003363 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00003364 KMP_CPU_SET(j+stride, tempMask);
3365 setSize++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003366 }
3367 }
3368 KMP_CPU_ZERO(tempMask);
3369 setSize = 0;
3370
3371 //
3372 // valid follow sets are ',' and EOL
3373 //
3374 SKIP_WS(scan);
3375 if (*scan == '\0') {
3376 break;
3377 }
3378 if (*scan == ',') {
3379 scan++; // skip ','
3380 continue;
3381 }
3382
3383 KMP_ASSERT2(0, "bad explicit places list");
3384 }
3385
3386 *out_numMasks = nextNewMask;
3387 if (nextNewMask == 0) {
3388 *out_masks = NULL;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003389 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003390 return;
3391 }
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003392 KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
3393 KMP_CPU_FREE(tempMask);
3394 KMP_CPU_FREE(previousMask);
3395 for(i = 0; i < nextNewMask; i++) {
3396 kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i);
3397 kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i);
3398 KMP_CPU_COPY(dest, src);
3399 }
3400 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003401}
3402
3403# endif /* OMP_40_ENABLED */
3404
3405#undef ADD_MASK
3406#undef ADD_MASK_OSID
3407
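//
// Prune the topology according to the hardware-subset request (KMP_HW_SUBSET,
// formerly KMP_PLACE_THREADS). For example, a request of 1 socket, 2 cores
// per socket and 1 thread per core (e.g. "1s,2c,1t") would keep two HW
// threads and clear all others from the full mask.
//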
Jim Cownie5e8470a2013-09-27 10:38:44 +00003408static void
3409__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
3410{
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003411 int i, j, k, n_old = 0, n_new = 0, proc_num = 0;
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003412 if (__kmp_place_num_sockets == 0 &&
3413 __kmp_place_num_cores == 0 &&
3414 __kmp_place_num_threads_per_core == 0 )
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003415 goto _exit; // no topology limiting actions requested, exit
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003416 if (__kmp_place_num_sockets == 0)
3417 __kmp_place_num_sockets = nPackages; // use all available sockets
3418 if (__kmp_place_num_cores == 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003419 __kmp_place_num_cores = nCoresPerPkg; // use all available cores
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003420 if (__kmp_place_num_threads_per_core == 0 ||
3421 __kmp_place_num_threads_per_core > __kmp_nThreadsPerCore)
3422 __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
3423
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003424 if ( !__kmp_affinity_uniform_topology() ) {
Jonathan Peytonb9d28fb2016-06-16 18:53:48 +00003425 KMP_WARNING( AffHWSubsetNonUniform );
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003426 goto _exit; // don't support non-uniform topology
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003427 }
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003428 if ( depth > 3 ) {
Jonathan Peytonb9d28fb2016-06-16 18:53:48 +00003429 KMP_WARNING( AffHWSubsetNonThreeLevel );
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003430 goto _exit; // topologies deeper than 3 levels are not supported
Jim Cownie5e8470a2013-09-27 10:38:44 +00003431 }
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003432 if (__kmp_place_socket_offset + __kmp_place_num_sockets > nPackages) {
Jonathan Peytonb9d28fb2016-06-16 18:53:48 +00003433 KMP_WARNING(AffHWSubsetManySockets);
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003434 goto _exit;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003435 }
Andrey Churbanov12875572015-03-10 09:00:36 +00003436 if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
Jonathan Peytonb9d28fb2016-06-16 18:53:48 +00003437 KMP_WARNING( AffHWSubsetManyCores );
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003438 goto _exit;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003439 }
3440
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003441 AddrUnsPair *newAddr;
3442 if (pAddr) // pAddr is NULL in case of affinity_none
3443 newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
3444 __kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003445
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003446 for (i = 0; i < nPackages; ++i) {
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003447 if (i < __kmp_place_socket_offset ||
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003448 i >= __kmp_place_socket_offset + __kmp_place_num_sockets) {
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003449 n_old += nCoresPerPkg * __kmp_nThreadsPerCore; // skip not-requested socket
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003450 if (__kmp_pu_os_idx != NULL) {
3451 for (j = 0; j < nCoresPerPkg; ++j) { // walk through skipped socket
3452 for (k = 0; k < __kmp_nThreadsPerCore; ++k) {
3453 KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
3454 ++proc_num;
3455 }
3456 }
3457 }
3458 } else {
3459 for (j = 0; j < nCoresPerPkg; ++j) { // walk through requested socket
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003460 if (j < __kmp_place_core_offset ||
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003461 j >= __kmp_place_core_offset + __kmp_place_num_cores) {
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003462 n_old += __kmp_nThreadsPerCore; // skip not-requested core
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003463 if (__kmp_pu_os_idx != NULL) {
3464 for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through skipped core
3465 KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
3466 ++proc_num;
3467 }
3468 }
3469 } else {
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003470 for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through requested core
3471 if (k < __kmp_place_num_threads_per_core) {
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003472 if (pAddr)
3473 newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003474 n_new++;
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003475 } else {
3476 if (__kmp_pu_os_idx != NULL)
3477 KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003478 }
3479 n_old++;
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003480 ++proc_num;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003481 }
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003482 }
3483 }
3484 }
3485 }
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003486 KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
3487 KMP_DEBUG_ASSERT(n_new == __kmp_place_num_sockets * __kmp_place_num_cores *
3488 __kmp_place_num_threads_per_core);
3489
3490 nPackages = __kmp_place_num_sockets; // correct nPackages
Jim Cownie5e8470a2013-09-27 10:38:44 +00003491 nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
3492 __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
3493 __kmp_avail_proc = n_new; // correct avail_proc
3494 __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
3495
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003496 if (pAddr) {
3497 __kmp_free( *pAddr );
3498 *pAddr = newAddr; // replace old topology with new one
3499 }
3500_exit:
3501 if (__kmp_pu_os_idx != NULL) {
3502 __kmp_free(__kmp_pu_os_idx);
3503 __kmp_pu_os_idx = NULL;
3504 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003505}
3506
Paul Osmialowskiecbe2ea2016-07-29 20:55:03 +00003507//
3508// This function figures out the deepest level at which there is at least one cluster/core
3509// with more than one processing unit bound to it.
3510//
3511static int
3512__kmp_affinity_find_core_level(const AddrUnsPair *address2os, int nprocs, int bottom_level)
3513{
3514 int core_level = 0;
3515
3516 for( int i = 0; i < nprocs; i++ ) {
3517 for( int j = bottom_level; j > 0; j-- ) {
3518 if( address2os[i].first.labels[j] > 0 ) {
3519 if( core_level < ( j - 1 ) ) {
3520 core_level = j - 1;
3521 }
3522 }
3523 }
3524 }
3525 return core_level;
3526}
3527
3528//
3529 // This function counts the number of clusters/cores at the given level.
3530//
3531static int __kmp_affinity_compute_ncores(const AddrUnsPair *address2os, int nprocs, int bottom_level, int core_level)
3532{
3533 int ncores = 0;
3534 int i, j;
3535
3536 j = bottom_level;
3537 for( i = 0; i < nprocs; i++ ) {
3538 for ( j = bottom_level; j > core_level; j-- ) {
3539 if( ( i + 1 ) < nprocs ) {
3540 if( address2os[i + 1].first.labels[j] > 0 ) {
3541 break;
3542 }
3543 }
3544 }
3545 if( j == core_level ) {
3546 ncores++;
3547 }
3548 }
3549 if( j > core_level ) {
3550 //
3551 // In case of ( nprocs < __kmp_avail_proc ) we may end up too deep and miss one core.
3552 // May occur when called from __kmp_affinity_find_core().
3553 //
3554 ncores++;
3555 }
3556 return ncores;
3557}
3558
3559//
3560 // This function finds the cluster/core to which the given processing unit is bound.
3561//
3562static int __kmp_affinity_find_core(const AddrUnsPair *address2os, int proc, int bottom_level, int core_level)
3563{
3564 return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level, core_level) - 1;
3565}
3566
3567//
3568 // This function finds the maximal number of processing units bound to a cluster/core at the given level.
3569//
3570static int __kmp_affinity_max_proc_per_core(const AddrUnsPair *address2os, int nprocs, int bottom_level, int core_level)
3571{
3572 int maxprocpercore = 0;
3573
3574 if( core_level < bottom_level ) {
3575 for( int i = 0; i < nprocs; i++ ) {
3576 int percore = address2os[i].first.labels[core_level + 1] + 1;
3577
3578 if( percore > maxprocpercore ) {
3579 maxprocpercore = percore;
3580 }
3581 }
3582 } else {
3583 maxprocpercore = 1;
3584 }
3585 return maxprocpercore;
3586}
Jim Cownie5e8470a2013-09-27 10:38:44 +00003587
3588static AddrUnsPair *address2os = NULL;
3589static int * procarr = NULL;
3590static int __kmp_aff_depth = 0;
3591
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003592#define KMP_EXIT_AFF_NONE \
3593 KMP_ASSERT(__kmp_affinity_type == affinity_none); \
3594 KMP_ASSERT(address2os == NULL); \
3595 __kmp_apply_thread_places(NULL, 0); \
3596 return;
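// Used when a topology method returns depth == 0, i.e. the method succeeded
// but __kmp_affinity_type is affinity_none, so no address2os table is kept.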
3597
Jonathan Peytone6abe522016-09-02 20:54:58 +00003598static int
3599__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
3600{
3601 const Address *aa = (const Address *)&(((AddrUnsPair *)a)
3602 ->first);
3603 const Address *bb = (const Address *)&(((AddrUnsPair *)b)
3604 ->first);
3605 unsigned depth = aa->depth;
3606 unsigned i;
3607 KMP_DEBUG_ASSERT(depth == bb->depth);
3608 KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
3609 KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
3610 for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
3611 int j = depth - i - 1;
3612 if (aa->childNums[j] < bb->childNums[j]) return -1;
3613 if (aa->childNums[j] > bb->childNums[j]) return 1;
3614 }
3615 for (; i < depth; i++) {
3616 int j = i - __kmp_affinity_compact;
3617 if (aa->childNums[j] < bb->childNums[j]) return -1;
3618 if (aa->childNums[j] > bb->childNums[j]) return 1;
3619 }
3620 return 0;
3621}
3622
Jim Cownie5e8470a2013-09-27 10:38:44 +00003623static void
3624__kmp_aux_affinity_initialize(void)
3625{
3626 if (__kmp_affinity_masks != NULL) {
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00003627 KMP_ASSERT(__kmp_affin_fullMask != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003628 return;
3629 }
3630
3631 //
3632 // Create the "full" mask - this defines all of the processors that we
3633 // consider to be in the machine model. If respect is set, then it is
3634 // the initialization thread's affinity mask. Otherwise, it is all
3635 // processors that we know about on the machine.
3636 //
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00003637 if (__kmp_affin_fullMask == NULL) {
3638 KMP_CPU_ALLOC(__kmp_affin_fullMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003639 }
3640 if (KMP_AFFINITY_CAPABLE()) {
3641 if (__kmp_affinity_respect_mask) {
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00003642 __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003643
3644 //
3645 // Count the number of available processors.
3646 //
3647 unsigned i;
3648 __kmp_avail_proc = 0;
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00003649 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
3650 if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003651 continue;
3652 }
3653 __kmp_avail_proc++;
3654 }
3655 if (__kmp_avail_proc > __kmp_xproc) {
3656 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3657 && (__kmp_affinity_type != affinity_none))) {
3658 KMP_WARNING(ErrorInitializeAffinity);
3659 }
3660 __kmp_affinity_type = affinity_none;
Andrey Churbanov1f037e42015-03-10 09:15:26 +00003661 KMP_AFFINITY_DISABLE();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003662 return;
3663 }
3664 }
3665 else {
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00003666 __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003667 __kmp_avail_proc = __kmp_xproc;
3668 }
3669 }
3670
3671 int depth = -1;
3672 kmp_i18n_id_t msg_id = kmp_i18n_null;
3673
3674 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00003675 // For backward compatibility, setting KMP_CPUINFO_FILE =>
Jim Cownie5e8470a2013-09-27 10:38:44 +00003676 // KMP_TOPOLOGY_METHOD=cpuinfo
3677 //
3678 if ((__kmp_cpuinfo_file != NULL) &&
3679 (__kmp_affinity_top_method == affinity_top_method_all)) {
3680 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
3681 }
3682
3683 if (__kmp_affinity_top_method == affinity_top_method_all) {
3684 //
3685 // In the default code path, errors are not fatal - we just try using
3686 // another method. We only emit a warning message if affinity is on,
3687 // or the verbose flag is set, and the nowarnings flag was not set.
3688 //
3689 const char *file_name = NULL;
3690 int line = 0;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003691# if KMP_USE_HWLOC
Jonathan Peyton1cdd87a2016-11-14 21:08:35 +00003692 if (depth < 0 && __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003693 if (__kmp_affinity_verbose) {
3694 KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
3695 }
3696 if(!__kmp_hwloc_error) {
3697 depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
3698 if (depth == 0) {
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003699 KMP_EXIT_AFF_NONE;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003700 } else if(depth < 0 && __kmp_affinity_verbose) {
3701 KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
3702 }
3703 } else if(__kmp_affinity_verbose) {
3704 KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
3705 }
3706 }
3707# endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003708
3709# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3710
Jim Cownie5e8470a2013-09-27 10:38:44 +00003711 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003712 if (__kmp_affinity_verbose) {
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003713 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003714 }
3715
3716 file_name = NULL;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003717 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003718 if (depth == 0) {
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003719 KMP_EXIT_AFF_NONE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003720 }
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003721
3722 if (depth < 0) {
3723 if (__kmp_affinity_verbose) {
3724 if (msg_id != kmp_i18n_null) {
3725 KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
3726 KMP_I18N_STR(DecodingLegacyAPIC));
3727 }
3728 else {
3729 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
3730 }
3731 }
3732
3733 file_name = NULL;
3734 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3735 if (depth == 0) {
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003736 KMP_EXIT_AFF_NONE;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003737 }
3738 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003739 }
3740
3741# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3742
3743# if KMP_OS_LINUX
3744
3745 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003746 if (__kmp_affinity_verbose) {
3747 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003748 KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
3749 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003750 else {
3751 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
3752 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003753 }
3754
3755 FILE *f = fopen("/proc/cpuinfo", "r");
3756 if (f == NULL) {
3757 msg_id = kmp_i18n_str_CantOpenCpuinfo;
3758 }
3759 else {
3760 file_name = "/proc/cpuinfo";
3761 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3762 fclose(f);
3763 if (depth == 0) {
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003764 KMP_EXIT_AFF_NONE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003765 }
3766 }
3767 }
3768
3769# endif /* KMP_OS_LINUX */
3770
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003771# if KMP_GROUP_AFFINITY
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003772
3773 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
3774 if (__kmp_affinity_verbose) {
3775 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3776 }
3777
3778 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3779 KMP_ASSERT(depth != 0);
3780 }
3781
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003782# endif /* KMP_GROUP_AFFINITY */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003783
Jim Cownie5e8470a2013-09-27 10:38:44 +00003784 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003785 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003786 if (file_name == NULL) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003787 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003788 }
3789 else if (line == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003790 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003791 }
3792 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003793 KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003794 }
3795 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003796 // FIXME - print msg if msg_id == kmp_i18n_null ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00003797
3798 file_name = "";
3799 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3800 if (depth == 0) {
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003801 KMP_EXIT_AFF_NONE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003802 }
3803 KMP_ASSERT(depth > 0);
3804 KMP_ASSERT(address2os != NULL);
3805 }
3806 }
3807
3808 //
3809 // If the user has specified that a particular topology discovery method
3810 // is to be used, then we abort if that method fails. The exception is
3811 // group affinity, which might have been implicitly set.
3812 //
3813
3814# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3815
3816 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
3817 if (__kmp_affinity_verbose) {
3818 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3819 KMP_I18N_STR(Decodingx2APIC));
3820 }
3821
3822 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3823 if (depth == 0) {
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003824 KMP_EXIT_AFF_NONE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003825 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003826 if (depth < 0) {
3827 KMP_ASSERT(msg_id != kmp_i18n_null);
3828 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3829 }
3830 }
3831 else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
3832 if (__kmp_affinity_verbose) {
3833 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3834 KMP_I18N_STR(DecodingLegacyAPIC));
3835 }
3836
3837 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3838 if (depth == 0) {
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003839 KMP_EXIT_AFF_NONE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003840 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003841 if (depth < 0) {
3842 KMP_ASSERT(msg_id != kmp_i18n_null);
3843 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3844 }
3845 }
3846
3847# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3848
3849 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
3850 const char *filename;
3851 if (__kmp_cpuinfo_file != NULL) {
3852 filename = __kmp_cpuinfo_file;
3853 }
3854 else {
3855 filename = "/proc/cpuinfo";
3856 }
3857
3858 if (__kmp_affinity_verbose) {
3859 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
3860 }
3861
3862 FILE *f = fopen(filename, "r");
3863 if (f == NULL) {
3864 int code = errno;
3865 if (__kmp_cpuinfo_file != NULL) {
3866 __kmp_msg(
3867 kmp_ms_fatal,
3868 KMP_MSG(CantOpenFileForReading, filename),
3869 KMP_ERR(code),
3870 KMP_HNT(NameComesFrom_CPUINFO_FILE),
3871 __kmp_msg_null
3872 );
3873 }
3874 else {
3875 __kmp_msg(
3876 kmp_ms_fatal,
3877 KMP_MSG(CantOpenFileForReading, filename),
3878 KMP_ERR(code),
3879 __kmp_msg_null
3880 );
3881 }
3882 }
3883 int line = 0;
3884 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3885 fclose(f);
3886 if (depth < 0) {
3887 KMP_ASSERT(msg_id != kmp_i18n_null);
3888 if (line > 0) {
3889 KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
3890 }
3891 else {
3892 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
3893 }
3894 }
3895 if (__kmp_affinity_type == affinity_none) {
3896 KMP_ASSERT(depth == 0);
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003897 KMP_EXIT_AFF_NONE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003898 }
3899 }
3900
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003901# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00003902
3903 else if (__kmp_affinity_top_method == affinity_top_method_group) {
3904 if (__kmp_affinity_verbose) {
3905 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3906 }
3907
3908 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3909 KMP_ASSERT(depth != 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003910 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003911 KMP_ASSERT(msg_id != kmp_i18n_null);
3912 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003913 }
3914 }
3915
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003916# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003917
3918 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
3919 if (__kmp_affinity_verbose) {
3920 KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
3921 }
3922
3923 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3924 if (depth == 0) {
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003925 KMP_EXIT_AFF_NONE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003926 }
3927 // should not fail
3928 KMP_ASSERT(depth > 0);
3929 KMP_ASSERT(address2os != NULL);
3930 }
3931
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003932# if KMP_USE_HWLOC
3933 else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
Jonathan Peyton1cdd87a2016-11-14 21:08:35 +00003934 KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003935 if (__kmp_affinity_verbose) {
3936 KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
3937 }
3938 depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
3939 if (depth == 0) {
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00003940 KMP_EXIT_AFF_NONE;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003941 }
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003942 }
3943# endif // KMP_USE_HWLOC
3944
Jim Cownie5e8470a2013-09-27 10:38:44 +00003945 if (address2os == NULL) {
3946 if (KMP_AFFINITY_CAPABLE()
3947 && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3948 && (__kmp_affinity_type != affinity_none)))) {
3949 KMP_WARNING(ErrorInitializeAffinity);
3950 }
3951 __kmp_affinity_type = affinity_none;
Andrey Churbanov1f037e42015-03-10 09:15:26 +00003952 KMP_AFFINITY_DISABLE();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003953 return;
3954 }
3955
Jim Cownie5e8470a2013-09-27 10:38:44 +00003956 __kmp_apply_thread_places(&address2os, depth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003957
3958 //
3959 // Create the table of masks, indexed by thread Id.
3960 //
3961 unsigned maxIndex;
3962 unsigned numUnique;
3963 kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
3964 address2os, __kmp_avail_proc);
3965 if (__kmp_affinity_gran_levels == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003966 KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003967 }
3968
3969 //
3970 // Set the childNums vector in all Address objects. This must be done
3971 // before we can sort using __kmp_affinity_cmp_Address_child_num(),
3972 // which takes into account the setting of __kmp_affinity_compact.
3973 //
3974 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
3975
3976 switch (__kmp_affinity_type) {
3977
3978 case affinity_explicit:
3979 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
3980# if OMP_40_ENABLED
3981 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3982# endif
3983 {
3984 __kmp_affinity_process_proclist(&__kmp_affinity_masks,
3985 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3986 maxIndex);
3987 }
3988# if OMP_40_ENABLED
3989 else {
3990 __kmp_affinity_process_placelist(&__kmp_affinity_masks,
3991 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3992 maxIndex);
3993 }
3994# endif
3995 if (__kmp_affinity_num_masks == 0) {
3996 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3997 && (__kmp_affinity_type != affinity_none))) {
3998 KMP_WARNING(AffNoValidProcID);
3999 }
4000 __kmp_affinity_type = affinity_none;
4001 return;
4002 }
4003 break;
4004
4005 //
4006 // The other affinity types rely on sorting the Addresses according
4007 // to some permutation of the machine topology tree. Set
4008 // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
4009 // then jump to a common code fragment to do the sort and create
4010 // the array of affinity masks.
4011 //
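    //
    // For example, "compact" keeps the most significant sort keys at the top
    // of the tree, packing consecutive masks onto shared sockets and cores,
    // while "scatter" inverts the key order (depth - 1 - __kmp_affinity_compact)
    // so that consecutive masks are spread across sockets first.
    //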
4012
4013 case affinity_logical:
4014 __kmp_affinity_compact = 0;
4015 if (__kmp_affinity_offset) {
4016 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
4017 % __kmp_avail_proc;
4018 }
4019 goto sortAddresses;
4020
4021 case affinity_physical:
4022 if (__kmp_nThreadsPerCore > 1) {
4023 __kmp_affinity_compact = 1;
4024 if (__kmp_affinity_compact >= depth) {
4025 __kmp_affinity_compact = 0;
4026 }
4027 } else {
4028 __kmp_affinity_compact = 0;
4029 }
4030 if (__kmp_affinity_offset) {
4031 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
4032 % __kmp_avail_proc;
4033 }
4034 goto sortAddresses;
4035
4036 case affinity_scatter:
4037 if (__kmp_affinity_compact >= depth) {
4038 __kmp_affinity_compact = 0;
4039 }
4040 else {
4041 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
4042 }
4043 goto sortAddresses;
4044
4045 case affinity_compact:
4046 if (__kmp_affinity_compact >= depth) {
4047 __kmp_affinity_compact = depth - 1;
4048 }
4049 goto sortAddresses;
4050
Jim Cownie5e8470a2013-09-27 10:38:44 +00004051 case affinity_balanced:
Paul Osmialowskiecbe2ea2016-07-29 20:55:03 +00004052 if( depth <= 1 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004053 if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
4054 KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
4055 }
4056 __kmp_affinity_type = affinity_none;
4057 return;
4058 } else if( __kmp_affinity_uniform_topology() ) {
4059 break;
4060 } else { // Non-uniform topology
4061
4062 // Save the depth for further usage
4063 __kmp_aff_depth = depth;
4064
Paul Osmialowskiecbe2ea2016-07-29 20:55:03 +00004065 int core_level = __kmp_affinity_find_core_level(address2os, __kmp_avail_proc, depth - 1);
4066 int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc, depth - 1, core_level);
4067 int maxprocpercore = __kmp_affinity_max_proc_per_core(address2os, __kmp_avail_proc, depth - 1, core_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004068
Paul Osmialowskiecbe2ea2016-07-29 20:55:03 +00004069 int nproc = ncores * maxprocpercore;
4070 if( ( nproc < 2 ) || ( nproc < __kmp_avail_proc ) ) {
4071 if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
4072 KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
4073 }
4074 __kmp_affinity_type = affinity_none;
4075 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004076 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004077
4078 procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
4079 for( int i = 0; i < nproc; i++ ) {
4080 procarr[ i ] = -1;
4081 }
4082
Paul Osmialowskiecbe2ea2016-07-29 20:55:03 +00004083 int lastcore = -1;
4084 int inlastcore = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004085 for( int i = 0; i < __kmp_avail_proc; i++ ) {
4086 int proc = address2os[ i ].second;
Paul Osmialowskiecbe2ea2016-07-29 20:55:03 +00004087 int core = __kmp_affinity_find_core(address2os, i, depth - 1, core_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004088
Paul Osmialowskiecbe2ea2016-07-29 20:55:03 +00004089 if ( core == lastcore ) {
4090 inlastcore++;
4091 } else {
4092 inlastcore = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004093 }
Paul Osmialowskiecbe2ea2016-07-29 20:55:03 +00004094 lastcore = core;
4095
4096 procarr[ core * maxprocpercore + inlastcore ] = proc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004097 }
4098
4099 break;
4100 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004101
4102 sortAddresses:
4103 //
4104 // Allocate the gtid->affinity mask table.
4105 //
4106 if (__kmp_affinity_dups) {
4107 __kmp_affinity_num_masks = __kmp_avail_proc;
4108 }
4109 else {
4110 __kmp_affinity_num_masks = numUnique;
4111 }
4112
4113# if OMP_40_ENABLED
4114 if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
4115 && ( __kmp_affinity_num_places > 0 )
4116 && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
4117 __kmp_affinity_num_masks = __kmp_affinity_num_places;
4118 }
4119# endif
4120
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004121 KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004122
4123 //
4124 // Sort the address2os table according to the current setting of
4125 // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
4126 //
4127 qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
4128 __kmp_affinity_cmp_Address_child_num);
4129 {
4130 int i;
4131 unsigned j;
4132 for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
4133 if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
4134 continue;
4135 }
4136 unsigned osId = address2os[i].second;
4137 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
4138 kmp_affin_mask_t *dest
4139 = KMP_CPU_INDEX(__kmp_affinity_masks, j);
4140 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
4141 KMP_CPU_COPY(dest, src);
4142 if (++j >= __kmp_affinity_num_masks) {
4143 break;
4144 }
4145 }
4146 KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
4147 }
4148 break;
4149
4150 default:
4151 KMP_ASSERT2(0, "Unexpected affinity setting");
4152 }
4153
Jonathan Peyton788c5d62016-09-02 19:37:12 +00004154 KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex+1);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004155 machine_hierarchy.init(address2os, __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004156}
Jonathan Peytonfd7cc422016-06-21 15:54:38 +00004157#undef KMP_EXIT_AFF_NONE
Jim Cownie5e8470a2013-09-27 10:38:44 +00004158
4159
4160void
4161__kmp_affinity_initialize(void)
4162{
4163 //
4164 // Much of the code above was written assuming that if a machine was not
4165 // affinity capable, then __kmp_affinity_type == affinity_none. We now
4166 // explicitly represent this as __kmp_affinity_type == affinity_disabled.
4167 //
4168 // There are too many checks for __kmp_affinity_type == affinity_none
4169 // in this code. Instead of trying to change them all, check if
4170 // __kmp_affinity_type == affinity_disabled, and if so, slam it with
4171 // affinity_none, call the real initialization routine, then restore
4172 // __kmp_affinity_type to affinity_disabled.
4173 //
4174 int disabled = (__kmp_affinity_type == affinity_disabled);
4175 if (! KMP_AFFINITY_CAPABLE()) {
4176 KMP_ASSERT(disabled);
4177 }
4178 if (disabled) {
4179 __kmp_affinity_type = affinity_none;
4180 }
4181 __kmp_aux_affinity_initialize();
4182 if (disabled) {
4183 __kmp_affinity_type = affinity_disabled;
4184 }
4185}
4186
4187
4188void
4189__kmp_affinity_uninitialize(void)
4190{
4191 if (__kmp_affinity_masks != NULL) {
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004192 KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004193 __kmp_affinity_masks = NULL;
4194 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004195 if (__kmp_affin_fullMask != NULL) {
4196 KMP_CPU_FREE(__kmp_affin_fullMask);
4197 __kmp_affin_fullMask = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004198 }
4199 __kmp_affinity_num_masks = 0;
4200# if OMP_40_ENABLED
4201 __kmp_affinity_num_places = 0;
4202# endif
4203 if (__kmp_affinity_proclist != NULL) {
4204 __kmp_free(__kmp_affinity_proclist);
4205 __kmp_affinity_proclist = NULL;
4206 }
4207 if( address2os != NULL ) {
4208 __kmp_free( address2os );
4209 address2os = NULL;
4210 }
4211 if( procarr != NULL ) {
4212 __kmp_free( procarr );
4213 procarr = NULL;
4214 }
Jonathan Peyton202a24d2016-06-13 17:30:08 +00004215# if KMP_USE_HWLOC
4216 if (__kmp_hwloc_topology != NULL) {
4217 hwloc_topology_destroy(__kmp_hwloc_topology);
4218 __kmp_hwloc_topology = NULL;
4219 }
4220# endif
Jonathan Peyton1cdd87a2016-11-14 21:08:35 +00004221 KMPAffinity::destroy_api();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004222}
4223
4224
4225void
4226__kmp_affinity_set_init_mask(int gtid, int isa_root)
4227{
4228 if (! KMP_AFFINITY_CAPABLE()) {
4229 return;
4230 }
4231
4232 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4233 if (th->th.th_affin_mask == NULL) {
4234 KMP_CPU_ALLOC(th->th.th_affin_mask);
4235 }
4236 else {
4237 KMP_CPU_ZERO(th->th.th_affin_mask);
4238 }
4239
4240 //
4241 // Copy the thread mask to the kmp_info_t structure.
4242 // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
4243 // that has all of the OS proc ids set, or if __kmp_affinity_respect_mask
4244 // is set, then the full mask is the same as the mask of the initialization
4245 // thread.
4246 //
4247 kmp_affin_mask_t *mask;
4248 int i;
4249
4250# if OMP_40_ENABLED
4251 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
4252# endif
4253 {
Andrey Churbanovf28f6132015-01-13 14:54:00 +00004254 if ((__kmp_affinity_type == affinity_none) || (__kmp_affinity_type == affinity_balanced)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004255 ) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004256# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004257 if (__kmp_num_proc_groups > 1) {
4258 return;
4259 }
4260# endif
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004261 KMP_ASSERT(__kmp_affin_fullMask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004262 i = KMP_PLACE_ALL;
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004263 mask = __kmp_affin_fullMask;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004264 }
4265 else {
4266 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4267 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4268 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4269 }
4270 }
4271# if OMP_40_ENABLED
4272 else {
4273 if ((! isa_root)
4274 || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004275# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004276 if (__kmp_num_proc_groups > 1) {
4277 return;
4278 }
4279# endif
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004280 KMP_ASSERT(__kmp_affin_fullMask != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004281 i = KMP_PLACE_ALL;
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004282 mask = __kmp_affin_fullMask;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004283 }
4284 else {
4285 //
4286 // int i = some hash function or just a counter that doesn't
4287 // always start at 0. Use gtid for now.
4288 //
4289 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4290 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4291 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4292 }
4293 }
4294# endif
4295
4296# if OMP_40_ENABLED
4297 th->th.th_current_place = i;
4298 if (isa_root) {
4299 th->th.th_new_place = i;
4300 th->th.th_first_place = 0;
4301 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4302 }
4303
4304 if (i == KMP_PLACE_ALL) {
4305 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
4306 gtid));
4307 }
4308 else {
4309 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
4310 gtid, i));
4311 }
4312# else
4313 if (i == -1) {
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004314 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00004315 gtid));
4316 }
4317 else {
4318 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
4319 gtid, i));
4320 }
4321# endif /* OMP_40_ENABLED */
4322
4323 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4324
4325 if (__kmp_affinity_verbose) {
4326 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4327 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4328 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004329 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
4330 buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004331 }
4332
4333# if KMP_OS_WINDOWS
4334 //
4335 // On Windows* OS, the process affinity mask might have changed.
4336 // If the user didn't request affinity and this call fails,
4337 // just continue silently. See CQ171393.
4338 //
4339 if ( __kmp_affinity_type == affinity_none ) {
4340 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
4341 }
4342 else
4343# endif
4344 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4345}
4346
4347
4348# if OMP_40_ENABLED
4349
4350void
4351__kmp_affinity_set_place(int gtid)
4352{
4353 int retval;
4354
4355 if (! KMP_AFFINITY_CAPABLE()) {
4356 return;
4357 }
4358
4359 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4360
4361 KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
4362 gtid, th->th.th_new_place, th->th.th_current_place));
4363
4364 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00004365 // Check that the new place is within this thread's partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004366 //
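    // Note the place partition may wrap around the end of the mask table
    // (th_first_place > th_last_place); valid places then lie on either side
    // of the gap, as the else branch below allows.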
4367 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004368 KMP_ASSERT(th->th.th_new_place >= 0);
4369 KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004370 if (th->th.th_first_place <= th->th.th_last_place) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004371 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004372 && (th->th.th_new_place <= th->th.th_last_place));
4373 }
4374 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004375 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004376 || (th->th.th_new_place >= th->th.th_last_place));
4377 }
4378
4379 //
4380 // Copy the thread mask to the kmp_info_t structure,
4381 // and set this thread's affinity.
4382 //
4383 kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
4384 th->th.th_new_place);
4385 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4386 th->th.th_current_place = th->th.th_new_place;
4387
4388 if (__kmp_affinity_verbose) {
4389 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4390 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4391 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004392 KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
4393 gtid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004394 }
4395 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4396}
4397
4398# endif /* OMP_40_ENABLED */
4399
4400
4401int
4402__kmp_aux_set_affinity(void **mask)
4403{
4404 int gtid;
4405 kmp_info_t *th;
4406 int retval;
4407
4408 if (! KMP_AFFINITY_CAPABLE()) {
4409 return -1;
4410 }
4411
4412 gtid = __kmp_entry_gtid();
4413 KA_TRACE(1000, ;{
4414 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4415 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4416 (kmp_affin_mask_t *)(*mask));
4417 __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
4418 gtid, buf);
4419 });
4420
4421 if (__kmp_env_consistency_check) {
4422 if ((mask == NULL) || (*mask == NULL)) {
4423 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4424 }
4425 else {
4426 unsigned proc;
4427 int num_procs = 0;
4428
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004429 KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t*)(*mask))) {
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004430 if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
4431 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4432 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004433 if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
4434 continue;
4435 }
4436 num_procs++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004437 }
4438 if (num_procs == 0) {
4439 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4440 }
4441
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004442# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004443 if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
4444 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4445 }
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004446# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004447
4448 }
4449 }
4450
4451 th = __kmp_threads[gtid];
4452 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4453 retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4454 if (retval == 0) {
4455 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
4456 }
4457
4458# if OMP_40_ENABLED
4459 th->th.th_current_place = KMP_PLACE_UNDEFINED;
4460 th->th.th_new_place = KMP_PLACE_UNDEFINED;
4461 th->th.th_first_place = 0;
4462 th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004463
4464 //
4465 // Turn off 4.0 affinity for the current thread at this parallel level.
4466 //
4467 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004468# endif
4469
4470 return retval;
4471}
4472
4473
4474int
4475__kmp_aux_get_affinity(void **mask)
4476{
4477 int gtid;
4478 int retval;
4479 kmp_info_t *th;
4480
4481 if (! KMP_AFFINITY_CAPABLE()) {
4482 return -1;
4483 }
4484
4485 gtid = __kmp_entry_gtid();
4486 th = __kmp_threads[gtid];
4487 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4488
4489 KA_TRACE(1000, ;{
4490 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4491 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4492 th->th.th_affin_mask);
4493 __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
4494 });
4495
4496 if (__kmp_env_consistency_check) {
4497 if ((mask == NULL) || (*mask == NULL)) {
4498 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
4499 }
4500 }
4501
4502# if !KMP_OS_WINDOWS
4503
4504 retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4505 KA_TRACE(1000, ;{
4506 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4507 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4508 (kmp_affin_mask_t *)(*mask));
4509 __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
4510 });
4511 return retval;
4512
4513# else
4514
4515 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
4516 return 0;
4517
4518# endif /* KMP_OS_WINDOWS */
4519
4520}
4521
Jim Cownie5e8470a2013-09-27 10:38:44 +00004522int
Jonathan Peyton7c465a52016-09-12 19:02:53 +00004523__kmp_aux_get_affinity_max_proc() {
4524 if (! KMP_AFFINITY_CAPABLE()) {
4525 return 0;
4526 }
4527#if KMP_GROUP_AFFINITY
4528 if ( __kmp_num_proc_groups > 1 ) {
4529 return (int)(__kmp_num_proc_groups*sizeof(DWORD_PTR)*CHAR_BIT);
4530 }
4531#endif
4532 return __kmp_xproc;
4533}
4534
4535int
Jim Cownie5e8470a2013-09-27 10:38:44 +00004536__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
4537{
4538 int retval;
4539
4540 if (! KMP_AFFINITY_CAPABLE()) {
4541 return -1;
4542 }
4543
4544 KA_TRACE(1000, ;{
4545 int gtid = __kmp_entry_gtid();
4546 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4547 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4548 (kmp_affin_mask_t *)(*mask));
4549 __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
4550 proc, gtid, buf);
4551 });
4552
4553 if (__kmp_env_consistency_check) {
4554 if ((mask == NULL) || (*mask == NULL)) {
4555 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
4556 }
4557 }
4558
Jonathan Peyton7c465a52016-09-12 19:02:53 +00004559 if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004560 return -1;
4561 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004562 if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004563 return -2;
4564 }
4565
4566 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
4567 return 0;
4568}
4569
4570
4571int
4572__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
4573{
4574 int retval;
4575
4576 if (! KMP_AFFINITY_CAPABLE()) {
4577 return -1;
4578 }
4579
4580 KA_TRACE(1000, ;{
4581 int gtid = __kmp_entry_gtid();
4582 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4583 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4584 (kmp_affin_mask_t *)(*mask));
4585 __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
4586 proc, gtid, buf);
4587 });
4588
4589 if (__kmp_env_consistency_check) {
4590 if ((mask == NULL) || (*mask == NULL)) {
4591 KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
4592 }
4593 }
4594
Jonathan Peyton7c465a52016-09-12 19:02:53 +00004595 if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004596 return -1;
4597 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004598 if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004599 return -2;
4600 }
4601
4602 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
4603 return 0;
4604}
4605
4606
4607int
4608__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
4609{
4610 int retval;
4611
4612 if (! KMP_AFFINITY_CAPABLE()) {
4613 return -1;
4614 }
4615
4616 KA_TRACE(1000, ;{
4617 int gtid = __kmp_entry_gtid();
4618 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4619 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4620 (kmp_affin_mask_t *)(*mask));
4621 __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
4622 proc, gtid, buf);
4623 });
4624
4625 if (__kmp_env_consistency_check) {
4626 if ((mask == NULL) || (*mask == NULL)) {
Andrey Churbanov4b2f17a2015-01-29 15:49:22 +00004627 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
Jim Cownie5e8470a2013-09-27 10:38:44 +00004628 }
4629 }
4630
Jonathan Peyton7c465a52016-09-12 19:02:53 +00004631 if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004632 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004633 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004634 if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004635 return 0;
4636 }
4637
4638 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
4639}
4640
Jim Cownie5e8470a2013-09-27 10:38:44 +00004641
4642// Dynamic affinity settings - Affinity balanced
4643void __kmp_balanced_affinity( int tid, int nthreads )
4644{
Paul Osmialowskiecbe2ea2016-07-29 20:55:03 +00004645 bool fine_gran = true;
4646
4647 switch (__kmp_affinity_gran) {
4648 case affinity_gran_fine:
4649 case affinity_gran_thread:
4650 break;
4651 case affinity_gran_core:
4652 if( __kmp_nThreadsPerCore > 1) {
4653 fine_gran = false;
4654 }
4655 break;
4656 case affinity_gran_package:
4657 if( nCoresPerPkg > 1) {
4658 fine_gran = false;
4659 }
4660 break;
4661 default:
4662 fine_gran = false;
4663 }
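    // fine_gran == true binds each thread to a single HW thread; otherwise a
    // thread is bound to all HW threads of its chosen core (or package).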
4664
Jim Cownie5e8470a2013-09-27 10:38:44 +00004665 if( __kmp_affinity_uniform_topology() ) {
4666 int coreID;
4667 int threadID;
4668 // Number of hyper-threads per core on an HT machine
4669 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
4670 // Number of cores
4671 int ncores = __kmp_ncores;
Paul Osmialowskiecbe2ea2016-07-29 20:55:03 +00004672 if( ( nPackages > 1 ) && ( __kmp_nth_per_core <= 1 ) ) {
4673 __kmp_nth_per_core = __kmp_avail_proc / nPackages;
4674 ncores = nPackages;
4675 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004676 // How many threads will be bound to each core
4677 int chunk = nthreads / ncores;
4678 // How many cores will have an additional thread bound to it - "big cores"
4679 int big_cores = nthreads % ncores;
4680 // Number of threads on the big cores
4681 int big_nth = ( chunk + 1 ) * big_cores;
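        // e.g. nthreads == 10 on ncores == 4 gives chunk == 2, big_cores == 2
        // and big_nth == 6: threads 0-5 share the two "big" cores (3 apiece),
        // threads 6-9 the remaining two cores (2 apiece).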
4682 if( tid < big_nth ) {
4683 coreID = tid / (chunk + 1 );
4684 threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
4685 } else { //tid >= big_nth
4686 coreID = ( tid - big_cores ) / chunk;
4687 threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
4688 }
4689
4690 KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
4691 "Illegal set affinity operation when not capable");
4692
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004693 kmp_affin_mask_t *mask;
4694 KMP_CPU_ALLOC_ON_STACK(mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004695 KMP_CPU_ZERO(mask);
4696
Paul Osmialowskiecbe2ea2016-07-29 20:55:03 +00004697 if( fine_gran ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004698 int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
4699 KMP_CPU_SET( osID, mask);
Paul Osmialowskiecbe2ea2016-07-29 20:55:03 +00004700 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004701 for( int i = 0; i < __kmp_nth_per_core; i++ ) {
4702 int osID;
4703 osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
4704 KMP_CPU_SET( osID, mask);
4705 }
4706 }
4707 if (__kmp_affinity_verbose) {
4708 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4709 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004710 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4711 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004712 }
4713 __kmp_set_system_affinity( mask, TRUE );
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004714 KMP_CPU_FREE_FROM_STACK(mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004715 } else { // Non-uniform topology
4716
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004717 kmp_affin_mask_t *mask;
4718 KMP_CPU_ALLOC_ON_STACK(mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004719 KMP_CPU_ZERO(mask);
4720
Paul Osmialowskiecbe2ea2016-07-29 20:55:03 +00004721 int core_level = __kmp_affinity_find_core_level(address2os, __kmp_avail_proc, __kmp_aff_depth - 1);
4722 int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level);
4723 int nth_per_core = __kmp_affinity_max_proc_per_core(address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004724
4725 // As a performance optimization, handle the special case nthreads == __kmp_avail_proc
4726 if( nthreads == __kmp_avail_proc ) {
Paul Osmialowskiecbe2ea2016-07-29 20:55:03 +00004727 if( fine_gran ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004728 int osID = address2os[ tid ].second;
4729 KMP_CPU_SET( osID, mask);
Paul Osmialowskiecbe2ea2016-07-29 20:55:03 +00004730 } else {
4731 int core = __kmp_affinity_find_core(address2os, tid, __kmp_aff_depth - 1, core_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004732 for( int i = 0; i < __kmp_avail_proc; i++ ) {
4733 int osID = address2os[ i ].second;
Paul Osmialowskiecbe2ea2016-07-29 20:55:03 +00004734 if( __kmp_affinity_find_core(address2os, i, __kmp_aff_depth - 1, core_level) == core ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004735 KMP_CPU_SET( osID, mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004736 }
4737 }
4738 }
Paul Osmialowskiecbe2ea2016-07-29 20:55:03 +00004739 } else if( nthreads <= ncores ) {

            int core = 0;
            for( int i = 0; i < ncores; i++ ) {
                // Check whether this core has any available processors in procarr[]
                int in_mask = 0;
                for( int j = 0; j < nth_per_core; j++ ) {
                    if( procarr[ i * nth_per_core + j ] != -1 ) {
                        in_mask = 1;
                        break;
                    }
                }
                if( in_mask ) {
                    if( tid == core ) {
                        for( int j = 0; j < nth_per_core; j++ ) {
                            int osID = procarr[ i * nth_per_core + j ];
                            if( osID != -1 ) {
                                KMP_CPU_SET( osID, mask );
                                // For fine granularity it is enough to set the first available osID for this core
                                if( fine_gran ) {
                                    break;
                                }
                            }
                        }
                        break;
                    } else {
                        core++;
                    }
                }
            }

        } else { // nthreads > ncores

            // Array to save the number of processors at each core
            int* nproc_at_core = (int*)KMP_ALLOCA(sizeof(int)*ncores);
            // Array to save the number of cores with exactly "x" available processors
            int* ncores_with_x_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
            // Array to save the number of cores with between "x" and nth_per_core available processors
            int* ncores_with_x_to_max_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));

            for( int i = 0; i <= nth_per_core; i++ ) {
                ncores_with_x_procs[ i ] = 0;
                ncores_with_x_to_max_procs[ i ] = 0;
            }

            for( int i = 0; i < ncores; i++ ) {
                int cnt = 0;
                for( int j = 0; j < nth_per_core; j++ ) {
                    if( procarr[ i * nth_per_core + j ] != -1 ) {
                        cnt++;
                    }
                }
                nproc_at_core[ i ] = cnt;
                ncores_with_x_procs[ cnt ]++;
            }

            for( int i = 0; i <= nth_per_core; i++ ) {
                for( int j = i; j <= nth_per_core; j++ ) {
                    ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
                }
            }
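            // For example, with nth_per_core == 2 and ncores_with_x_procs ==
            // {0, 1, 3} (one core exposing a single proc, three exposing both),
            // the suffix sums above give ncores_with_x_to_max_procs ==
            // {4, 4, 3}.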

            // Max number of processors
            int nproc = nth_per_core * ncores;
            // An array to keep the number of threads assigned to each hw context
            int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
            for( int i = 0; i < nproc; i++ ) {
                newarr[ i ] = 0;
            }

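            // Distribute the threads over the hw contexts in a balanced way:
            // on pass j (j = 1..nth_per_core) one more thread lands on the
            // first unoccupied context of each core that exposes at least j
            // contexts.  Once every context is occupied (flag != 0), further
            // threads are stacked onto contexts that already hold one.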
            int nth = nthreads;
            int flag = 0;
            while( nth > 0 ) {
                for( int j = 1; j <= nth_per_core; j++ ) {
                    int cnt = ncores_with_x_to_max_procs[ j ];
                    for( int i = 0; i < ncores; i++ ) {
                        // Skip cores with 0 processors
                        if( nproc_at_core[ i ] == 0 ) {
                            continue;
                        }
                        for( int k = 0; k < nth_per_core; k++ ) {
                            if( procarr[ i * nth_per_core + k ] != -1 ) {
                                if( newarr[ i * nth_per_core + k ] == 0 ) {
                                    newarr[ i * nth_per_core + k ] = 1;
                                    cnt--;
                                    nth--;
                                    break;
                                } else {
                                    if( flag != 0 ) {
                                        newarr[ i * nth_per_core + k ]++;
                                        cnt--;
                                        nth--;
                                        break;
                                    }
                                }
                            }
                        }
                        if( cnt == 0 || nth == 0 ) {
                            break;
                        }
                    }
                    if( nth == 0 ) {
                        break;
                    }
                }
                flag = 1;
            }
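            // newarr[] now holds the number of threads assigned to each hw
            // context.  Thread tid claims the context at which the running sum
            // of newarr[] first exceeds tid, so consecutive tids get
            // consecutive occupied contexts.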
            int sum = 0;
            for( int i = 0; i < nproc; i++ ) {
                sum += newarr[ i ];
                if( sum > tid ) {
                    if( fine_gran ) {
                        int osID = procarr[ i ];
                        KMP_CPU_SET( osID, mask );
                    } else {
                        int coreID = i / nth_per_core;
                        for( int ii = 0; ii < nth_per_core; ii++ ) {
                            int osID = procarr[ coreID * nth_per_core + ii ];
                            if( osID != -1 ) {
                                KMP_CPU_SET( osID, mask );
                            }
                        }
                    }
                    break;
                }
            }
            __kmp_free( newarr );
        }

        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
            KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
                       tid, buf);
        }
        __kmp_set_system_affinity( mask, TRUE );
        KMP_CPU_FREE_FROM_STACK(mask);
    }
}

#if KMP_OS_LINUX
// This entry point is not needed on Windows, which provides the
// GetProcessAffinityMask() API instead.
//
// The intended usage is indicated by these steps (see also the usage
// sketch after the function):
// 1) The user gets the current affinity mask
// 2) Then sets the affinity by calling this function
// 3) Error checks the return value
// 4) Uses non-OpenMP parallelization
// 5) Resets the affinity to what was stored in step 1)
#ifdef __cplusplus
extern "C"
#endif
int
kmp_set_thread_affinity_mask_initial()
// The function returns 0 on success,
// -1 if the thread cannot be bound,
// or a positive value (errno) if an error happened during binding.
{
    int gtid = __kmp_get_gtid();
    if (gtid < 0) {
        // Do not touch non-omp threads
        KA_TRACE(30, ( "kmp_set_thread_affinity_mask_initial: "
                       "non-omp thread, returning\n"));
        return -1;
    }
    if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) {
        KA_TRACE(30, ( "kmp_set_thread_affinity_mask_initial: "
                       "affinity not initialized, returning\n"));
        return -1;
    }
    KA_TRACE(30, ( "kmp_set_thread_affinity_mask_initial: "
                   "set full mask for thread %d\n", gtid));
    KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
    return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
}
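
// A minimal usage sketch following the steps above, assuming a Linux caller
// that saves and restores its own mask with pthread_getaffinity_np() /
// pthread_setaffinity_np(); run_non_openmp_phase() is a hypothetical user
// routine, not part of this library:
//
//     #define _GNU_SOURCE
//     #include <pthread.h>
//     #include <sched.h>
//
//     void run_non_openmp_phase(void);   // hypothetical user routine
//
//     void example(void) {
//         cpu_set_t saved;
//         // 1) Save the current affinity mask.
//         pthread_getaffinity_np(pthread_self(), sizeof(saved), &saved);
//         // 2) Widen this thread's mask to the full initial mask, and
//         // 3) check the return value.
//         if (kmp_set_thread_affinity_mask_initial() == 0) {
//             // 4) Run non-OpenMP parallel work with the full mask.
//             run_non_openmp_phase();
//         }
//         // 5) Restore the mask saved in step 1.
//         pthread_setaffinity_np(pthread_self(), sizeof(saved), &saved);
//     }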
#endif // KMP_OS_LINUX

#endif // KMP_AFFINITY_SUPPORTED