1/*
2 * kmp_affinity.cpp -- affinity management
3 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15
16#include "kmp.h"
17#include "kmp_i18n.h"
18#include "kmp_io.h"
19#include "kmp_str.h"
20#include "kmp_wrapper_getpid.h"
21
22#if KMP_AFFINITY_SUPPORTED
23
24//
25// Print the affinity mask to the character array in a pretty format.
26//
27char *
28__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
29{
30 KMP_ASSERT(buf_len >= 40);
31 char *scan = buf;
32 char *end = buf + buf_len - 1;
33
34 //
35 // Find first element / check for empty set.
36 //
37 size_t i;
38 for (i = 0; i < KMP_CPU_SETSIZE; i++) {
39 if (KMP_CPU_ISSET(i, mask)) {
40 break;
41 }
42 }
43 if (i == KMP_CPU_SETSIZE) {
44 sprintf(scan, "{<empty>}");
45 while (*scan != '\0') scan++;
46 KMP_ASSERT(scan <= end);
47 return buf;
48 }
49
50 sprintf(scan, "{%ld", (long)i);
51 while (*scan != '\0') scan++;
52 i++;
53 for (; i < KMP_CPU_SETSIZE; i++) {
54 if (! KMP_CPU_ISSET(i, mask)) {
55 continue;
56 }
57
58 //
59 // Check for buffer overflow. A string of the form ",<n>" will have
60 // at most 10 characters, plus we want to leave room to print ",...}"
61 // if the set is too large to print for a total of 15 characters.
62 // We already left room for '\0' in setting end.
63 //
64 if (end - scan < 15) {
65 break;
66 }
67 sprintf(scan, ",%-ld", (long)i);
68 while (*scan != '\0') scan++;
69 }
70 if (i < KMP_CPU_SETSIZE) {
71 sprintf(scan, ",...");
72 while (*scan != '\0') scan++;
73 }
74 sprintf(scan, "}");
75 while (*scan != '\0') scan++;
76 KMP_ASSERT(scan <= end);
77 return buf;
78}
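//
// Illustrative sketch (hypothetical, for reference only) of the format
// produced above: a mask with bits {0,1,4} set prints as "{0,1,4}", an empty
// mask prints as "{<empty>}", and an over-long set is truncated with ",...".
// The standalone program below mimics the same formatting with a
// std::vector<bool> standing in for kmp_affin_mask_t.
//
#if 0
#include <cstdio>
#include <string>
#include <vector>

static std::string print_mask(const std::vector<bool> &mask) {
    std::string out;
    size_t i = 0;
    while (i < mask.size() && !mask[i]) i++;            // find first set bit
    if (i == mask.size()) return "{<empty>}";
    out = "{" + std::to_string(i);
    for (++i; i < mask.size(); ++i) {
        if (!mask[i]) continue;
        if (out.size() > 60) { out += ",..."; break; }  // arbitrary truncation limit
        out += "," + std::to_string(i);
    }
    return out + "}";
}

int main() {
    std::vector<bool> mask(8, false);
    mask[0] = mask[1] = mask[4] = true;
    std::printf("%s\n", print_mask(mask).c_str());      // prints {0,1,4}
    return 0;
}
#endif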
79
80
81void
82__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
83{
84 KMP_CPU_ZERO(mask);
85
86# if KMP_GROUP_AFFINITY
87
88 if (__kmp_num_proc_groups > 1) {
89 int group;
90 KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
91 for (group = 0; group < __kmp_num_proc_groups; group++) {
92 int i;
93 int num = __kmp_GetActiveProcessorCount(group);
94 for (i = 0; i < num; i++) {
95 KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
96 }
97 }
98 }
99 else
100
101# endif /* KMP_GROUP_AFFINITY */
102
103 {
104 int proc;
105 for (proc = 0; proc < __kmp_xproc; proc++) {
106 KMP_CPU_SET(proc, mask);
107 }
108 }
109}
110
111
112//
113// In Linux* OS debug & cover (-O0) builds, we need to avoid inline member
114// functions.
115//
116// The icc codegen emits sections with extremely long names, of the form
117// ".gnu.linkonce.<mangled_name>". There seems to have been a linker bug
118// introduced between GNU ld version 2.14.90.0.4 and 2.15.92.0.2 involving
119// some sort of memory corruption or table overflow that is triggered by
120// these long strings. I checked the latest version of the linker -
121// GNU ld (Linux* OS/GNU Binutils) 2.18.50.0.7.20080422 - and the bug is not
122// fixed.
123//
124// Unfortunately, my attempts to reproduce it in a smaller example have
125// failed - I'm not sure what the prospects are of getting it fixed
126// properly - but we need a reproducer smaller than all of libiomp.
127//
128// Work around the problem by avoiding inline constructors in such builds.
129// We do this for all platforms, not just Linux* OS - non-inline functions are
130// more debuggable and provide better coverage than inline functions.
131// Use inline functions in shipping libs, for performance.
132//
133
134# if !defined(KMP_DEBUG) && !defined(COVER)
135
136class Address {
137public:
138 static const unsigned maxDepth = 32;
139 unsigned labels[maxDepth];
140 unsigned childNums[maxDepth];
141 unsigned depth;
142 unsigned leader;
143 Address(unsigned _depth)
144 : depth(_depth), leader(FALSE) {
145 }
146 Address &operator=(const Address &b) {
147 depth = b.depth;
148 for (unsigned i = 0; i < depth; i++) {
149 labels[i] = b.labels[i];
150 childNums[i] = b.childNums[i];
151 }
152 leader = FALSE;
153 return *this;
154 }
155 bool operator==(const Address &b) const {
156 if (depth != b.depth)
157 return false;
158 for (unsigned i = 0; i < depth; i++)
159 if(labels[i] != b.labels[i])
160 return false;
161 return true;
162 }
163 bool isClose(const Address &b, int level) const {
164 if (depth != b.depth)
165 return false;
166 if ((unsigned)level >= depth)
167 return true;
168 for (unsigned i = 0; i < (depth - level); i++)
169 if(labels[i] != b.labels[i])
170 return false;
171 return true;
172 }
173 bool operator!=(const Address &b) const {
174 return !operator==(b);
175 }
176};
177
178class AddrUnsPair {
179public:
180 Address first;
181 unsigned second;
182 AddrUnsPair(Address _first, unsigned _second)
183 : first(_first), second(_second) {
184 }
185 AddrUnsPair &operator=(const AddrUnsPair &b)
186 {
187 first = b.first;
188 second = b.second;
189 return *this;
190 }
191};
192
193# else
194
195class Address {
196public:
197 static const unsigned maxDepth = 32;
198 unsigned labels[maxDepth];
199 unsigned childNums[maxDepth];
200 unsigned depth;
201 unsigned leader;
202 Address(unsigned _depth);
203 Address &operator=(const Address &b);
204 bool operator==(const Address &b) const;
205 bool isClose(const Address &b, int level) const;
206 bool operator!=(const Address &b) const;
207};
208
209Address::Address(unsigned _depth)
210{
211 depth = _depth;
212 leader = FALSE;
213}
214
215Address &Address::operator=(const Address &b) {
216 depth = b.depth;
217 for (unsigned i = 0; i < depth; i++) {
218 labels[i] = b.labels[i];
219 childNums[i] = b.childNums[i];
220 }
221 leader = FALSE;
222 return *this;
223}
224
225bool Address::operator==(const Address &b) const {
226 if (depth != b.depth)
227 return false;
228 for (unsigned i = 0; i < depth; i++)
229 if(labels[i] != b.labels[i])
230 return false;
231 return true;
232}
233
234bool Address::isClose(const Address &b, int level) const {
235 if (depth != b.depth)
236 return false;
237 if ((unsigned)level >= depth)
238 return true;
239 for (unsigned i = 0; i < (depth - level); i++)
240 if(labels[i] != b.labels[i])
241 return false;
242 return true;
243}
244
245bool Address::operator!=(const Address &b) const {
246 return !operator==(b);
247}
248
249class AddrUnsPair {
250public:
251 Address first;
252 unsigned second;
253 AddrUnsPair(Address _first, unsigned _second);
254 AddrUnsPair &operator=(const AddrUnsPair &b);
255};
256
257AddrUnsPair::AddrUnsPair(Address _first, unsigned _second)
258 : first(_first), second(_second)
259{
260}
261
262AddrUnsPair &AddrUnsPair::operator=(const AddrUnsPair &b)
263{
264 first = b.first;
265 second = b.second;
266 return *this;
267}
268
269# endif /* !defined(KMP_DEBUG) && !defined(COVER) */
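//
// Illustrative sketch of how the topology code below typically fills these
// objects in: a depth-3 Address holds {package, core, thread} labels and the
// unsigned member of AddrUnsPair records the OS proc id.  All values here
// are hypothetical.
//
#if 0
static void address_example()
{
    Address a(3);
    a.labels[0] = 1;            // package 1
    a.labels[1] = 2;            // core 2 within that package
    a.labels[2] = 0;            // hw thread 0 within that core
    AddrUnsPair pair(a, 13);    // ...exposed by the OS as proc 13

    Address b(3);
    b.labels[0] = 1; b.labels[1] = 2; b.labels[2] = 1;
    bool same_core = a.isClose(b, 1);   // true  - equal down to the core level
    bool identical = (a == b);          // false - different hw thread
    (void)pair; (void)same_core; (void)identical;
}
#endif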
270
271
272static int
273__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
274{
275 const Address *aa = (const Address *)&(((AddrUnsPair *)a)
276 ->first);
277 const Address *bb = (const Address *)&(((AddrUnsPair *)b)
278 ->first);
279 unsigned depth = aa->depth;
280 unsigned i;
281 KMP_DEBUG_ASSERT(depth == bb->depth);
282 for (i = 0; i < depth; i++) {
283 if (aa->labels[i] < bb->labels[i]) return -1;
284 if (aa->labels[i] > bb->labels[i]) return 1;
285 }
286 return 0;
287}
288
289
290static int
291__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
292{
293 const Address *aa = (const Address *)&(((AddrUnsPair *)a)
294 ->first);
295 const Address *bb = (const Address *)&(((AddrUnsPair *)b)
296 ->first);
297 unsigned depth = aa->depth;
298 unsigned i;
299 KMP_DEBUG_ASSERT(depth == bb->depth);
300 KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
301 KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
302 for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
303 int j = depth - i - 1;
304 if (aa->childNums[j] < bb->childNums[j]) return -1;
305 if (aa->childNums[j] > bb->childNums[j]) return 1;
306 }
307 for (; i < depth; i++) {
308 int j = i - __kmp_affinity_compact;
309 if (aa->childNums[j] < bb->childNums[j]) return -1;
310 if (aa->childNums[j] > bb->childNums[j]) return 1;
311 }
312 return 0;
313}
314
315/** A structure for holding machine-specific hierarchy info to be computed once at init. */
316class hierarchy_info {
317public:
318 /** Typical levels are threads/core, cores/package or socket, packages/node, nodes/machine,
319 etc. We don't want to get specific with nomenclature */
320 static const kmp_uint32 maxLevels=7;
321
322 /** This is specifically the depth of the machine configuration hierarchy, in terms of the
323 number of levels along the longest path from root to any leaf. It corresponds to the
324 number of entries in numPerLevel if we exclude all but one trailing 1. */
325 kmp_uint32 depth;
326 kmp_uint32 base_num_threads;
327 bool uninitialized;
328
329 /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
330 node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
331 and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
332 kmp_uint32 numPerLevel[maxLevels];
333 kmp_uint32 skipPerLevel[maxLevels];
334
335 void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
336 int hier_depth = adr2os[0].first.depth;
337 int level = 0;
338 for (int i=hier_depth-1; i>=0; --i) {
339 int max = -1;
340 for (int j=0; j<num_addrs; ++j) {
341 int next = adr2os[j].first.childNums[i];
342 if (next > max) max = next;
343 }
344 numPerLevel[level] = max+1;
345 ++level;
346 }
347 }
348
349 hierarchy_info() : depth(1), uninitialized(true) {}
350 void init(AddrUnsPair *adr2os, int num_addrs)
351 {
352 /* Added explicit initialization of the depth here to prevent usage of dirty value
353 observed when static library is re-initialized multiple times (e.g. when
354 non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
355 depth = 1;
356 uninitialized = false;
357 for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
358 numPerLevel[i] = 1;
359 skipPerLevel[i] = 1;
360 }
361
362 // Sort table by physical ID
363 if (adr2os) {
364 qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
365 deriveLevels(adr2os, num_addrs);
366 }
367 else {
368 numPerLevel[0] = 4;
369 numPerLevel[1] = num_addrs/4;
370 if (num_addrs%4) numPerLevel[1]++;
371 }
372
373 base_num_threads = num_addrs;
374 for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
375 if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
376 depth++;
377
378 kmp_uint32 branch = 4;
379 if (numPerLevel[0] == 1) branch = num_addrs/4;
380 if (branch<4) branch=4;
381 for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
382 while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
383 if (numPerLevel[d] & 1) numPerLevel[d]++;
384 numPerLevel[d] = numPerLevel[d] >> 1;
385 if (numPerLevel[d+1] == 1) depth++;
386 numPerLevel[d+1] = numPerLevel[d+1] << 1;
387 }
388 if(numPerLevel[0] == 1) {
389 branch = branch >> 1;
390 if (branch<4) branch = 4;
391 }
392 }
393
394 for (kmp_uint32 i=1; i<depth; ++i)
395 skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
396
397 }
398};
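//
// Worked example of the two arrays above for the hypothetical machine from
// the numPerLevel comment (4 packages, 4 cores/package, 2 HW threads/core):
// numPerLevel = {2, 4, 4, 1, 1, 1, 1} and skipPerLevel[i] is the product of
// the entries below level i, i.e. the distance in leaves between two
// consecutive nodes at that level.
//
#if 0
#include <cstdio>

int main() {
    const int maxLevels = 7;
    unsigned numPerLevel[maxLevels]  = {2, 4, 4, 1, 1, 1, 1};
    unsigned skipPerLevel[maxLevels] = {1, 1, 1, 1, 1, 1, 1};
    for (int i = 1; i < maxLevels; ++i)
        skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Expected: skipPerLevel == {1, 2, 8, 32, 32, 32, 32}
    for (int i = 0; i < maxLevels; ++i)
        std::printf("level %d: num=%u skip=%u\n", i, numPerLevel[i], skipPerLevel[i]);
    return 0;
}
#endif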
399
400static hierarchy_info machine_hierarchy;
401
402void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
403 kmp_uint32 depth;
404 if (machine_hierarchy.uninitialized)
405 machine_hierarchy.init(NULL, nproc);
406
407 depth = machine_hierarchy.depth;
408 KMP_DEBUG_ASSERT(depth > 0);
409 while (nproc > machine_hierarchy.skipPerLevel[depth-1]) {
410 depth++;
411 machine_hierarchy.skipPerLevel[depth-1] = 2*machine_hierarchy.skipPerLevel[depth-2];
412 }
413 thr_bar->depth = depth;
414 thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
415 thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
416}
417
418//
419// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
420// called to renumber the labels from [0..n] and place them into the child_num
421// vector of the address object. This is done in case the labels used for
422// the children at one node of the hierarchy differ from those used for
423// another node at the same level. Example: suppose the machine has 2 nodes
424// with 2 packages each. The first node contains packages 601 and 602, and
425// the second node contains packages 603 and 604. If we try to sort the table
426// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
427// because we are paying attention to the labels themselves, not the ordinal
428// child numbers. By using the child numbers in the sort, the result is
429// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
430//
431static void
432__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
433 int numAddrs)
434{
435 KMP_DEBUG_ASSERT(numAddrs > 0);
436 int depth = address2os->first.depth;
437 unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
438 unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
439 * sizeof(unsigned));
440 int labCt;
441 for (labCt = 0; labCt < depth; labCt++) {
442 address2os[0].first.childNums[labCt] = counts[labCt] = 0;
443 lastLabel[labCt] = address2os[0].first.labels[labCt];
444 }
445 int i;
446 for (i = 1; i < numAddrs; i++) {
447 for (labCt = 0; labCt < depth; labCt++) {
448 if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
449 int labCt2;
450 for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
451 counts[labCt2] = 0;
452 lastLabel[labCt2] = address2os[i].first.labels[labCt2];
453 }
454 counts[labCt]++;
455 lastLabel[labCt] = address2os[i].first.labels[labCt];
456 break;
457 }
458 }
459 for (labCt = 0; labCt < depth; labCt++) {
460 address2os[i].first.childNums[labCt] = counts[labCt];
461 }
462 for (; labCt < (int)Address::maxDepth; labCt++) {
463 address2os[i].first.childNums[labCt] = 0;
464 }
465 }
466}
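//
// Small illustration of the renumbering (hypothetical labels): suppose
// depth == 2 and the sorted table holds package/core label pairs {0,10},
// {0,12}, {1,20}, {1,22}.  The raw core labels differ between the two
// packages, but after renumbering both packages see their cores as child
// numbers 0 and 1, which is what the compact/scatter sorts rely on.
//
#if 0
static void child_num_example()
{
    AddrUnsPair t[] = { AddrUnsPair(Address(2), 0), AddrUnsPair(Address(2), 1),
                        AddrUnsPair(Address(2), 2), AddrUnsPair(Address(2), 3) };
    t[0].first.labels[0] = 0; t[0].first.labels[1] = 10;
    t[1].first.labels[0] = 0; t[1].first.labels[1] = 12;
    t[2].first.labels[0] = 1; t[2].first.labels[1] = 20;
    t[3].first.labels[0] = 1; t[3].first.labels[1] = 22;
    __kmp_affinity_assign_child_nums(t, 4);
    // Now t[0].first.childNums == {0,0}, t[1] == {0,1},
    //     t[2].first.childNums == {1,0}, t[3] == {1,1}.
}
#endif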
467
468
469//
470// All of the __kmp_affinity_create_*_map() routines should set
471// __kmp_affinity_masks to a vector of affinity mask objects of length
472// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
473// return the number of levels in the machine topology tree (zero if
474// __kmp_affinity_type == affinity_none).
475//
476// All of the __kmp_affinity_create_*_map() routines should set *fullMask
477// to the affinity mask for the initialization thread. They need to save and
478// restore the mask, and it could be needed later, so saving it is just an
479// optimization to avoid calling kmp_get_system_affinity() again.
480//
481static kmp_affin_mask_t *fullMask = NULL;
482
483kmp_affin_mask_t *
484__kmp_affinity_get_fullMask() { return fullMask; }
485
486
487static int nCoresPerPkg, nPackages;
488static int __kmp_nThreadsPerCore;
489#ifndef KMP_DFLT_NTH_CORES
490static int __kmp_ncores;
491#endif
492
493//
494// __kmp_affinity_uniform_topology() doesn't work when called from
495// places which support arbitrarily many levels in the machine topology
496// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
497// __kmp_affinity_create_x2apicid_map().
498//
499inline static bool
500__kmp_affinity_uniform_topology()
501{
502 return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
503}
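//
// Example: a machine with 2 packages x 8 cores x 2 HW threads is uniform as
// long as all 32 OS procs are available (2 * 8 * 2 == 32 == __kmp_avail_proc).
// If the initial affinity mask excludes two procs (__kmp_avail_proc == 30),
// the topology is treated as non-uniform.
//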
504
505
506//
507// Print out the detailed machine topology map, i.e. the physical locations
508// of each OS proc.
509//
510static void
511__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
512 int pkgLevel, int coreLevel, int threadLevel)
513{
514 int proc;
515
516 KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
517 for (proc = 0; proc < len; proc++) {
518 int level;
519 kmp_str_buf_t buf;
520 __kmp_str_buf_init(&buf);
521 for (level = 0; level < depth; level++) {
522 if (level == threadLevel) {
523 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
524 }
525 else if (level == coreLevel) {
526 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
527 }
528 else if (level == pkgLevel) {
529 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
530 }
531 else if (level > pkgLevel) {
532 __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
533 level - pkgLevel - 1);
534 }
535 else {
536 __kmp_str_buf_print(&buf, "L%d ", level);
537 }
538 __kmp_str_buf_print(&buf, "%d ",
539 address2os[proc].first.labels[level]);
540 }
541 KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
542 buf.str);
543 __kmp_str_buf_free(&buf);
544 }
545}
546
547
548//
549// If we don't know how to retrieve the machine's processor topology, or
550// encounter an error in doing so, this routine is called to form a "flat"
551// mapping of os thread id's <-> processor id's.
552//
553static int
554__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
555 kmp_i18n_id_t *const msg_id)
556{
557 *address2os = NULL;
558 *msg_id = kmp_i18n_null;
559
560 //
561 // Even if __kmp_affinity_type == affinity_none, this routine might still
562 // be called to set __kmp_ncores, as well as
563 // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
564 //
565 if (! KMP_AFFINITY_CAPABLE()) {
566 KMP_ASSERT(__kmp_affinity_type == affinity_none);
567 __kmp_ncores = nPackages = __kmp_xproc;
568 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
569 if (__kmp_affinity_verbose) {
570 KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
571 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
572 KMP_INFORM(Uniform, "KMP_AFFINITY");
573 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
574 __kmp_nThreadsPerCore, __kmp_ncores);
575 }
576 return 0;
577 }
578
579 //
580 // When affinity is off, this routine will still be called to set
581 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
582 // nCoresPerPkg, & nPackages. Make sure all these vars are set
583 // correctly, and return now if affinity is not enabled.
584 //
585 __kmp_ncores = nPackages = __kmp_avail_proc;
586 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
587 if (__kmp_affinity_verbose) {
588 char buf[KMP_AFFIN_MASK_PRINT_LEN];
589 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
590
591 KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
592 if (__kmp_affinity_respect_mask) {
593 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
594 } else {
595 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
596 }
597 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
598 KMP_INFORM(Uniform, "KMP_AFFINITY");
599 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
600 __kmp_nThreadsPerCore, __kmp_ncores);
601 }
602 if (__kmp_affinity_type == affinity_none) {
603 return 0;
604 }
605
606 //
607 // Construct the data structure to be returned.
608 //
609 *address2os = (AddrUnsPair*)
610 __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
611 int avail_ct = 0;
612 unsigned int i;
613 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
614 //
615 // Skip this proc if it is not included in the machine model.
616 //
617 if (! KMP_CPU_ISSET(i, fullMask)) {
618 continue;
619 }
620
621 Address addr(1);
622 addr.labels[0] = i;
623 (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
624 }
625 if (__kmp_affinity_verbose) {
626 KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
627 }
628
629 if (__kmp_affinity_gran_levels < 0) {
630 //
631 // Only the package level is modeled in the machine topology map,
632 // so the #levels of granularity is either 0 or 1.
633 //
634 if (__kmp_affinity_gran > affinity_gran_package) {
635 __kmp_affinity_gran_levels = 1;
636 }
637 else {
638 __kmp_affinity_gran_levels = 0;
639 }
640 }
641 return 1;
642}
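//
// Example of the flat map: with an initial mask of {0,1,2,5}, this routine
// returns 1 (a single topology level) and fills address2os with
// { ({0},0), ({1},1), ({2},2), ({5},5) }, i.e. every OS proc becomes its own
// "package" label.
//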
643
644
645# if KMP_GROUP_AFFINITY
646
647//
648// If multiple Windows* OS processor groups exist, we can create a 2-level
649// topology map with the groups at level 0 and the individual procs at
650// level 1.
651//
652// This facilitates letting the threads float among all procs in a group,
653// if granularity=group (the default when there are multiple groups).
654//
655static int
656__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
657 kmp_i18n_id_t *const msg_id)
658{
659 *address2os = NULL;
660 *msg_id = kmp_i18n_null;
661
662 //
663 // If we don't have multiple processor groups, return now.
664 // The flat mapping will be used.
665 //
666 if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
667 // FIXME set *msg_id
668 return -1;
669 }
670
671 //
672 // Construct the data structure to be returned.
673 //
674 *address2os = (AddrUnsPair*)
675 __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
676 int avail_ct = 0;
677 int i;
678 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
679 //
680 // Skip this proc if it is not included in the machine model.
681 //
682 if (! KMP_CPU_ISSET(i, fullMask)) {
683 continue;
684 }
685
686 Address addr(2);
687 addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
688 addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
689 (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
690
691 if (__kmp_affinity_verbose) {
692 KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
693 addr.labels[1]);
694 }
695 }
696
697 if (__kmp_affinity_gran_levels < 0) {
698 if (__kmp_affinity_gran == affinity_gran_group) {
699 __kmp_affinity_gran_levels = 1;
700 }
701 else if ((__kmp_affinity_gran == affinity_gran_fine)
702 || (__kmp_affinity_gran == affinity_gran_thread)) {
703 __kmp_affinity_gran_levels = 0;
704 }
705 else {
706 const char *gran_str = NULL;
707 if (__kmp_affinity_gran == affinity_gran_core) {
708 gran_str = "core";
709 }
710 else if (__kmp_affinity_gran == affinity_gran_package) {
711 gran_str = "package";
712 }
713 else if (__kmp_affinity_gran == affinity_gran_node) {
714 gran_str = "node";
715 }
716 else {
717 KMP_ASSERT(0);
718 }
719
720 // Warning: can't use affinity granularity \"gran\" with group topology method, using "thread"
721 __kmp_affinity_gran_levels = 0;
722 }
723 }
724 return 2;
725}
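//
// Sketch of the two-level labels produced above, assuming the runtime's
// convention of CHAR_BIT * sizeof(DWORD_PTR) == 64 proc slots per Windows
// processor group (the proc ids below are hypothetical).
//
#if 0
#include <cstdio>

int main() {
    const unsigned group_width = 64;    // CHAR_BIT * sizeof(DWORD_PTR) on Win64
    const unsigned procs[] = { 0, 63, 64, 95 };
    for (int i = 0; i < 4; ++i) {
        unsigned group  = procs[i] / group_width;   // addr.labels[0]
        unsigned within = procs[i] % group_width;   // addr.labels[1]
        std::printf("proc %2u -> group %u, proc-in-group %2u\n",
                    procs[i], group, within);
    }
    return 0;
}
#endif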
726
727# endif /* KMP_GROUP_AFFINITY */
728
729
730# if KMP_ARCH_X86 || KMP_ARCH_X86_64
731
732static int
733__kmp_cpuid_mask_width(int count) {
734 int r = 0;
735
736 while((1<<r) < count)
737 ++r;
738 return r;
739}
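//
// Example: __kmp_cpuid_mask_width(count) returns the number of bits needed
// to encode values in [0, count), i.e. ceil(log2(count)):
//   count = 1 -> 0,   count = 2 -> 1,   count = 6 -> 3,   count = 16 -> 4.
//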
740
741
742class apicThreadInfo {
743public:
744 unsigned osId; // param to __kmp_affinity_bind_thread
745 unsigned apicId; // from cpuid after binding
746 unsigned maxCoresPerPkg; // ""
747 unsigned maxThreadsPerPkg; // ""
748 unsigned pkgId; // inferred from above values
749 unsigned coreId; // ""
750 unsigned threadId; // ""
751};
752
753
754static int
755__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
756{
757 const apicThreadInfo *aa = (const apicThreadInfo *)a;
758 const apicThreadInfo *bb = (const apicThreadInfo *)b;
759 if (aa->osId < bb->osId) return -1;
760 if (aa->osId > bb->osId) return 1;
761 return 0;
762}
763
764
765static int
766__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
767{
768 const apicThreadInfo *aa = (const apicThreadInfo *)a;
769 const apicThreadInfo *bb = (const apicThreadInfo *)b;
770 if (aa->pkgId < bb->pkgId) return -1;
771 if (aa->pkgId > bb->pkgId) return 1;
772 if (aa->coreId < bb->coreId) return -1;
773 if (aa->coreId > bb->coreId) return 1;
774 if (aa->threadId < bb->threadId) return -1;
775 if (aa->threadId > bb->threadId) return 1;
776 return 0;
777}
778
779
780//
781// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
782// an algorithm which cycles through the available os threads, setting
783// the current thread's affinity mask to that thread, and then retrieves
784// the Apic Id for each thread context using the cpuid instruction.
785//
786static int
787__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
788 kmp_i18n_id_t *const msg_id)
789{
790 kmp_cpuid buf;
791 int rc;
792 *address2os = NULL;
793 *msg_id = kmp_i18n_null;
794
795 //
796 // Check if cpuid leaf 4 is supported.
797 //
798 __kmp_x86_cpuid(0, 0, &buf);
799 if (buf.eax < 4) {
800 *msg_id = kmp_i18n_str_NoLeaf4Support;
801 return -1;
802 }
803
804 //
805 // The algorithm used starts by setting the affinity to each available
806 // thread and retrieving info from the cpuid instruction, so if we are
807 // not capable of calling __kmp_get_system_affinity() and
808 // __kmp_set_system_affinity(), then we need to do something else - use
809 // the defaults that we calculated from issuing cpuid without binding
810 // to each proc.
811 //
812 if (! KMP_AFFINITY_CAPABLE()) {
813 //
814 // Hack to try and infer the machine topology using only the data
815 // available from cpuid on the current thread, and __kmp_xproc.
816 //
817 KMP_ASSERT(__kmp_affinity_type == affinity_none);
818
819 //
820 // Get an upper bound on the number of threads per package using
821 // cpuid(1).
822 //
823 // On some OS/chip combinations where HT is supported by the chip
824 // but is disabled, this value will be 2 on a single core chip.
825 // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
826 //
827 __kmp_x86_cpuid(1, 0, &buf);
828 int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
829 if (maxThreadsPerPkg == 0) {
830 maxThreadsPerPkg = 1;
831 }
832
833 //
834 // The num cores per pkg comes from cpuid(4).
835 // 1 must be added to the encoded value.
836 //
837 // The author of cpu_count.cpp treated this as only an upper bound
838 // on the number of cores, but I haven't seen any cases where it
839 // was greater than the actual number of cores, so we will treat
840 // it as exact in this block of code.
841 //
842 // First, we need to check if cpuid(4) is supported on this chip.
843 // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
844 // has the value n or greater.
845 //
846 __kmp_x86_cpuid(0, 0, &buf);
847 if (buf.eax >= 4) {
848 __kmp_x86_cpuid(4, 0, &buf);
849 nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
850 }
851 else {
852 nCoresPerPkg = 1;
853 }
854
855 //
856 // There is no way to reliably tell if HT is enabled without issuing
857 // the cpuid instruction from every thread, and correlating the cpuid
858 // info, so if the machine is not affinity capable, we assume that HT
859 // is off. We have seen quite a few machines where maxThreadsPerPkg
860 // is 2, yet the machine does not support HT.
861 //
862 // - Older OSes are usually found on machines with older chips, which
863 // do not support HT.
864 //
865 // - The performance penalty for mistakenly identifying a machine as
866 // HT when it isn't (which results in blocktime being incorrectly set
867 // to 0) is greater than the penalty for mistakenly identifying
868 // a machine as being 1 thread/core when it is really HT enabled
869 // (which results in blocktime being incorrectly set to a positive
870 // value).
871 //
872 __kmp_ncores = __kmp_xproc;
873 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
874 __kmp_nThreadsPerCore = 1;
875 if (__kmp_affinity_verbose) {
876 KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
877 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
878 if (__kmp_affinity_uniform_topology()) {
879 KMP_INFORM(Uniform, "KMP_AFFINITY");
880 } else {
881 KMP_INFORM(NonUniform, "KMP_AFFINITY");
882 }
883 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
884 __kmp_nThreadsPerCore, __kmp_ncores);
885 }
886 return 0;
887 }
888
889 //
890 //
891 // From here on, we can assume that it is safe to call
892 // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
893 // even if __kmp_affinity_type = affinity_none.
894 //
895
896 //
897 // Save the affinity mask for the current thread.
898 //
899 kmp_affin_mask_t *oldMask;
900 KMP_CPU_ALLOC(oldMask);
901 KMP_ASSERT(oldMask != NULL);
902 __kmp_get_system_affinity(oldMask, TRUE);
903
904 //
905 // Run through each of the available contexts, binding the current thread
906 // to it, and obtaining the pertinent information using the cpuid instr.
907 //
908 // The relevant information is:
909 //
910 // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
911 // has a unique Apic Id, which is of the form pkg# : core# : thread#.
912 //
913 // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The
914 // value of this field determines the width of the core# + thread#
915 // fields in the Apic Id. It is also an upper bound on the number
916 // of threads per package, but it has been verified that situations
917 // happen where it is not exact. In particular, on certain OS/chip
918 // combinations where Intel(R) Hyper-Threading Technology is supported
919 // by the chip but has
920 // been disabled, the value of this field will be 2 (for a single core
921 // chip). On other OS/chip combinations supporting
922 // Intel(R) Hyper-Threading Technology, the value of
923 // this field will be 1 when Intel(R) Hyper-Threading Technology is
924 // disabled and 2 when it is enabled.
925 //
926 // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The
927 // value of this field (+1) determines the width of the core# field in
928 // the Apic Id. The comments in "cpucount.cpp" say that this value is
929 // an upper bound, but the IA-32 architecture manual says that it is
930 // exactly the number of cores per package, and I haven't seen any
931 // case where it wasn't.
932 //
933 // From this information, deduce the package Id, core Id, and thread Id,
934 // and set the corresponding fields in the apicThreadInfo struct.
935 //
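    //
    // Worked example with hypothetical values: if maxThreadsPerPkg == 16 and
    // maxCoresPerPkg == 8, then widthCT == 4, widthC == 3 and widthT == 1.
    // An Apic Id of 0x2D (binary 101101) therefore decodes as
    //   pkgId    = 0x2D >> 4         == 2,
    //   coreId   = (0x2D >> 1) & 0x7 == 6,
    //   threadId = 0x2D & 0x1        == 1.
    //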
936 unsigned i;
937 apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
938 __kmp_avail_proc * sizeof(apicThreadInfo));
939 unsigned nApics = 0;
940 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
941 //
942 // Skip this proc if it is not included in the machine model.
943 //
944 if (! KMP_CPU_ISSET(i, fullMask)) {
945 continue;
946 }
947 KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
948
949 __kmp_affinity_bind_thread(i);
950 threadInfo[nApics].osId = i;
951
952 //
953 // The apic id and max threads per pkg come from cpuid(1).
954 //
955 __kmp_x86_cpuid(1, 0, &buf);
956 if (((buf.edx >> 9) & 1) == 0) {
957 __kmp_set_system_affinity(oldMask, TRUE);
958 __kmp_free(threadInfo);
959 KMP_CPU_FREE(oldMask);
960 *msg_id = kmp_i18n_str_ApicNotPresent;
961 return -1;
962 }
963 threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
964 threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
965 if (threadInfo[nApics].maxThreadsPerPkg == 0) {
966 threadInfo[nApics].maxThreadsPerPkg = 1;
967 }
968
969 //
970 // Max cores per pkg comes from cpuid(4).
971 // 1 must be added to the encoded value.
972 //
973 // First, we need to check if cpuid(4) is supported on this chip.
974 // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
975 // has the value n or greater.
976 //
977 __kmp_x86_cpuid(0, 0, &buf);
978 if (buf.eax >= 4) {
979 __kmp_x86_cpuid(4, 0, &buf);
980 threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
981 }
982 else {
983 threadInfo[nApics].maxCoresPerPkg = 1;
984 }
985
986 //
987 // Infer the pkgId / coreId / threadId using only the info
988 // obtained locally.
989 //
990 int widthCT = __kmp_cpuid_mask_width(
991 threadInfo[nApics].maxThreadsPerPkg);
992 threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
993
994 int widthC = __kmp_cpuid_mask_width(
995 threadInfo[nApics].maxCoresPerPkg);
996 int widthT = widthCT - widthC;
997 if (widthT < 0) {
998 //
999 // I've never seen this one happen, but I suppose it could, if
1000 // the cpuid instruction on a chip was really screwed up.
1001 // Make sure to restore the affinity mask before the tail call.
1002 //
1003 __kmp_set_system_affinity(oldMask, TRUE);
1004 __kmp_free(threadInfo);
1005 KMP_CPU_FREE(oldMask);
1006 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1007 return -1;
1008 }
1009
1010 int maskC = (1 << widthC) - 1;
1011 threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
1012 &maskC;
1013
1014 int maskT = (1 << widthT) - 1;
1015 threadInfo[nApics].threadId = threadInfo[nApics].apicId &maskT;
1016
1017 nApics++;
1018 }
1019
1020 //
1021 // We've collected all the info we need.
1022 // Restore the old affinity mask for this thread.
1023 //
1024 __kmp_set_system_affinity(oldMask, TRUE);
1025
1026 //
1027 // If there's only one thread context to bind to, form an Address object
1028 // with depth 1 and return immediately (or, if affinity is off, set
1029 // address2os to NULL and return).
1030 //
1031 // If it is configured to omit the package level when there is only a
1032 // single package, the logic at the end of this routine won't work if
1033 // there is only a single thread - it would try to form an Address
1034 // object with depth 0.
1035 //
1036 KMP_ASSERT(nApics > 0);
1037 if (nApics == 1) {
1038 __kmp_ncores = nPackages = 1;
1039 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
1040 if (__kmp_affinity_verbose) {
1041 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1042 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1043
1044 KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
1045 if (__kmp_affinity_respect_mask) {
1046 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1047 } else {
1048 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1049 }
1050 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1051 KMP_INFORM(Uniform, "KMP_AFFINITY");
1052 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1053 __kmp_nThreadsPerCore, __kmp_ncores);
1054 }
1055
1056 if (__kmp_affinity_type == affinity_none) {
1057 __kmp_free(threadInfo);
1058 KMP_CPU_FREE(oldMask);
1059 return 0;
1060 }
1061
1062 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
1063 Address addr(1);
1064 addr.labels[0] = threadInfo[0].pkgId;
1065 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
1066
1067 if (__kmp_affinity_gran_levels < 0) {
1068 __kmp_affinity_gran_levels = 0;
1069 }
1070
1071 if (__kmp_affinity_verbose) {
1072 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
1073 }
1074
1075 __kmp_free(threadInfo);
1076 KMP_CPU_FREE(oldMask);
1077 return 1;
1078 }
1079
1080 //
1081 // Sort the threadInfo table by physical Id.
1082 //
1083 qsort(threadInfo, nApics, sizeof(*threadInfo),
1084 __kmp_affinity_cmp_apicThreadInfo_phys_id);
1085
1086 //
1087 // The table is now sorted by pkgId / coreId / threadId, but we really
1088 // don't know the radix of any of the fields. pkgId's may be sparsely
1089 // assigned among the chips on a system. Although coreId's are usually
1090 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
1091 // [0..threadsPerCore-1], we don't want to make any such assumptions.
1092 //
1093 // For that matter, we don't know what coresPerPkg and threadsPerCore
1094 // (or the total # packages) are at this point - we want to determine
1095 // that now. We only have an upper bound on the first two figures.
1096 //
1097 // We also perform a consistency check at this point: the values returned
1098 // by the cpuid instruction for any thread bound to a given package had
1099 // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
1100 //
1101 nPackages = 1;
1102 nCoresPerPkg = 1;
1103 __kmp_nThreadsPerCore = 1;
1104 unsigned nCores = 1;
1105
1106 unsigned pkgCt = 1; // to determine radii
1107 unsigned lastPkgId = threadInfo[0].pkgId;
1108 unsigned coreCt = 1;
1109 unsigned lastCoreId = threadInfo[0].coreId;
1110 unsigned threadCt = 1;
1111 unsigned lastThreadId = threadInfo[0].threadId;
1112
1113 // intra-pkg consistency checks
1114 unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
1115 unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
1116
1117 for (i = 1; i < nApics; i++) {
1118 if (threadInfo[i].pkgId != lastPkgId) {
1119 nCores++;
1120 pkgCt++;
1121 lastPkgId = threadInfo[i].pkgId;
1122 if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
1123 coreCt = 1;
1124 lastCoreId = threadInfo[i].coreId;
1125 if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1126 threadCt = 1;
1127 lastThreadId = threadInfo[i].threadId;
1128
1129 //
1130 // This is a different package, so go on to the next iteration
1131 // without doing any consistency checks. Reset the consistency
1132 // check vars, though.
1133 //
1134 prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
1135 prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
1136 continue;
1137 }
1138
1139 if (threadInfo[i].coreId != lastCoreId) {
1140 nCores++;
1141 coreCt++;
1142 lastCoreId = threadInfo[i].coreId;
1143 if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1144 threadCt = 1;
1145 lastThreadId = threadInfo[i].threadId;
1146 }
1147 else if (threadInfo[i].threadId != lastThreadId) {
1148 threadCt++;
1149 lastThreadId = threadInfo[i].threadId;
1150 }
1151 else {
1152 __kmp_free(threadInfo);
1153 KMP_CPU_FREE(oldMask);
1154 *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
1155 return -1;
1156 }
1157
1158 //
1159 // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
1160 // fields agree between all the threads bound to a given package.
1161 //
1162 if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
1163 || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
1164 __kmp_free(threadInfo);
1165 KMP_CPU_FREE(oldMask);
1166 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1167 return -1;
1168 }
1169 }
1170 nPackages = pkgCt;
1171 if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
1172 if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1173
1174 //
1175 // When affinity is off, this routine will still be called to set
1176 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
1177 // nCoresPerPkg, & nPackages. Make sure all these vars are set
1178 // correctly, and return now if affinity is not enabled.
1179 //
1180 __kmp_ncores = nCores;
1181 if (__kmp_affinity_verbose) {
1182 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1183 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1184
1185 KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
1186 if (__kmp_affinity_respect_mask) {
1187 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1188 } else {
1189 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1190 }
1191 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1192 if (__kmp_affinity_uniform_topology()) {
1193 KMP_INFORM(Uniform, "KMP_AFFINITY");
1194 } else {
1195 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1196 }
1197 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1198 __kmp_nThreadsPerCore, __kmp_ncores);
1199
1200 }
1201
1202 if (__kmp_affinity_type == affinity_none) {
1203 __kmp_free(threadInfo);
1204 KMP_CPU_FREE(oldMask);
1205 return 0;
1206 }
1207
1208 //
1209 // Now that we've determined the number of packages, the number of cores
1210 // per package, and the number of threads per core, we can construct the
1211 // data structure that is to be returned.
1212 //
1213 int pkgLevel = 0;
1214 int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
1215 int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
1216 unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
1217
1218 KMP_ASSERT(depth > 0);
1219 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
1220
1221 for (i = 0; i < nApics; ++i) {
1222 Address addr(depth);
1223 unsigned os = threadInfo[i].osId;
1224 int d = 0;
1225
1226 if (pkgLevel >= 0) {
1227 addr.labels[d++] = threadInfo[i].pkgId;
1228 }
1229 if (coreLevel >= 0) {
1230 addr.labels[d++] = threadInfo[i].coreId;
1231 }
1232 if (threadLevel >= 0) {
1233 addr.labels[d++] = threadInfo[i].threadId;
1234 }
1235 (*address2os)[i] = AddrUnsPair(addr, os);
1236 }
1237
1238 if (__kmp_affinity_gran_levels < 0) {
1239 //
1240 // Set the granularity level based on what levels are modeled
1241 // in the machine topology map.
1242 //
1243 __kmp_affinity_gran_levels = 0;
1244 if ((threadLevel >= 0)
1245 && (__kmp_affinity_gran > affinity_gran_thread)) {
1246 __kmp_affinity_gran_levels++;
1247 }
1248 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1249 __kmp_affinity_gran_levels++;
1250 }
1251 if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
1252 __kmp_affinity_gran_levels++;
1253 }
1254 }
1255
1256 if (__kmp_affinity_verbose) {
1257 __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
1258 coreLevel, threadLevel);
1259 }
1260
1261 __kmp_free(threadInfo);
1262 KMP_CPU_FREE(oldMask);
1263 return depth;
1264}
1265
1266
1267//
1268// Intel(R) microarchitecture code name Nehalem, Dunnington and later
1269// architectures support a newer interface for specifying the x2APIC Ids,
1270// based on cpuid leaf 11.
1271//
1272static int
1273__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
1274 kmp_i18n_id_t *const msg_id)
1275{
1276 kmp_cpuid buf;
1277
1278 *address2os = NULL;
1279 *msg_id = kmp_i18n_null;
1280
1281 //
1282 // Check to see if cpuid leaf 11 is supported.
1283 //
1284 __kmp_x86_cpuid(0, 0, &buf);
1285 if (buf.eax < 11) {
1286 *msg_id = kmp_i18n_str_NoLeaf11Support;
1287 return -1;
1288 }
1289 __kmp_x86_cpuid(11, 0, &buf);
1290 if (buf.ebx == 0) {
1291 *msg_id = kmp_i18n_str_NoLeaf11Support;
1292 return -1;
1293 }
1294
1295 //
1296 // Find the number of levels in the machine topology. While we're at it,
1297 // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will
1298 // try to get more accurate values later by explicitly counting them,
1299 // but get reasonable defaults now, in case we return early.
1300 //
1301 int level;
1302 int threadLevel = -1;
1303 int coreLevel = -1;
1304 int pkgLevel = -1;
1305 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
1306
1307 for (level = 0;; level++) {
1308 if (level > 31) {
1309 //
1310 // FIXME: Hack for DPD200163180
1311 //
1312 // If level is big then something went wrong -> exiting
1313 //
1314 // There could actually be 32 valid levels in the machine topology,
1315 // but so far, the only machine we have seen which does not exit
1316 // this loop before iteration 32 has fubar x2APIC settings.
1317 //
1318 // For now, just reject this case based upon loop trip count.
1319 //
1320 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1321 return -1;
1322 }
1323 __kmp_x86_cpuid(11, level, &buf);
1324 if (buf.ebx == 0) {
1325 if (pkgLevel < 0) {
1326 //
1327 // Will infer nPackages from __kmp_xproc
1328 //
1329 pkgLevel = level;
1330 level++;
1331 }
1332 break;
1333 }
1334 int kind = (buf.ecx >> 8) & 0xff;
1335 if (kind == 1) {
1336 //
1337 // SMT level
1338 //
1339 threadLevel = level;
1340 coreLevel = -1;
1341 pkgLevel = -1;
1342 __kmp_nThreadsPerCore = buf.ebx & 0xff;
1343 if (__kmp_nThreadsPerCore == 0) {
1344 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1345 return -1;
1346 }
1347 }
1348 else if (kind == 2) {
1349 //
1350 // core level
1351 //
1352 coreLevel = level;
1353 pkgLevel = -1;
1354 nCoresPerPkg = buf.ebx & 0xff;
1355 if (nCoresPerPkg == 0) {
1356 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1357 return -1;
1358 }
1359 }
1360 else {
1361 if (level <= 0) {
1362 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1363 return -1;
1364 }
1365 if (pkgLevel >= 0) {
1366 continue;
1367 }
1368 pkgLevel = level;
1369 nPackages = buf.ebx & 0xff;
1370 if (nPackages == 0) {
1371 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1372 return -1;
1373 }
1374 }
1375 }
1376 int depth = level;
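    //
    // Hypothetical example: on a 2-socket machine with hyper-threading,
    // sub-leaf 0 of cpuid(11) reports an SMT level (2 logical procs per
    // core), sub-leaf 1 reports a core level, and sub-leaf 2 returns
    // ebx == 0, so the loop above ends with pkgLevel == 2 and depth == 3.
    // After the inversion below this becomes pkgLevel == 0, coreLevel == 1,
    // threadLevel == 2, matching the label order used in address2os.
    //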
1377
1378 //
1379 // In the above loop, "level" was counted from the finest level (usually
1380 // thread) to the coarsest. The caller expects that we will place the
1381 // labels in (*address2os)[].first.labels[] in the inverse order, so
1382 // we need to invert the vars saying which level means what.
1383 //
1384 if (threadLevel >= 0) {
1385 threadLevel = depth - threadLevel - 1;
1386 }
1387 if (coreLevel >= 0) {
1388 coreLevel = depth - coreLevel - 1;
1389 }
1390 KMP_DEBUG_ASSERT(pkgLevel >= 0);
1391 pkgLevel = depth - pkgLevel - 1;
1392
1393 //
1394 // The algorithm used starts by setting the affinity to each available
1395 // thread and retrieving info from the cpuid instruction, so if we are
1396 // not capable of calling __kmp_get_system_affinity() and
1397 // __kmp_set_system_affinity(), then we need to do something else - use
1398 // the defaults that we calculated from issuing cpuid without binding
1399 // to each proc.
1400 //
1401 if (! KMP_AFFINITY_CAPABLE())
1402 {
1403 //
1404 // Hack to try and infer the machine topology using only the data
1405 // available from cpuid on the current thread, and __kmp_xproc.
1406 //
1407 KMP_ASSERT(__kmp_affinity_type == affinity_none);
1408
1409 __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
1410 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
1411 if (__kmp_affinity_verbose) {
1412 KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
1413 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1414 if (__kmp_affinity_uniform_topology()) {
1415 KMP_INFORM(Uniform, "KMP_AFFINITY");
1416 } else {
1417 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1418 }
1419 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1420 __kmp_nThreadsPerCore, __kmp_ncores);
1421 }
1422 return 0;
1423 }
1424
1425 //
1426 //
1427 // From here on, we can assume that it is safe to call
1428 // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
1429 // even if __kmp_affinity_type = affinity_none.
1430 //
1431
1432 //
1433 // Save the affinity mask for the current thread.
1434 //
1435 kmp_affin_mask_t *oldMask;
1436 KMP_CPU_ALLOC(oldMask);
1437 __kmp_get_system_affinity(oldMask, TRUE);
1438
1439 //
1440 // Allocate the data structure to be returned.
1441 //
1442 AddrUnsPair *retval = (AddrUnsPair *)
1443 __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
1444
1445 //
1446 // Run through each of the available contexts, binding the current thread
1447 // to it, and obtaining the pertinent information using the cpuid instr.
1448 //
1449 unsigned int proc;
1450 int nApics = 0;
1451 for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
1452 //
1453 // Skip this proc if it is not included in the machine model.
1454 //
1455 if (! KMP_CPU_ISSET(proc, fullMask)) {
1456 continue;
1457 }
1458 KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
1459
1460 __kmp_affinity_bind_thread(proc);
1461
1462 //
1463 // Extract the labels for each level in the machine topology map
1464 // from the Apic ID.
1465 //
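        //
        // Worked example with hypothetical shift widths 1 (SMT) and 6 (core),
        // depth == 3: an x2APIC id of 0x53 (binary 1010011) splits into
        //   thread  = 0x53 & 0x1          == 1,
        //   core    = (0x53 & 0x3f) >> 1  == 9,
        //   package = 0x53 >> 6           == 1.
        //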
1466 Address addr(depth);
1467 int prev_shift = 0;
1468
1469 for (level = 0; level < depth; level++) {
1470 __kmp_x86_cpuid(11, level, &buf);
1471 unsigned apicId = buf.edx;
1472 if (buf.ebx == 0) {
1473 if (level != depth - 1) {
1474 KMP_CPU_FREE(oldMask);
1475 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1476 return -1;
1477 }
1478 addr.labels[depth - level - 1] = apicId >> prev_shift;
1479 level++;
1480 break;
1481 }
1482 int shift = buf.eax & 0x1f;
1483 int mask = (1 << shift) - 1;
1484 addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
1485 prev_shift = shift;
1486 }
1487 if (level != depth) {
1488 KMP_CPU_FREE(oldMask);
1489 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1490 return -1;
1491 }
1492
1493 retval[nApics] = AddrUnsPair(addr, proc);
1494 nApics++;
1495 }
1496
1497 //
1498 // We've collected all the info we need.
1499 // Restore the old affinity mask for this thread.
1500 //
1501 __kmp_set_system_affinity(oldMask, TRUE);
1502
1503 //
1504 // If there's only one thread context to bind to, return now.
1505 //
1506 KMP_ASSERT(nApics > 0);
1507 if (nApics == 1) {
1508 __kmp_ncores = nPackages = 1;
1509 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
1510 if (__kmp_affinity_verbose) {
1511 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1512 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1513
1514 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1515 if (__kmp_affinity_respect_mask) {
1516 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1517 } else {
1518 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1519 }
1520 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1521 KMP_INFORM(Uniform, "KMP_AFFINITY");
1522 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1523 __kmp_nThreadsPerCore, __kmp_ncores);
1524 }
1525
1526 if (__kmp_affinity_type == affinity_none) {
1527 __kmp_free(retval);
1528 KMP_CPU_FREE(oldMask);
1529 return 0;
1530 }
1531
1532 //
1533 // Form an Address object which only includes the package level.
1534 //
1535 Address addr(1);
1536 addr.labels[0] = retval[0].first.labels[pkgLevel];
1537 retval[0].first = addr;
1538
1539 if (__kmp_affinity_gran_levels < 0) {
1540 __kmp_affinity_gran_levels = 0;
1541 }
1542
1543 if (__kmp_affinity_verbose) {
1544 __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
1545 }
1546
1547 *address2os = retval;
1548 KMP_CPU_FREE(oldMask);
1549 return 1;
1550 }
1551
1552 //
1553 // Sort the table by physical Id.
1554 //
1555 qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
1556
1557 //
1558 // Find the radix at each of the levels.
1559 //
1560 unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1561 unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1562 unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1563 unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1564 for (level = 0; level < depth; level++) {
1565 totals[level] = 1;
1566 maxCt[level] = 1;
1567 counts[level] = 1;
1568 last[level] = retval[0].first.labels[level];
1569 }
1570
1571 //
1572 // From here on, the iteration variable "level" runs from the finest
1573 // level to the coarsest, i.e. we iterate forward through
1574 // (*address2os)[].first.labels[] - in the previous loops, we iterated
1575 // backwards.
1576 //
1577 for (proc = 1; (int)proc < nApics; proc++) {
1578 int level;
1579 for (level = 0; level < depth; level++) {
1580 if (retval[proc].first.labels[level] != last[level]) {
1581 int j;
1582 for (j = level + 1; j < depth; j++) {
1583 totals[j]++;
1584 counts[j] = 1;
1585 // The line below caused incorrect topology information to be printed
1586 // in case the max value for some level (maxCt[level]) was encountered
1587 // earlier than some smaller value while going through the array.
1588 // For example, suppose pkg0 has 4 cores and pkg1 has 2 cores. Then maxCt[1] == 2
1589 // whereas it must be 4.
1590 // TODO!!! Check if it can be commented safely
1591 //maxCt[j] = 1;
1592 last[j] = retval[proc].first.labels[j];
1593 }
1594 totals[level]++;
1595 counts[level]++;
1596 if (counts[level] > maxCt[level]) {
1597 maxCt[level] = counts[level];
1598 }
1599 last[level] = retval[proc].first.labels[level];
1600 break;
1601 }
1602 else if (level == depth - 1) {
1603 __kmp_free(last);
1604 __kmp_free(maxCt);
1605 __kmp_free(counts);
1606 __kmp_free(totals);
1607 __kmp_free(retval);
1608 KMP_CPU_FREE(oldMask);
1609 *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
1610 return -1;
1611 }
1612 }
1613 }
1614
1615 //
1616 // When affinity is off, this routine will still be called to set
1617 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
1618 // nCoresPerPkg, & nPackages. Make sure all these vars are set
1619 // correctly, and return if affinity is not enabled.
1620 //
1621 if (threadLevel >= 0) {
1622 __kmp_nThreadsPerCore = maxCt[threadLevel];
1623 }
1624 else {
1625 __kmp_nThreadsPerCore = 1;
1626 }
1627 nPackages = totals[pkgLevel];
1628
1629 if (coreLevel >= 0) {
1630 __kmp_ncores = totals[coreLevel];
1631 nCoresPerPkg = maxCt[coreLevel];
1632 }
1633 else {
1634 __kmp_ncores = nPackages;
1635 nCoresPerPkg = 1;
1636 }
1637
1638 //
1639 // Check to see if the machine topology is uniform
1640 //
1641 unsigned prod = maxCt[0];
1642 for (level = 1; level < depth; level++) {
1643 prod *= maxCt[level];
1644 }
1645 bool uniform = (prod == totals[level - 1]);
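    //
    // Example: on a machine with 2 packages x 4 cores x 2 HW threads,
    // maxCt == {2, 4, 2} and totals == {2, 8, 16}; prod == 2*4*2 == 16 equals
    // totals[depth - 1], so the topology is uniform.  If one package had only
    // 3 cores, totals[depth - 1] would be 14 while prod stays 16, and the
    // topology would be reported as non-uniform.
    //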
1646
1647 //
1648 // Print the machine topology summary.
1649 //
1650 if (__kmp_affinity_verbose) {
1651 char mask[KMP_AFFIN_MASK_PRINT_LEN];
1652 __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1653
1654 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1655 if (__kmp_affinity_respect_mask) {
1656 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
1657 } else {
1658 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
1659 }
1660 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1661 if (uniform) {
1662 KMP_INFORM(Uniform, "KMP_AFFINITY");
1663 } else {
1664 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1665 }
1666
1667 kmp_str_buf_t buf;
1668 __kmp_str_buf_init(&buf);
1669
1670 __kmp_str_buf_print(&buf, "%d", totals[0]);
1671 for (level = 1; level <= pkgLevel; level++) {
1672 __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
1673 }
1674 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
1675 __kmp_nThreadsPerCore, __kmp_ncores);
1676
1677 __kmp_str_buf_free(&buf);
1678 }
1679
1680 if (__kmp_affinity_type == affinity_none) {
1681 __kmp_free(last);
1682 __kmp_free(maxCt);
1683 __kmp_free(counts);
1684 __kmp_free(totals);
1685 __kmp_free(retval);
1686 KMP_CPU_FREE(oldMask);
1687 return 0;
1688 }
1689
1690 //
1691 // Find any levels with radix 1, and remove them from the map
1692 // (except for the package level).
1693 //
1694 int new_depth = 0;
1695 for (level = 0; level < depth; level++) {
1696 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1697 continue;
1698 }
1699 new_depth++;
1700 }
1701
1702 //
1703 // If we are removing any levels, allocate a new vector to return,
1704 // and copy the relevant information to it.
1705 //
1706 if (new_depth != depth) {
1707 AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
1708 sizeof(AddrUnsPair) * nApics);
1709 for (proc = 0; (int)proc < nApics; proc++) {
1710 Address addr(new_depth);
1711 new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
1712 }
1713 int new_level = 0;
1714 for (level = 0; level < depth; level++) {
1715 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1716 if (level == threadLevel) {
1717 threadLevel = -1;
1718 }
1719 else if ((threadLevel >= 0) && (level < threadLevel)) {
1720 threadLevel--;
1721 }
1722 if (level == coreLevel) {
1723 coreLevel = -1;
1724 }
1725 else if ((coreLevel >= 0) && (level < coreLevel)) {
1726 coreLevel--;
1727 }
1728 if (level < pkgLevel) {
1729 pkgLevel--;
1730 }
1731 continue;
1732 }
1733 for (proc = 0; (int)proc < nApics; proc++) {
1734 new_retval[proc].first.labels[new_level]
1735 = retval[proc].first.labels[level];
1736 }
1737 new_level++;
1738 }
1739
1740 __kmp_free(retval);
1741 retval = new_retval;
1742 depth = new_depth;
1743 }
1744
1745 if (__kmp_affinity_gran_levels < 0) {
1746 //
1747 // Set the granularity level based on what levels are modeled
1748 // in the machine topology map.
1749 //
1750 __kmp_affinity_gran_levels = 0;
1751 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
1752 __kmp_affinity_gran_levels++;
1753 }
1754 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1755 __kmp_affinity_gran_levels++;
1756 }
1757 if (__kmp_affinity_gran > affinity_gran_package) {
1758 __kmp_affinity_gran_levels++;
1759 }
1760 }
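    //
    // __kmp_affinity_gran_levels is the number of innermost topology levels
    // ignored when forming affinity masks.  For example, with a thread level
    // in the map and granularity=core, the value 1 causes all HW thread
    // contexts on a core to be OR'd into a single mask in __kmp_create_masks.
    //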
1761
1762 if (__kmp_affinity_verbose) {
1763 __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
1764 coreLevel, threadLevel);
1765 }
1766
1767 __kmp_free(last);
1768 __kmp_free(maxCt);
1769 __kmp_free(counts);
1770 __kmp_free(totals);
1771 KMP_CPU_FREE(oldMask);
1772 *address2os = retval;
1773 return depth;
1774}
1775
1776
1777# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1778
1779
1780#define osIdIndex 0
1781#define threadIdIndex 1
1782#define coreIdIndex 2
1783#define pkgIdIndex 3
1784#define nodeIdIndex 4
1785
1786typedef unsigned *ProcCpuInfo;
1787static unsigned maxIndex = pkgIdIndex;
1788
1789
1790static int
1791__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
1792{
1793 const unsigned *aa = (const unsigned *)a;
1794 const unsigned *bb = (const unsigned *)b;
1795 if (aa[osIdIndex] < bb[osIdIndex]) return -1;
1796 if (aa[osIdIndex] > bb[osIdIndex]) return 1;
1797 return 0;
1798}
1799
1800
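//
// qsort comparator that orders cpuinfo records by their most significant
// field first (node ids, then package id, core id, and thread id, down to
// the OS proc id as a tie-breaker), so that records belonging to the same
// package/core end up adjacent after sorting.
//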
1801static int
1802__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
1803{
1804 unsigned i;
1805 const unsigned *aa = *((const unsigned **)a);
1806 const unsigned *bb = *((const unsigned **)b);
1807 for (i = maxIndex; ; i--) {
1808 if (aa[i] < bb[i]) return -1;
1809 if (aa[i] > bb[i]) return 1;
1810 if (i == osIdIndex) break;
1811 }
1812 return 0;
1813}
1814
1815
1816//
1817// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
1818// affinity map.
1819//
1820static int
1821__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
1822 kmp_i18n_id_t *const msg_id, FILE *f)
1823{
1824 *address2os = NULL;
1825 *msg_id = kmp_i18n_null;
1826
1827 //
1828    // Scan the file and count the number of "processor" (osId) fields,
Alp Toker8f2d3f02014-02-24 10:40:15 +00001829 // and find the highest value of <n> for a node_<n> field.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001830 //
1831 char buf[256];
1832 unsigned num_records = 0;
1833 while (! feof(f)) {
1834 buf[sizeof(buf) - 1] = 1;
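        //
        // The last byte of buf is a sentinel: it is set to a nonzero value
        // before each fgets() call, and fgets() only overwrites it with '\0'
        // when a line fills the whole buffer.  The second parsing pass below
        // uses this to detect lines that are longer than the buffer.
        //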
1835 if (! fgets(buf, sizeof(buf), f)) {
1836 //
1837 // Read errors presumably because of EOF
1838 //
1839 break;
1840 }
1841
1842 char s1[] = "processor";
1843 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1844 num_records++;
1845 continue;
1846 }
1847
1848 //
1849 // FIXME - this will match "node_<n> <garbage>"
1850 //
1851 unsigned level;
1852 if (sscanf(buf, "node_%d id", &level) == 1) {
1853 if (nodeIdIndex + level >= maxIndex) {
1854 maxIndex = nodeIdIndex + level;
1855 }
1856 continue;
1857 }
1858 }
1859
1860 //
1861 // Check for empty file / no valid processor records, or too many.
1862 // The number of records can't exceed the number of valid bits in the
1863 // affinity mask.
1864 //
1865 if (num_records == 0) {
1866 *line = 0;
1867 *msg_id = kmp_i18n_str_NoProcRecords;
1868 return -1;
1869 }
1870 if (num_records > (unsigned)__kmp_xproc) {
1871 *line = 0;
1872 *msg_id = kmp_i18n_str_TooManyProcRecords;
1873 return -1;
1874 }
1875
1876 //
1877    // Set the file pointer back to the beginning, so that we can scan the
1878 // file again, this time performing a full parse of the data.
1879    // Allocate a vector of ProcCpuInfo objects, where we will place the data.
1880 // Adding an extra element at the end allows us to remove a lot of extra
1881 // checks for termination conditions.
1882 //
1883 if (fseek(f, 0, SEEK_SET) != 0) {
1884 *line = 0;
1885 *msg_id = kmp_i18n_str_CantRewindCpuinfo;
1886 return -1;
1887 }
1888
1889 //
1890 // Allocate the array of records to store the proc info in. The dummy
1891 // element at the end makes the logic in filling them out easier to code.
1892 //
1893 unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
1894 * sizeof(unsigned *));
1895 unsigned i;
1896 for (i = 0; i <= num_records; i++) {
1897 threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
1898 * sizeof(unsigned));
1899 }
1900
1901#define CLEANUP_THREAD_INFO \
1902 for (i = 0; i <= num_records; i++) { \
1903 __kmp_free(threadInfo[i]); \
1904 } \
1905 __kmp_free(threadInfo);
1906
1907 //
1908 // A value of UINT_MAX means that we didn't find the field
1909 //
1910 unsigned __index;
1911
1912#define INIT_PROC_INFO(p) \
1913 for (__index = 0; __index <= maxIndex; __index++) { \
1914 (p)[__index] = UINT_MAX; \
1915 }
1916
1917 for (i = 0; i <= num_records; i++) {
1918 INIT_PROC_INFO(threadInfo[i]);
1919 }
1920
1921 unsigned num_avail = 0;
1922 *line = 0;
1923 while (! feof(f)) {
1924 //
1925 // Create an inner scoping level, so that all the goto targets at the
1926 // end of the loop appear in an outer scoping level. This avoids
1927 // warnings about jumping past an initialization to a target in the
1928 // same block.
1929 //
1930 {
1931 buf[sizeof(buf) - 1] = 1;
1932 bool long_line = false;
1933 if (! fgets(buf, sizeof(buf), f)) {
1934 //
1935 // Read errors presumably because of EOF
1936 //
1937 // If there is valid data in threadInfo[num_avail], then fake
1938                // a blank line to ensure that the last address gets parsed.
1939 //
1940 bool valid = false;
1941 for (i = 0; i <= maxIndex; i++) {
1942 if (threadInfo[num_avail][i] != UINT_MAX) {
1943 valid = true;
1944 }
1945 }
1946 if (! valid) {
1947 break;
1948 }
1949 buf[0] = 0;
1950 } else if (!buf[sizeof(buf) - 1]) {
1951 //
1952 // The line is longer than the buffer. Set a flag and don't
1953 // emit an error if we were going to ignore the line, anyway.
1954 //
1955 long_line = true;
1956
1957#define CHECK_LINE \
1958 if (long_line) { \
1959 CLEANUP_THREAD_INFO; \
1960 *msg_id = kmp_i18n_str_LongLineCpuinfo; \
1961 return -1; \
1962 }
1963 }
1964 (*line)++;
1965
1966 char s1[] = "processor";
1967 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1968 CHECK_LINE;
1969 char *p = strchr(buf + sizeof(s1) - 1, ':');
1970 unsigned val;
1971 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
1972 if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
1973 threadInfo[num_avail][osIdIndex] = val;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001974#if KMP_OS_LINUX && USE_SYSFS_INFO
1975 char path[256];
1976 snprintf(path, sizeof(path),
1977 "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
1978 threadInfo[num_avail][osIdIndex]);
1979 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
1980
1981 snprintf(path, sizeof(path),
1982 "/sys/devices/system/cpu/cpu%u/topology/core_id",
1983 threadInfo[num_avail][osIdIndex]);
1984 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001985 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001986#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001987 }
1988 char s2[] = "physical id";
1989 if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
1990 CHECK_LINE;
1991 char *p = strchr(buf + sizeof(s2) - 1, ':');
1992 unsigned val;
1993 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
1994 if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
1995 threadInfo[num_avail][pkgIdIndex] = val;
1996 continue;
1997 }
1998 char s3[] = "core id";
1999 if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
2000 CHECK_LINE;
2001 char *p = strchr(buf + sizeof(s3) - 1, ':');
2002 unsigned val;
2003 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2004 if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
2005 threadInfo[num_avail][coreIdIndex] = val;
2006 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002007#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jim Cownie5e8470a2013-09-27 10:38:44 +00002008 }
2009 char s4[] = "thread id";
2010 if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
2011 CHECK_LINE;
2012 char *p = strchr(buf + sizeof(s4) - 1, ':');
2013 unsigned val;
2014 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2015 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
2016 threadInfo[num_avail][threadIdIndex] = val;
2017 continue;
2018 }
2019 unsigned level;
2020 if (sscanf(buf, "node_%d id", &level) == 1) {
2021 CHECK_LINE;
2022 char *p = strchr(buf + sizeof(s4) - 1, ':');
2023 unsigned val;
2024 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2025 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
2026 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
2027 threadInfo[num_avail][nodeIdIndex + level] = val;
2028 continue;
2029 }
2030
2031 //
2032 // We didn't recognize the leading token on the line.
2033 // There are lots of leading tokens that we don't recognize -
2034 // if the line isn't empty, go on to the next line.
2035 //
2036 if ((*buf != 0) && (*buf != '\n')) {
2037 //
2038 // If the line is longer than the buffer, read characters
2039 // until we find a newline.
2040 //
2041 if (long_line) {
2042 int ch;
2043 while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
2044 }
2045 continue;
2046 }
2047
2048 //
2049 // A newline has signalled the end of the processor record.
2050 // Check that there aren't too many procs specified.
2051 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002052 if ((int)num_avail == __kmp_xproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002053 CLEANUP_THREAD_INFO;
2054 *msg_id = kmp_i18n_str_TooManyEntries;
2055 return -1;
2056 }
2057
2058 //
2059 // Check for missing fields. The osId field must be there, and we
2060 // currently require that the physical id field is specified, also.
2061 //
2062 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
2063 CLEANUP_THREAD_INFO;
2064 *msg_id = kmp_i18n_str_MissingProcField;
2065 return -1;
2066 }
2067 if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
2068 CLEANUP_THREAD_INFO;
2069 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
2070 return -1;
2071 }
2072
2073 //
2074 // Skip this proc if it is not included in the machine model.
2075 //
2076 if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
2077 INIT_PROC_INFO(threadInfo[num_avail]);
2078 continue;
2079 }
2080
2081 //
2082 // We have a successful parse of this proc's info.
2083 // Increment the counter, and prepare for the next proc.
2084 //
2085 num_avail++;
2086 KMP_ASSERT(num_avail <= num_records);
2087 INIT_PROC_INFO(threadInfo[num_avail]);
2088 }
2089 continue;
2090
2091 no_val:
2092 CLEANUP_THREAD_INFO;
2093 *msg_id = kmp_i18n_str_MissingValCpuinfo;
2094 return -1;
2095
2096 dup_field:
2097 CLEANUP_THREAD_INFO;
2098 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
2099 return -1;
2100 }
2101 *line = 0;
2102
2103# if KMP_MIC && REDUCE_TEAM_SIZE
2104 unsigned teamSize = 0;
2105# endif // KMP_MIC && REDUCE_TEAM_SIZE
2106
2107 // check for num_records == __kmp_xproc ???
2108
2109 //
2110 // If there's only one thread context to bind to, form an Address object
2111 // with depth 1 and return immediately (or, if affinity is off, set
2112 // address2os to NULL and return).
2113 //
2114 // If it is configured to omit the package level when there is only a
2115 // single package, the logic at the end of this routine won't work if
2116 // there is only a single thread - it would try to form an Address
2117 // object with depth 0.
2118 //
2119 KMP_ASSERT(num_avail > 0);
2120 KMP_ASSERT(num_avail <= num_records);
2121 if (num_avail == 1) {
2122 __kmp_ncores = 1;
2123 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002124 if (__kmp_affinity_verbose) {
2125 if (! KMP_AFFINITY_CAPABLE()) {
2126 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2127 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2128 KMP_INFORM(Uniform, "KMP_AFFINITY");
2129 }
2130 else {
2131 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2132 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
2133 fullMask);
2134 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2135 if (__kmp_affinity_respect_mask) {
2136 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2137 } else {
2138 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2139 }
2140 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2141 KMP_INFORM(Uniform, "KMP_AFFINITY");
2142 }
2143 int index;
2144 kmp_str_buf_t buf;
2145 __kmp_str_buf_init(&buf);
2146 __kmp_str_buf_print(&buf, "1");
2147 for (index = maxIndex - 1; index > pkgIdIndex; index--) {
2148 __kmp_str_buf_print(&buf, " x 1");
2149 }
2150 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
2151 __kmp_str_buf_free(&buf);
2152 }
2153
2154 if (__kmp_affinity_type == affinity_none) {
2155 CLEANUP_THREAD_INFO;
2156 return 0;
2157 }
2158
2159 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
2160 Address addr(1);
2161 addr.labels[0] = threadInfo[0][pkgIdIndex];
2162 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
2163
2164 if (__kmp_affinity_gran_levels < 0) {
2165 __kmp_affinity_gran_levels = 0;
2166 }
2167
2168 if (__kmp_affinity_verbose) {
2169 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
2170 }
2171
2172 CLEANUP_THREAD_INFO;
2173 return 1;
2174 }
2175
2176 //
2177 // Sort the threadInfo table by physical Id.
2178 //
2179 qsort(threadInfo, num_avail, sizeof(*threadInfo),
2180 __kmp_affinity_cmp_ProcCpuInfo_phys_id);
2181
2182 //
2183 // The table is now sorted by pkgId / coreId / threadId, but we really
2184 // don't know the radix of any of the fields. pkgId's may be sparsely
2185 // assigned among the chips on a system. Although coreId's are usually
2186 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
2187 // [0..threadsPerCore-1], we don't want to make any such assumptions.
2188 //
2189 // For that matter, we don't know what coresPerPkg and threadsPerCore
2190 // (or the total # packages) are at this point - we want to determine
2191 // that now. We only have an upper bound on the first two figures.
2192 //
2193 unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
2194 * sizeof(unsigned));
2195 unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
2196 * sizeof(unsigned));
2197 unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
2198 * sizeof(unsigned));
2199 unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
2200 * sizeof(unsigned));
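    //
    // For each field index, counts[] holds the number of distinct ids seen
    // under the current parent, maxCt[] the largest such count observed (the
    // radix of that level), totals[] the number of distinct nodes seen at
    // that level overall, and lastId[] the id from the previous record.
    //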
2201
2202 bool assign_thread_ids = false;
2203 unsigned threadIdCt;
2204 unsigned index;
2205
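    //
    // The radix check below runs in at most two passes.  The first pass uses
    // the thread ids from the file as-is; if a duplicate pkg/core/thread tuple
    // is found and no explicit thread ids were given, assign_thread_ids is set
    // and the check restarts, numbering the thread contexts on each core
    // sequentially via threadIdCt.
    //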
2206 restart_radix_check:
2207 threadIdCt = 0;
2208
2209 //
2210 // Initialize the counter arrays with data from threadInfo[0].
2211 //
2212 if (assign_thread_ids) {
2213 if (threadInfo[0][threadIdIndex] == UINT_MAX) {
2214 threadInfo[0][threadIdIndex] = threadIdCt++;
2215 }
2216 else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
2217 threadIdCt = threadInfo[0][threadIdIndex] + 1;
2218 }
2219 }
2220 for (index = 0; index <= maxIndex; index++) {
2221 counts[index] = 1;
2222 maxCt[index] = 1;
2223 totals[index] = 1;
2224        lastId[index] = threadInfo[0][index];
2225 }
2226
2227 //
2228 // Run through the rest of the OS procs.
2229 //
2230 for (i = 1; i < num_avail; i++) {
2231 //
2232 // Find the most significant index whose id differs
2233 // from the id for the previous OS proc.
2234 //
2235 for (index = maxIndex; index >= threadIdIndex; index--) {
2236 if (assign_thread_ids && (index == threadIdIndex)) {
2237 //
2238 // Auto-assign the thread id field if it wasn't specified.
2239 //
2240 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2241 threadInfo[i][threadIdIndex] = threadIdCt++;
2242 }
2243
2244 //
2245                // Apparently the thread id field was specified for some
2246 // entries and not others. Start the thread id counter
2247 // off at the next higher thread id.
2248 //
2249 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2250 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2251 }
2252 }
2253 if (threadInfo[i][index] != lastId[index]) {
2254 //
2255 // Run through all indices which are less significant,
2256 // and reset the counts to 1.
2257 //
2258 // At all levels up to and including index, we need to
2259 // increment the totals and record the last id.
2260 //
2261 unsigned index2;
2262 for (index2 = threadIdIndex; index2 < index; index2++) {
2263 totals[index2]++;
2264 if (counts[index2] > maxCt[index2]) {
2265 maxCt[index2] = counts[index2];
2266 }
2267 counts[index2] = 1;
2268 lastId[index2] = threadInfo[i][index2];
2269 }
2270 counts[index]++;
2271 totals[index]++;
2272 lastId[index] = threadInfo[i][index];
2273
2274 if (assign_thread_ids && (index > threadIdIndex)) {
2275
2276# if KMP_MIC && REDUCE_TEAM_SIZE
2277 //
2278 // The default team size is the total #threads in the machine
2279 // minus 1 thread for every core that has 3 or more threads.
2280 //
2281 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2282# endif // KMP_MIC && REDUCE_TEAM_SIZE
2283
2284 //
2285 // Restart the thread counter, as we are on a new core.
2286 //
2287 threadIdCt = 0;
2288
2289 //
2290 // Auto-assign the thread id field if it wasn't specified.
2291 //
2292 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2293 threadInfo[i][threadIdIndex] = threadIdCt++;
2294 }
2295
2296 //
2297                // Apparently the thread id field was specified for some
2298 // entries and not others. Start the thread id counter
2299 // off at the next higher thread id.
2300 //
2301 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2302 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2303 }
2304 }
2305 break;
2306 }
2307 }
2308 if (index < threadIdIndex) {
2309 //
2310 // If thread ids were specified, it is an error if they are not
2311            // unique. Also, check that we haven't already restarted the
2312 // loop (to be safe - shouldn't need to).
2313 //
2314 if ((threadInfo[i][threadIdIndex] != UINT_MAX)
2315 || assign_thread_ids) {
2316 __kmp_free(lastId);
2317 __kmp_free(totals);
2318 __kmp_free(maxCt);
2319 __kmp_free(counts);
2320 CLEANUP_THREAD_INFO;
2321 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2322 return -1;
2323 }
2324
2325 //
2326 // If the thread ids were not specified and we see entries
2327            // that are duplicates, start the loop over and
2328 // assign the thread ids manually.
2329 //
2330 assign_thread_ids = true;
2331 goto restart_radix_check;
2332 }
2333 }
2334
2335# if KMP_MIC && REDUCE_TEAM_SIZE
2336 //
2337 // The default team size is the total #threads in the machine
2338 // minus 1 thread for every core that has 3 or more threads.
2339 //
2340 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2341# endif // KMP_MIC && REDUCE_TEAM_SIZE
2342
2343 for (index = threadIdIndex; index <= maxIndex; index++) {
2344 if (counts[index] > maxCt[index]) {
2345 maxCt[index] = counts[index];
2346 }
2347 }
2348
2349 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2350 nCoresPerPkg = maxCt[coreIdIndex];
2351 nPackages = totals[pkgIdIndex];
2352
2353 //
2354 // Check to see if the machine topology is uniform
2355 //
2356 unsigned prod = totals[maxIndex];
2357 for (index = threadIdIndex; index < maxIndex; index++) {
2358 prod *= maxCt[index];
2359 }
2360 bool uniform = (prod == totals[threadIdIndex]);
2361
2362 //
2363 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00002364 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002365 // nCoresPerPkg, & nPackages. Make sure all these vars are set
2366 // correctly, and return now if affinity is not enabled.
2367 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00002368 __kmp_ncores = totals[coreIdIndex];
2369
2370 if (__kmp_affinity_verbose) {
2371 if (! KMP_AFFINITY_CAPABLE()) {
2372 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2373 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2374 if (uniform) {
2375 KMP_INFORM(Uniform, "KMP_AFFINITY");
2376 } else {
2377 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2378 }
2379 }
2380 else {
2381 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2382 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
2383 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2384 if (__kmp_affinity_respect_mask) {
2385 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2386 } else {
2387 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2388 }
2389 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2390 if (uniform) {
2391 KMP_INFORM(Uniform, "KMP_AFFINITY");
2392 } else {
2393 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2394 }
2395 }
2396 kmp_str_buf_t buf;
2397 __kmp_str_buf_init(&buf);
2398
2399 __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
2400 for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2401 __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
2402 }
2403 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
2404 maxCt[threadIdIndex], __kmp_ncores);
2405
2406 __kmp_str_buf_free(&buf);
2407 }
2408
2409# if KMP_MIC && REDUCE_TEAM_SIZE
2410 //
2411 // Set the default team size.
2412 //
2413 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2414 __kmp_dflt_team_nth = teamSize;
2415 KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
2416 __kmp_dflt_team_nth));
2417 }
2418# endif // KMP_MIC && REDUCE_TEAM_SIZE
2419
2420 if (__kmp_affinity_type == affinity_none) {
2421 __kmp_free(lastId);
2422 __kmp_free(totals);
2423 __kmp_free(maxCt);
2424 __kmp_free(counts);
2425 CLEANUP_THREAD_INFO;
2426 return 0;
2427 }
2428
2429 //
2430 // Count the number of levels which have more nodes at that level than
2431    // at the parent's level (with there being an implicit root node above
2432    // the top level). This is equivalent to saying that there is at least
2433 // one node at this level which has a sibling. These levels are in the
2434 // map, and the package level is always in the map.
2435 //
2436 bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
2437 int level = 0;
2438 for (index = threadIdIndex; index < maxIndex; index++) {
2439 KMP_ASSERT(totals[index] >= totals[index + 1]);
2440 inMap[index] = (totals[index] > totals[index + 1]);
2441 }
2442 inMap[maxIndex] = (totals[maxIndex] > 1);
2443 inMap[pkgIdIndex] = true;
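    //
    // For example, if every core reports exactly one thread context, then
    // totals[threadIdIndex] == totals[coreIdIndex], no thread has a sibling,
    // and the thread level is left out of the map.
    //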
2444
2445 int depth = 0;
2446 for (index = threadIdIndex; index <= maxIndex; index++) {
2447 if (inMap[index]) {
2448 depth++;
2449 }
2450 }
2451 KMP_ASSERT(depth > 0);
2452
2453 //
2454 // Construct the data structure that is to be returned.
2455 //
2456 *address2os = (AddrUnsPair*)
2457 __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
2458 int pkgLevel = -1;
2459 int coreLevel = -1;
2460 int threadLevel = -1;
2461
2462 for (i = 0; i < num_avail; ++i) {
2463 Address addr(depth);
2464 unsigned os = threadInfo[i][osIdIndex];
2465 int src_index;
2466 int dst_index = 0;
2467
2468 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
2469 if (! inMap[src_index]) {
2470 continue;
2471 }
2472 addr.labels[dst_index] = threadInfo[i][src_index];
2473 if (src_index == pkgIdIndex) {
2474 pkgLevel = dst_index;
2475 }
2476 else if (src_index == coreIdIndex) {
2477 coreLevel = dst_index;
2478 }
2479 else if (src_index == threadIdIndex) {
2480 threadLevel = dst_index;
2481 }
2482 dst_index++;
2483 }
2484 (*address2os)[i] = AddrUnsPair(addr, os);
2485 }
2486
2487 if (__kmp_affinity_gran_levels < 0) {
2488 //
2489 // Set the granularity level based on what levels are modeled
2490 // in the machine topology map.
2491 //
2492 unsigned src_index;
2493 __kmp_affinity_gran_levels = 0;
2494 for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2495 if (! inMap[src_index]) {
2496 continue;
2497 }
2498 switch (src_index) {
2499 case threadIdIndex:
2500 if (__kmp_affinity_gran > affinity_gran_thread) {
2501 __kmp_affinity_gran_levels++;
2502 }
2503
2504 break;
2505 case coreIdIndex:
2506 if (__kmp_affinity_gran > affinity_gran_core) {
2507 __kmp_affinity_gran_levels++;
2508 }
2509 break;
2510
2511 case pkgIdIndex:
2512 if (__kmp_affinity_gran > affinity_gran_package) {
2513 __kmp_affinity_gran_levels++;
2514 }
2515 break;
2516 }
2517 }
2518 }
2519
2520 if (__kmp_affinity_verbose) {
2521 __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2522 coreLevel, threadLevel);
2523 }
2524
2525 __kmp_free(inMap);
2526 __kmp_free(lastId);
2527 __kmp_free(totals);
2528 __kmp_free(maxCt);
2529 __kmp_free(counts);
2530 CLEANUP_THREAD_INFO;
2531 return depth;
2532}
2533
2534
2535//
2536// Create and return a table of affinity masks, indexed by OS thread ID.
2537// This routine handles OR'ing together all the affinity masks of threads
2538// that are sufficiently close, if granularity > fine.
2539//
2540static kmp_affin_mask_t *
2541__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
2542 AddrUnsPair *address2os, unsigned numAddrs)
2543{
2544 //
2545 // First form a table of affinity masks in order of OS thread id.
2546 //
2547 unsigned depth;
2548 unsigned maxOsId;
2549 unsigned i;
2550
2551 KMP_ASSERT(numAddrs > 0);
2552 depth = address2os[0].first.depth;
2553
2554 maxOsId = 0;
2555 for (i = 0; i < numAddrs; i++) {
2556 unsigned osId = address2os[i].second;
2557 if (osId > maxOsId) {
2558 maxOsId = osId;
2559 }
2560 }
2561 kmp_affin_mask_t *osId2Mask = (kmp_affin_mask_t *)__kmp_allocate(
2562 (maxOsId + 1) * __kmp_affin_mask_size);
2563
2564 //
2565 // Sort the address2os table according to physical order. Doing so
2566 // will put all threads on the same core/package/node in consecutive
2567 // locations.
2568 //
2569 qsort(address2os, numAddrs, sizeof(*address2os),
2570 __kmp_affinity_cmp_Address_labels);
2571
2572 KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2573 if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
2574 KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
2575 }
2576 if (__kmp_affinity_gran_levels >= (int)depth) {
2577 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2578 && (__kmp_affinity_type != affinity_none))) {
2579 KMP_WARNING(AffThreadsMayMigrate);
2580 }
2581 }
2582
2583 //
2584 // Run through the table, forming the masks for all threads on each
2585 // core. Threads on the same core will have identical "Address"
2586 // objects, not considering the last level, which must be the thread
2587 // id. All threads on a core will appear consecutively.
2588 //
2589 unsigned unique = 0;
2590 unsigned j = 0; // index of 1st thread on core
2591 unsigned leader = 0;
2592 Address *leaderAddr = &(address2os[0].first);
2593 kmp_affin_mask_t *sum
2594 = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
2595 KMP_CPU_ZERO(sum);
2596 KMP_CPU_SET(address2os[0].second, sum);
2597 for (i = 1; i < numAddrs; i++) {
2598 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002599 // If this thread is sufficiently close to the leader (within the
Jim Cownie5e8470a2013-09-27 10:38:44 +00002600 // granularity setting), then set the bit for this os thread in the
2601 // affinity mask for this group, and go on to the next thread.
2602 //
2603 if (leaderAddr->isClose(address2os[i].first,
2604 __kmp_affinity_gran_levels)) {
2605 KMP_CPU_SET(address2os[i].second, sum);
2606 continue;
2607 }
2608
2609 //
2610 // For every thread in this group, copy the mask to the thread's
2611 // entry in the osId2Mask table. Mark the first address as a
2612 // leader.
2613 //
2614 for (; j < i; j++) {
2615 unsigned osId = address2os[j].second;
2616 KMP_DEBUG_ASSERT(osId <= maxOsId);
2617 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2618 KMP_CPU_COPY(mask, sum);
2619 address2os[j].first.leader = (j == leader);
2620 }
2621 unique++;
2622
2623 //
2624 // Start a new mask.
2625 //
2626 leader = i;
2627 leaderAddr = &(address2os[i].first);
2628 KMP_CPU_ZERO(sum);
2629 KMP_CPU_SET(address2os[i].second, sum);
2630 }
2631
2632 //
2633 // For every thread in last group, copy the mask to the thread's
2634 // entry in the osId2Mask table.
2635 //
2636 for (; j < i; j++) {
2637 unsigned osId = address2os[j].second;
2638 KMP_DEBUG_ASSERT(osId <= maxOsId);
2639 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2640 KMP_CPU_COPY(mask, sum);
2641 address2os[j].first.leader = (j == leader);
2642 }
2643 unique++;
2644
2645 *maxIndex = maxOsId;
2646 *numUnique = unique;
2647 return osId2Mask;
2648}
2649
2650
2651//
2652// Stuff for the affinity proclist parsers. It's easier to declare these vars
2653// as file-static than to try and pass them through the calling sequence of
2654// the recursive-descent OMP_PLACES parser.
2655//
2656static kmp_affin_mask_t *newMasks;
2657static int numNewMasks;
2658static int nextNewMask;
2659
2660#define ADD_MASK(_mask) \
2661 { \
2662 if (nextNewMask >= numNewMasks) { \
2663 numNewMasks *= 2; \
2664 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
2665 numNewMasks * __kmp_affin_mask_size); \
2666 } \
2667 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
2668 nextNewMask++; \
2669 }
2670
2671#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
2672 { \
2673 if (((_osId) > _maxOsId) || \
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002674 (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002675 if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
2676 && (__kmp_affinity_type != affinity_none))) { \
2677 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
2678 } \
2679 } \
2680 else { \
2681 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
2682 } \
2683 }
2684
2685
2686//
2687// Re-parse the proclist (for the explicit affinity type), and form the list
2688// of affinity newMasks indexed by gtid.
2689//
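//
// Illustration (hypothetical input): a proclist such as "0,3-5,{7,8},10-14:2"
// would yield the masks {0}, {3}, {4}, {5}, {7,8}, {10}, {12}, {14} - single
// ids and each element of a range get their own mask, while a braced set is
// OR'd into a single mask.
//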
2690static void
2691__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
2692 unsigned int *out_numMasks, const char *proclist,
2693 kmp_affin_mask_t *osId2Mask, int maxOsId)
2694{
2695 const char *scan = proclist;
2696 const char *next = proclist;
2697
2698 //
2699 // We use malloc() for the temporary mask vector,
2700 // so that we can use realloc() to extend it.
2701 //
2702 numNewMasks = 2;
2703 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
2704 * __kmp_affin_mask_size);
2705 nextNewMask = 0;
2706 kmp_affin_mask_t *sumMask = (kmp_affin_mask_t *)__kmp_allocate(
2707 __kmp_affin_mask_size);
2708 int setSize = 0;
2709
2710 for (;;) {
2711 int start, end, stride;
2712
2713 SKIP_WS(scan);
2714 next = scan;
2715 if (*next == '\0') {
2716 break;
2717 }
2718
2719 if (*next == '{') {
2720 int num;
2721 setSize = 0;
2722 next++; // skip '{'
2723 SKIP_WS(next);
2724 scan = next;
2725
2726 //
2727 // Read the first integer in the set.
2728 //
2729 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2730 "bad proclist");
2731 SKIP_DIGITS(next);
2732 num = __kmp_str_to_int(scan, *next);
2733 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2734
2735 //
2736 // Copy the mask for that osId to the sum (union) mask.
2737 //
2738 if ((num > maxOsId) ||
2739 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2740 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2741 && (__kmp_affinity_type != affinity_none))) {
2742 KMP_WARNING(AffIgnoreInvalidProcID, num);
2743 }
2744 KMP_CPU_ZERO(sumMask);
2745 }
2746 else {
2747 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2748 setSize = 1;
2749 }
2750
2751 for (;;) {
2752 //
2753 // Check for end of set.
2754 //
2755 SKIP_WS(next);
2756 if (*next == '}') {
2757 next++; // skip '}'
2758 break;
2759 }
2760
2761 //
2762 // Skip optional comma.
2763 //
2764 if (*next == ',') {
2765 next++;
2766 }
2767 SKIP_WS(next);
2768
2769 //
2770 // Read the next integer in the set.
2771 //
2772 scan = next;
2773 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2774 "bad explicit proc list");
2775
2776 SKIP_DIGITS(next);
2777 num = __kmp_str_to_int(scan, *next);
2778 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2779
2780 //
2781 // Add the mask for that osId to the sum mask.
2782 //
2783 if ((num > maxOsId) ||
2784 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2785 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2786 && (__kmp_affinity_type != affinity_none))) {
2787 KMP_WARNING(AffIgnoreInvalidProcID, num);
2788 }
2789 }
2790 else {
2791 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2792 setSize++;
2793 }
2794 }
2795 if (setSize > 0) {
2796 ADD_MASK(sumMask);
2797 }
2798
2799 SKIP_WS(next);
2800 if (*next == ',') {
2801 next++;
2802 }
2803 scan = next;
2804 continue;
2805 }
2806
2807 //
2808 // Read the first integer.
2809 //
2810 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2811 SKIP_DIGITS(next);
2812 start = __kmp_str_to_int(scan, *next);
2813 KMP_ASSERT2(start >= 0, "bad explicit proc list");
2814 SKIP_WS(next);
2815
2816 //
2817 // If this isn't a range, then add a mask to the list and go on.
2818 //
2819 if (*next != '-') {
2820 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2821
2822 //
2823 // Skip optional comma.
2824 //
2825 if (*next == ',') {
2826 next++;
2827 }
2828 scan = next;
2829 continue;
2830 }
2831
2832 //
2833 // This is a range. Skip over the '-' and read in the 2nd int.
2834 //
2835 next++; // skip '-'
2836 SKIP_WS(next);
2837 scan = next;
2838 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2839 SKIP_DIGITS(next);
2840 end = __kmp_str_to_int(scan, *next);
2841 KMP_ASSERT2(end >= 0, "bad explicit proc list");
2842
2843 //
2844 // Check for a stride parameter
2845 //
2846 stride = 1;
2847 SKIP_WS(next);
2848 if (*next == ':') {
2849 //
2850            // A stride is specified. Skip over the ':' and read the 3rd int.
2851 //
2852 int sign = +1;
2853 next++; // skip ':'
2854 SKIP_WS(next);
2855 scan = next;
2856 if (*next == '-') {
2857 sign = -1;
2858 next++;
2859 SKIP_WS(next);
2860 scan = next;
2861 }
2862 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2863 "bad explicit proc list");
2864 SKIP_DIGITS(next);
2865 stride = __kmp_str_to_int(scan, *next);
2866 KMP_ASSERT2(stride >= 0, "bad explicit proc list");
2867 stride *= sign;
2868 }
2869
2870 //
2871 // Do some range checks.
2872 //
2873 KMP_ASSERT2(stride != 0, "bad explicit proc list");
2874 if (stride > 0) {
2875 KMP_ASSERT2(start <= end, "bad explicit proc list");
2876 }
2877 else {
2878 KMP_ASSERT2(start >= end, "bad explicit proc list");
2879 }
2880 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
2881
2882 //
2883 // Add the mask for each OS proc # to the list.
2884 //
2885 if (stride > 0) {
2886 do {
2887 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2888 start += stride;
2889 } while (start <= end);
2890 }
2891 else {
2892 do {
2893 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2894 start += stride;
2895 } while (start >= end);
2896 }
2897
2898 //
2899 // Skip optional comma.
2900 //
2901 SKIP_WS(next);
2902 if (*next == ',') {
2903 next++;
2904 }
2905 scan = next;
2906 }
2907
2908 *out_numMasks = nextNewMask;
2909 if (nextNewMask == 0) {
2910 *out_masks = NULL;
2911 KMP_INTERNAL_FREE(newMasks);
2912 return;
2913 }
2914 *out_masks
2915 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
2916 memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
2917 __kmp_free(sumMask);
2918 KMP_INTERNAL_FREE(newMasks);
2919}
2920
2921
2922# if OMP_40_ENABLED
2923
2924/*-----------------------------------------------------------------------------
2925
2926Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
2927places. Again, here is the grammar:
2928
2929place_list := place
2930place_list := place , place_list
2931place := num
2932place := place : num
2933place := place : num : signed
2934place := { subplacelist }
2935place := ! place // (lowest priority)
2936subplace_list := subplace
2937subplace_list := subplace , subplace_list
2938subplace := num
2939subplace := num : num
2940subplace := num : num : signed
2941signed := num
2942signed := + signed
2943signed := - signed
2944
2945-----------------------------------------------------------------------------*/
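//
// Illustration (hypothetical input): the place "{0,1}:4:2" expands to 4
// places, each shifted by 2 OS procs from the previous one: {0,1}, {2,3},
// {4,5}, {6,7}.  A subplace such as "{0:4:2}" instead selects procs
// 0, 2, 4, and 6 within a single place.
//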
2946
2947static void
2948__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
2949 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
2950{
2951 const char *next;
2952
2953 for (;;) {
2954 int start, count, stride, i;
2955
2956 //
2957 // Read in the starting proc id
2958 //
2959 SKIP_WS(*scan);
2960 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2961 "bad explicit places list");
2962 next = *scan;
2963 SKIP_DIGITS(next);
2964 start = __kmp_str_to_int(*scan, *next);
2965 KMP_ASSERT(start >= 0);
2966 *scan = next;
2967
2968 //
2969 // valid follow sets are ',' ':' and '}'
2970 //
2971 SKIP_WS(*scan);
2972 if (**scan == '}' || **scan == ',') {
2973 if ((start > maxOsId) ||
2974 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
2975 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2976 && (__kmp_affinity_type != affinity_none))) {
2977 KMP_WARNING(AffIgnoreInvalidProcID, start);
2978 }
2979 }
2980 else {
2981 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
2982 (*setSize)++;
2983 }
2984 if (**scan == '}') {
2985 break;
2986 }
2987 (*scan)++; // skip ','
2988 continue;
2989 }
2990 KMP_ASSERT2(**scan == ':', "bad explicit places list");
2991 (*scan)++; // skip ':'
2992
2993 //
2994 // Read count parameter
2995 //
2996 SKIP_WS(*scan);
2997 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2998 "bad explicit places list");
2999 next = *scan;
3000 SKIP_DIGITS(next);
3001 count = __kmp_str_to_int(*scan, *next);
3002 KMP_ASSERT(count >= 0);
3003 *scan = next;
3004
3005 //
3006 // valid follow sets are ',' ':' and '}'
3007 //
3008 SKIP_WS(*scan);
3009 if (**scan == '}' || **scan == ',') {
3010 for (i = 0; i < count; i++) {
3011 if ((start > maxOsId) ||
3012 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3013 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3014 && (__kmp_affinity_type != affinity_none))) {
3015 KMP_WARNING(AffIgnoreInvalidProcID, start);
3016 }
3017 break; // don't proliferate warnings for large count
3018 }
3019 else {
3020 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3021 start++;
3022 (*setSize)++;
3023 }
3024 }
3025 if (**scan == '}') {
3026 break;
3027 }
3028 (*scan)++; // skip ','
3029 continue;
3030 }
3031 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3032 (*scan)++; // skip ':'
3033
3034 //
3035 // Read stride parameter
3036 //
3037 int sign = +1;
3038 for (;;) {
3039 SKIP_WS(*scan);
3040 if (**scan == '+') {
3041 (*scan)++; // skip '+'
3042 continue;
3043 }
3044 if (**scan == '-') {
3045 sign *= -1;
3046 (*scan)++; // skip '-'
3047 continue;
3048 }
3049 break;
3050 }
3051 SKIP_WS(*scan);
3052 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3053 "bad explicit places list");
3054 next = *scan;
3055 SKIP_DIGITS(next);
3056 stride = __kmp_str_to_int(*scan, *next);
3057 KMP_ASSERT(stride >= 0);
3058 *scan = next;
3059 stride *= sign;
3060
3061 //
3062 // valid follow sets are ',' and '}'
3063 //
3064 SKIP_WS(*scan);
3065 if (**scan == '}' || **scan == ',') {
3066 for (i = 0; i < count; i++) {
3067 if ((start > maxOsId) ||
3068 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3069 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3070 && (__kmp_affinity_type != affinity_none))) {
3071 KMP_WARNING(AffIgnoreInvalidProcID, start);
3072 }
3073 break; // don't proliferate warnings for large count
3074 }
3075 else {
3076 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3077 start += stride;
3078 (*setSize)++;
3079 }
3080 }
3081 if (**scan == '}') {
3082 break;
3083 }
3084 (*scan)++; // skip ','
3085 continue;
3086 }
3087
3088 KMP_ASSERT2(0, "bad explicit places list");
3089 }
3090}
3091
3092
3093static void
3094__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
3095 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3096{
3097 const char *next;
3098
3099 //
3100 // valid follow sets are '{' '!' and num
3101 //
3102 SKIP_WS(*scan);
3103 if (**scan == '{') {
3104 (*scan)++; // skip '{'
3105 __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
3106 setSize);
3107 KMP_ASSERT2(**scan == '}', "bad explicit places list");
3108 (*scan)++; // skip '}'
3109 }
3110 else if (**scan == '!') {
3111 __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
3112 KMP_CPU_COMPLEMENT(tempMask);
3113 (*scan)++; // skip '!'
3114 }
3115 else if ((**scan >= '0') && (**scan <= '9')) {
3116 next = *scan;
3117 SKIP_DIGITS(next);
3118 int num = __kmp_str_to_int(*scan, *next);
3119 KMP_ASSERT(num >= 0);
3120 if ((num > maxOsId) ||
3121 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3122 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3123 && (__kmp_affinity_type != affinity_none))) {
3124 KMP_WARNING(AffIgnoreInvalidProcID, num);
3125 }
3126 }
3127 else {
3128 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
3129 (*setSize)++;
3130 }
3131 *scan = next; // skip num
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003132 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003133 else {
3134 KMP_ASSERT2(0, "bad explicit places list");
3135 }
3136}
3137
3138
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003139//static void
3140void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003141__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
3142 unsigned int *out_numMasks, const char *placelist,
3143 kmp_affin_mask_t *osId2Mask, int maxOsId)
3144{
3145 const char *scan = placelist;
3146 const char *next = placelist;
3147
3148 numNewMasks = 2;
3149 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
3150 * __kmp_affin_mask_size);
3151 nextNewMask = 0;
3152
3153 kmp_affin_mask_t *tempMask = (kmp_affin_mask_t *)__kmp_allocate(
3154 __kmp_affin_mask_size);
3155 KMP_CPU_ZERO(tempMask);
3156 int setSize = 0;
3157
3158 for (;;) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003159 __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
3160
3161 //
3162 // valid follow sets are ',' ':' and EOL
3163 //
3164 SKIP_WS(scan);
3165 if (*scan == '\0' || *scan == ',') {
3166 if (setSize > 0) {
3167 ADD_MASK(tempMask);
3168 }
3169 KMP_CPU_ZERO(tempMask);
3170 setSize = 0;
3171 if (*scan == '\0') {
3172 break;
3173 }
3174 scan++; // skip ','
3175 continue;
3176 }
3177
3178 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3179 scan++; // skip ':'
3180
3181 //
3182 // Read count parameter
3183 //
3184 SKIP_WS(scan);
3185 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3186 "bad explicit places list");
3187 next = scan;
3188 SKIP_DIGITS(next);
Jim Cownie181b4bb2013-12-23 17:28:57 +00003189 int count = __kmp_str_to_int(scan, *next);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003190 KMP_ASSERT(count >= 0);
3191 scan = next;
3192
3193 //
3194 // valid follow sets are ',' ':' and EOL
3195 //
3196 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003197 int stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003198 if (*scan == '\0' || *scan == ',') {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003199 stride = +1;
3200 }
3201 else {
3202 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3203 scan++; // skip ':'
Jim Cownie5e8470a2013-09-27 10:38:44 +00003204
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003205 //
3206 // Read stride parameter
3207 //
3208 int sign = +1;
3209 for (;;) {
3210 SKIP_WS(scan);
3211 if (*scan == '+') {
3212 scan++; // skip '+'
3213 continue;
3214 }
3215 if (*scan == '-') {
3216 sign *= -1;
3217 scan++; // skip '-'
3218 continue;
3219 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003220 break;
3221 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003222 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003223 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3224 "bad explicit places list");
3225 next = scan;
3226 SKIP_DIGITS(next);
3227 stride = __kmp_str_to_int(scan, *next);
3228 KMP_DEBUG_ASSERT(stride >= 0);
3229 scan = next;
3230 stride *= sign;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003231 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003232
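        //
        // Replicate the current place "count" times.  Each iteration emits the
        // accumulated mask and then shifts every bit in tempMask by "stride"
        // OS proc ids (bit j is set iff bit j - stride was set); bits that land
        // on invalid or unavailable procs are dropped, with a warning.
        //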
3233 if (stride > 0) {
3234 int i;
3235 for (i = 0; i < count; i++) {
3236 int j;
3237 if (setSize == 0) {
3238 break;
3239 }
3240 ADD_MASK(tempMask);
3241 setSize = 0;
3242 for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003243 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3244 KMP_CPU_CLR(j, tempMask);
3245 }
3246 else if ((j > maxOsId) ||
3247 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
Andrey Churbanov16a14322015-03-10 09:34:38 +00003248 if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
3249 && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003250 KMP_WARNING(AffIgnoreInvalidProcID, j);
3251 }
3252 KMP_CPU_CLR(j, tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003253 }
3254 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003255 KMP_CPU_SET(j, tempMask);
3256 setSize++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003257 }
3258 }
3259 for (; j >= 0; j--) {
3260 KMP_CPU_CLR(j, tempMask);
3261 }
3262 }
3263 }
3264 else {
3265 int i;
3266 for (i = 0; i < count; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003267 int j;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003268 if (setSize == 0) {
3269 break;
3270 }
3271 ADD_MASK(tempMask);
3272 setSize = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003273 for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003274 j++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003275 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3276 KMP_CPU_CLR(j, tempMask);
3277 }
3278 else if ((j > maxOsId) ||
3279 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
Andrey Churbanov16a14322015-03-10 09:34:38 +00003280 if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
3281 && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003282 KMP_WARNING(AffIgnoreInvalidProcID, j);
3283 }
3284 KMP_CPU_CLR(j, tempMask);
3285 }
3286 else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003287 KMP_CPU_SET(j, tempMask);
3288 setSize++;
3289 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003290 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003291 for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003292 KMP_CPU_CLR(j, tempMask);
3293 }
3294 }
3295 }
3296 KMP_CPU_ZERO(tempMask);
3297 setSize = 0;
3298
3299 //
3300 // valid follow sets are ',' and EOL
3301 //
3302 SKIP_WS(scan);
3303 if (*scan == '\0') {
3304 break;
3305 }
3306 if (*scan == ',') {
3307 scan++; // skip ','
3308 continue;
3309 }
3310
3311 KMP_ASSERT2(0, "bad explicit places list");
3312 }
3313
3314 *out_numMasks = nextNewMask;
3315 if (nextNewMask == 0) {
3316 *out_masks = NULL;
3317 KMP_INTERNAL_FREE(newMasks);
3318 return;
3319 }
3320 *out_masks
3321 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
3322 memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
3323 __kmp_free(tempMask);
3324 KMP_INTERNAL_FREE(newMasks);
3325}
3326
3327# endif /* OMP_40_ENABLED */
3328
3329#undef ADD_MASK
3330#undef ADD_MASK_OSID
3331
Jim Cownie5e8470a2013-09-27 10:38:44 +00003332static void
3333__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
3334{
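    //
    // Trim the topology map according to the requested thread-placement
    // limits: keep only __kmp_place_num_cores cores per package, starting at
    // __kmp_place_core_offset, and __kmp_place_num_threads_per_core contexts
    // per core, then update nCoresPerPkg, __kmp_nThreadsPerCore,
    // __kmp_avail_proc, and __kmp_ncores to match the reduced topology.
    //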
3335 if ( __kmp_place_num_cores == 0 ) {
3336 if ( __kmp_place_num_threads_per_core == 0 ) {
3337 return; // no cores limiting actions requested, exit
3338 }
3339 __kmp_place_num_cores = nCoresPerPkg; // use all available cores
3340 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003341 if ( !__kmp_affinity_uniform_topology() ) {
3342 KMP_WARNING( AffThrPlaceNonUniform );
3343 return; // don't support non-uniform topology
3344 }
3345 if ( depth != 3 ) {
3346 KMP_WARNING( AffThrPlaceNonThreeLevel );
3347 return; // don't support not-3-level topology
Jim Cownie5e8470a2013-09-27 10:38:44 +00003348 }
3349 if ( __kmp_place_num_threads_per_core == 0 ) {
3350 __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
3351 }
Andrey Churbanov12875572015-03-10 09:00:36 +00003352 if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003353 KMP_WARNING( AffThrPlaceManyCores );
3354 return;
3355 }
3356
3357 AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
3358 nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
3359 int i, j, k, n_old = 0, n_new = 0;
3360 for ( i = 0; i < nPackages; ++i ) {
3361 for ( j = 0; j < nCoresPerPkg; ++j ) {
Andrey Churbanov12875572015-03-10 09:00:36 +00003362 if ( j < __kmp_place_core_offset || j >= __kmp_place_core_offset + __kmp_place_num_cores ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003363 n_old += __kmp_nThreadsPerCore; // skip not-requested core
3364 } else {
3365 for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) {
Andrey Churbanov12875572015-03-10 09:00:36 +00003366 if ( k < __kmp_place_num_threads_per_core ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003367 newAddr[n_new] = (*pAddr)[n_old]; // copy requested core' data to new location
3368 n_new++;
3369 }
3370 n_old++;
3371 }
3372 }
3373 }
3374 }
3375 nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
3376 __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
3377 __kmp_avail_proc = n_new; // correct avail_proc
3378 __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
3379
3380 __kmp_free( *pAddr );
3381 *pAddr = newAddr; // replace old topology with new one
3382}
3383
Jim Cownie5e8470a2013-09-27 10:38:44 +00003384
3385static AddrUnsPair *address2os = NULL;
3386static int * procarr = NULL;
3387static int __kmp_aff_depth = 0;
3388
3389static void
3390__kmp_aux_affinity_initialize(void)
3391{
3392 if (__kmp_affinity_masks != NULL) {
3393 KMP_ASSERT(fullMask != NULL);
3394 return;
3395 }
3396
3397 //
3398 // Create the "full" mask - this defines all of the processors that we
3399 // consider to be in the machine model. If respect is set, then it is
3400 // the initialization thread's affinity mask. Otherwise, it is all
3401 // processors that we know about on the machine.
3402 //
3403 if (fullMask == NULL) {
3404 fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
3405 }
3406 if (KMP_AFFINITY_CAPABLE()) {
3407 if (__kmp_affinity_respect_mask) {
3408 __kmp_get_system_affinity(fullMask, TRUE);
3409
3410 //
3411 // Count the number of available processors.
3412 //
3413 unsigned i;
3414 __kmp_avail_proc = 0;
3415 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
3416 if (! KMP_CPU_ISSET(i, fullMask)) {
3417 continue;
3418 }
3419 __kmp_avail_proc++;
3420 }
3421 if (__kmp_avail_proc > __kmp_xproc) {
3422 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3423 && (__kmp_affinity_type != affinity_none))) {
3424 KMP_WARNING(ErrorInitializeAffinity);
3425 }
3426 __kmp_affinity_type = affinity_none;
Andrey Churbanov1f037e42015-03-10 09:15:26 +00003427 KMP_AFFINITY_DISABLE();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003428 return;
3429 }
3430 }
3431 else {
3432 __kmp_affinity_entire_machine_mask(fullMask);
3433 __kmp_avail_proc = __kmp_xproc;
3434 }
3435 }
3436
3437 int depth = -1;
3438 kmp_i18n_id_t msg_id = kmp_i18n_null;
3439
3440 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00003441 // For backward compatibility, setting KMP_CPUINFO_FILE =>
Jim Cownie5e8470a2013-09-27 10:38:44 +00003442 // KMP_TOPOLOGY_METHOD=cpuinfo
3443 //
3444 if ((__kmp_cpuinfo_file != NULL) &&
3445 (__kmp_affinity_top_method == affinity_top_method_all)) {
3446 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
3447 }
3448
3449 if (__kmp_affinity_top_method == affinity_top_method_all) {
3450 //
3451 // In the default code path, errors are not fatal - we just try using
3452 // another method. We only emit a warning message if affinity is on,
3453        // or the verbose flag is set, and the nowarnings flag was not set.
3454 //
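        //
        // The discovery methods below are tried in order: x2APIC (cpuid leaf
        // 11) decoding, then the legacy APIC id method, then parsing
        // /proc/cpuinfo (Linux only), then the processor-group map (when there
        // are multiple groups), and finally the flat OS proc id map as a last
        // resort.
        //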
3455 const char *file_name = NULL;
3456 int line = 0;
3457
3458# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3459
3460 if (__kmp_affinity_verbose) {
3461 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
3462 }
3463
3464 file_name = NULL;
3465 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3466 if (depth == 0) {
3467 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3468 KMP_ASSERT(address2os == NULL);
3469 return;
3470 }
3471
3472 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003473 if (__kmp_affinity_verbose) {
3474 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003475 KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
3476 KMP_I18N_STR(DecodingLegacyAPIC));
3477 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003478 else {
3479 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
3480 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003481 }
3482
3483 file_name = NULL;
3484 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3485 if (depth == 0) {
3486 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3487 KMP_ASSERT(address2os == NULL);
3488 return;
3489 }
3490 }
3491
3492# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3493
3494# if KMP_OS_LINUX
3495
3496 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003497 if (__kmp_affinity_verbose) {
3498 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003499 KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
3500 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003501 else {
3502 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
3503 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003504 }
3505
3506 FILE *f = fopen("/proc/cpuinfo", "r");
3507 if (f == NULL) {
3508 msg_id = kmp_i18n_str_CantOpenCpuinfo;
3509 }
3510 else {
3511 file_name = "/proc/cpuinfo";
3512 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3513 fclose(f);
3514 if (depth == 0) {
3515 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3516 KMP_ASSERT(address2os == NULL);
3517 return;
3518 }
3519 }
3520 }
3521
3522# endif /* KMP_OS_LINUX */
3523
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003524# if KMP_GROUP_AFFINITY
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003525
3526 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
3527 if (__kmp_affinity_verbose) {
3528 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3529 }
3530
3531 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3532 KMP_ASSERT(depth != 0);
3533 }
3534
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003535# endif /* KMP_GROUP_AFFINITY */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003536
Jim Cownie5e8470a2013-09-27 10:38:44 +00003537 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003538 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003539 if (file_name == NULL) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003540 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003541 }
3542 else if (line == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003543 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003544 }
3545 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003546 KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003547 }
3548 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003549 // FIXME - print msg if msg_id = kmp_i18n_null ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00003550
3551 file_name = "";
3552 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3553 if (depth == 0) {
3554 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3555 KMP_ASSERT(address2os == NULL);
3556 return;
3557 }
3558 KMP_ASSERT(depth > 0);
3559 KMP_ASSERT(address2os != NULL);
3560 }
3561 }
3562
3563 //
3564    // If the user has specified that a particular topology discovery method
3565 // is to be used, then we abort if that method fails. The exception is
3566 // group affinity, which might have been implicitly set.
3567 //
3568
3569# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3570
3571 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
3572 if (__kmp_affinity_verbose) {
3573 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3574 KMP_I18N_STR(Decodingx2APIC));
3575 }
3576
3577 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3578 if (depth == 0) {
3579 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3580 KMP_ASSERT(address2os == NULL);
3581 return;
3582 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003583 if (depth < 0) {
3584 KMP_ASSERT(msg_id != kmp_i18n_null);
3585 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3586 }
3587 }
3588 else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
3589 if (__kmp_affinity_verbose) {
3590 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3591 KMP_I18N_STR(DecodingLegacyAPIC));
3592 }
3593
3594 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3595 if (depth == 0) {
3596 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3597 KMP_ASSERT(address2os == NULL);
3598 return;
3599 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003600 if (depth < 0) {
3601 KMP_ASSERT(msg_id != kmp_i18n_null);
3602 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3603 }
3604 }
3605
3606# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3607
3608 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
3609 const char *filename;
3610 if (__kmp_cpuinfo_file != NULL) {
3611 filename = __kmp_cpuinfo_file;
3612 }
3613 else {
3614 filename = "/proc/cpuinfo";
3615 }
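        // Note: __kmp_cpuinfo_file is presumably the value of the KMP_CPUINFO_FILE
        // environment variable (the NameComesFrom_CPUINFO_FILE hint below points at
        // it); when it is not set we fall back to parsing the live /proc/cpuinfo.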
3616
3617 if (__kmp_affinity_verbose) {
3618 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
3619 }
3620
3621 FILE *f = fopen(filename, "r");
3622 if (f == NULL) {
3623 int code = errno;
3624 if (__kmp_cpuinfo_file != NULL) {
3625 __kmp_msg(
3626 kmp_ms_fatal,
3627 KMP_MSG(CantOpenFileForReading, filename),
3628 KMP_ERR(code),
3629 KMP_HNT(NameComesFrom_CPUINFO_FILE),
3630 __kmp_msg_null
3631 );
3632 }
3633 else {
3634 __kmp_msg(
3635 kmp_ms_fatal,
3636 KMP_MSG(CantOpenFileForReading, filename),
3637 KMP_ERR(code),
3638 __kmp_msg_null
3639 );
3640 }
3641 }
3642 int line = 0;
3643 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3644 fclose(f);
3645 if (depth < 0) {
3646 KMP_ASSERT(msg_id != kmp_i18n_null);
3647 if (line > 0) {
3648 KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
3649 }
3650 else {
3651 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
3652 }
3653 }
3654 if (__kmp_affinity_type == affinity_none) {
3655 KMP_ASSERT(depth == 0);
3656 KMP_ASSERT(address2os == NULL);
3657 return;
3658 }
3659 }
3660
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003661# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00003662
3663 else if (__kmp_affinity_top_method == affinity_top_method_group) {
3664 if (__kmp_affinity_verbose) {
3665 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3666 }
3667
3668 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3669 KMP_ASSERT(depth != 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003670 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003671 KMP_ASSERT(msg_id != kmp_i18n_null);
3672 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003673 }
3674 }
3675
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003676# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003677
3678 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
3679 if (__kmp_affinity_verbose) {
3680 KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
3681 }
3682
3683 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3684 if (depth == 0) {
3685 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3686 KMP_ASSERT(address2os == NULL);
3687 return;
3688 }
3689 // should not fail
3690 KMP_ASSERT(depth > 0);
3691 KMP_ASSERT(address2os != NULL);
3692 }
3693
3694 if (address2os == NULL) {
3695 if (KMP_AFFINITY_CAPABLE()
3696 && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3697 && (__kmp_affinity_type != affinity_none)))) {
3698 KMP_WARNING(ErrorInitializeAffinity);
3699 }
3700 __kmp_affinity_type = affinity_none;
Andrey Churbanov1f037e42015-03-10 09:15:26 +00003701 KMP_AFFINITY_DISABLE();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003702 return;
3703 }
3704
Jim Cownie5e8470a2013-09-27 10:38:44 +00003705 __kmp_apply_thread_places(&address2os, depth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003706
3707 //
3708 // Create the table of masks, indexed by thread Id.
3709 //
3710 unsigned maxIndex;
3711 unsigned numUnique;
3712 kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
3713 address2os, __kmp_avail_proc);
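    // Rough reading of the call above (sketch, not a spec): __kmp_create_masks()
    // builds one mask per OS proc id (indexable up to maxIndex), where each mask
    // covers all procs sharing the topology node selected by
    // __kmp_affinity_gran_levels, and numUnique counts the distinct masks produced.
    // At the finest granularity (gran_levels == 0) every proc gets its own singleton
    // mask, which is what the assertion below checks.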
3714 if (__kmp_affinity_gran_levels == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003715 KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003716 }
3717
3718 //
3719 // Set the childNums vector in all Address objects. This must be done
3720 // before we can sort using __kmp_affinity_cmp_Address_child_num(),
3721 // which takes into account the setting of __kmp_affinity_compact.
3722 //
3723 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
3724
3725 switch (__kmp_affinity_type) {
3726
3727 case affinity_explicit:
3728 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
3729# if OMP_40_ENABLED
3730 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3731# endif
3732 {
3733 __kmp_affinity_process_proclist(&__kmp_affinity_masks,
3734 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3735 maxIndex);
3736 }
3737# if OMP_40_ENABLED
3738 else {
3739 __kmp_affinity_process_placelist(&__kmp_affinity_masks,
3740 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3741 maxIndex);
3742 }
3743# endif
3744 if (__kmp_affinity_num_masks == 0) {
3745 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3746 && (__kmp_affinity_type != affinity_none))) {
3747 KMP_WARNING(AffNoValidProcID);
3748 }
3749 __kmp_affinity_type = affinity_none;
3750 return;
3751 }
3752 break;
3753
3754 //
3755 // The other affinity types rely on sorting the Addresses according
3756 // to some permutation of the machine topology tree. Set
3757 // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
3758 // then jump to a common code fragment to do the sort and create
3759 // the array of affinity masks.
3760 //
3761
3762 case affinity_logical:
3763 __kmp_affinity_compact = 0;
3764 if (__kmp_affinity_offset) {
3765 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3766 % __kmp_avail_proc;
3767 }
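        // Note: the expression above evaluates as (__kmp_nThreadsPerCore *
        // __kmp_affinity_offset) % __kmp_avail_proc, i.e. the user-supplied offset is
        // scaled by the number of thread contexts per core and wrapped to the
        // available procs (e.g. 2 contexts/core and 16 procs turn offset 3 into 6).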
3768 goto sortAddresses;
3769
3770 case affinity_physical:
3771 if (__kmp_nThreadsPerCore > 1) {
3772 __kmp_affinity_compact = 1;
3773 if (__kmp_affinity_compact >= depth) {
3774 __kmp_affinity_compact = 0;
3775 }
3776 } else {
3777 __kmp_affinity_compact = 0;
3778 }
3779 if (__kmp_affinity_offset) {
3780 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3781 % __kmp_avail_proc;
3782 }
3783 goto sortAddresses;
3784
3785 case affinity_scatter:
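        // Roughly speaking, scatter reuses the compact machinery with the level count
        // inverted (depth - 1 - compact), so consecutive threads are spread across the
        // outer topology levels (packages, then cores) rather than packed onto
        // neighboring hardware contexts.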
3786 if (__kmp_affinity_compact >= depth) {
3787 __kmp_affinity_compact = 0;
3788 }
3789 else {
3790 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
3791 }
3792 goto sortAddresses;
3793
3794 case affinity_compact:
3795 if (__kmp_affinity_compact >= depth) {
3796 __kmp_affinity_compact = depth - 1;
3797 }
3798 goto sortAddresses;
3799
Jim Cownie5e8470a2013-09-27 10:38:44 +00003800 case affinity_balanced:
Andrey Churbanove4b92132015-03-05 17:46:50 +00003801 // Balanced affinity works only within a single package; both uniform and non-uniform topologies are handled
Jim Cownie5e8470a2013-09-27 10:38:44 +00003802 if( nPackages > 1 ) {
3803 if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
3804 KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
3805 }
3806 __kmp_affinity_type = affinity_none;
3807 return;
3808 } else if( __kmp_affinity_uniform_topology() ) {
3809 break;
3810 } else { // Non-uniform topology
3811
3812 // Save the depth for further usage
3813 __kmp_aff_depth = depth;
3814
3815 // Number of hyper threads per core in HT machine
3816 int nth_per_core = __kmp_nThreadsPerCore;
3817
3818 int core_level;
3819 if( nth_per_core > 1 ) {
3820 core_level = depth - 2;
3821 } else {
3822 core_level = depth - 1;
3823 }
3824 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
3825 int nproc = nth_per_core * ncores;
3826
3827 procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
3828 for( int i = 0; i < nproc; i++ ) {
3829 procarr[ i ] = -1;
3830 }
3831
3832 for( int i = 0; i < __kmp_avail_proc; i++ ) {
3833 int proc = address2os[ i ].second;
3834 // If depth == 3 then level=0 - package, level=1 - core, level=2 - thread.
3835 // If there is only one thread per core then depth == 2: level 0 - package,
3836 // level 1 - core.
3837 int level = depth - 1;
3838
3839 // Defaults for the case of one thread context per core (__kmp_nth_per_core == 1)
3840 int thread = 0;
3841 int core = address2os[ i ].first.labels[ level ];
3842 // If the thread level exists, i.e. there is more than one thread context per core
3843 if( nth_per_core > 1 ) {
3844 thread = address2os[ i ].first.labels[ level ] % nth_per_core;
3845 core = address2os[ i ].first.labels[ level - 1 ];
3846 }
3847 procarr[ core * nth_per_core + thread ] = proc;
3848 }
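            // Illustrative layout (assumed, not normative): with nth_per_core == 2 and
            // 3 usable cores, procarr has 6 slots indexed as core * 2 + thread, e.g.
            // { 0, 4, 1, 5, 2, -1 } if core 2 exposes only one context. The -1 holes
            // are what the non-uniform balancing in __kmp_balanced_affinity() works
            // around.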
3849
3850 break;
3851 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003852
3853 sortAddresses:
3854 //
3855 // Allocate the gtid->affinity mask table.
3856 //
3857 if (__kmp_affinity_dups) {
3858 __kmp_affinity_num_masks = __kmp_avail_proc;
3859 }
3860 else {
3861 __kmp_affinity_num_masks = numUnique;
3862 }
3863
3864# if OMP_40_ENABLED
3865 if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
3866 && ( __kmp_affinity_num_places > 0 )
3867 && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
3868 __kmp_affinity_num_masks = __kmp_affinity_num_places;
3869 }
3870# endif
3871
3872 __kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
3873 __kmp_affinity_num_masks * __kmp_affin_mask_size);
3874
3875 //
3876 // Sort the address2os table according to the current setting of
3877 // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
3878 //
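        // Sketch of the intent (as read from the code below): after the qsort, each
        // "leader" entry in address2os names an OS proc whose per-granularity mask
        // from osId2Mask is copied into the next slot of __kmp_affinity_masks; when
        // __kmp_affinity_dups is set, every entry contributes a mask, leader or not.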
3879 qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
3880 __kmp_affinity_cmp_Address_child_num);
3881 {
3882 int i;
3883 unsigned j;
3884 for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
3885 if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
3886 continue;
3887 }
3888 unsigned osId = address2os[i].second;
3889 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
3890 kmp_affin_mask_t *dest
3891 = KMP_CPU_INDEX(__kmp_affinity_masks, j);
3892 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
3893 KMP_CPU_COPY(dest, src);
3894 if (++j >= __kmp_affinity_num_masks) {
3895 break;
3896 }
3897 }
3898 KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
3899 }
3900 break;
3901
3902 default:
3903 KMP_ASSERT2(0, "Unexpected affinity setting");
3904 }
3905
3906 __kmp_free(osId2Mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003907 machine_hierarchy.init(address2os, __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003908}
3909
3910
3911void
3912__kmp_affinity_initialize(void)
3913{
3914 //
3915 // Much of the code above was written assuming that if a machine was not
3916 // affinity capable, then __kmp_affinity_type == affinity_none. We now
3917 // explicitly represent this as __kmp_affinity_type == affinity_disabled.
3918 //
3919 // There are too many checks for __kmp_affinity_type == affinity_none
3920 // in this code. Instead of trying to change them all, check if
3921 // __kmp_affinity_type == affinity_disabled, and if so, slam it with
3922 // affinity_none, call the real initialization routine, then restore
3923 // __kmp_affinity_type to affinity_disabled.
3924 //
3925 int disabled = (__kmp_affinity_type == affinity_disabled);
3926 if (! KMP_AFFINITY_CAPABLE()) {
3927 KMP_ASSERT(disabled);
3928 }
3929 if (disabled) {
3930 __kmp_affinity_type = affinity_none;
3931 }
3932 __kmp_aux_affinity_initialize();
3933 if (disabled) {
3934 __kmp_affinity_type = affinity_disabled;
3935 }
3936}
3937
3938
3939void
3940__kmp_affinity_uninitialize(void)
3941{
3942 if (__kmp_affinity_masks != NULL) {
3943 __kmp_free(__kmp_affinity_masks);
3944 __kmp_affinity_masks = NULL;
3945 }
3946 if (fullMask != NULL) {
3947 KMP_CPU_FREE(fullMask);
3948 fullMask = NULL;
3949 }
3950 __kmp_affinity_num_masks = 0;
3951# if OMP_40_ENABLED
3952 __kmp_affinity_num_places = 0;
3953# endif
3954 if (__kmp_affinity_proclist != NULL) {
3955 __kmp_free(__kmp_affinity_proclist);
3956 __kmp_affinity_proclist = NULL;
3957 }
3958 if( address2os != NULL ) {
3959 __kmp_free( address2os );
3960 address2os = NULL;
3961 }
3962 if( procarr != NULL ) {
3963 __kmp_free( procarr );
3964 procarr = NULL;
3965 }
3966}
3967
3968
3969void
3970__kmp_affinity_set_init_mask(int gtid, int isa_root)
3971{
3972 if (! KMP_AFFINITY_CAPABLE()) {
3973 return;
3974 }
3975
3976 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
3977 if (th->th.th_affin_mask == NULL) {
3978 KMP_CPU_ALLOC(th->th.th_affin_mask);
3979 }
3980 else {
3981 KMP_CPU_ZERO(th->th.th_affin_mask);
3982 }
3983
3984 //
3985 // Copy the thread mask to the kmp_info_t structure.
3986 // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
3987 // that has all of the OS proc ids set, or if __kmp_affinity_respect_mask
3988 // is set, then the full mask is the same as the mask of the initialization
3989 // thread.
3990 //
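    // In rough terms (summary of the branches below): a thread gets the full mask when
    // affinity is none or balanced -- or, on the OMP 4.0 path, when it is not a root
    // or bind_types[0] == proc_bind_false -- otherwise its place i is chosen
    // round-robin as (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks.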
3991 kmp_affin_mask_t *mask;
3992 int i;
3993
3994# if OMP_40_ENABLED
3995 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3996# endif
3997 {
Andrey Churbanovf28f6132015-01-13 14:54:00 +00003998 if ((__kmp_affinity_type == affinity_none) || (__kmp_affinity_type == affinity_balanced)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003999 ) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004000# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004001 if (__kmp_num_proc_groups > 1) {
4002 return;
4003 }
4004# endif
4005 KMP_ASSERT(fullMask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004006 i = KMP_PLACE_ALL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004007 mask = fullMask;
4008 }
4009 else {
4010 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4011 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4012 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4013 }
4014 }
4015# if OMP_40_ENABLED
4016 else {
4017 if ((! isa_root)
4018 || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004019# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004020 if (__kmp_num_proc_groups > 1) {
4021 return;
4022 }
4023# endif
4024 KMP_ASSERT(fullMask != NULL);
4025 i = KMP_PLACE_ALL;
4026 mask = fullMask;
4027 }
4028 else {
4029 //
4030 // int i = some hash function or just a counter that doesn't
4031 // always start at 0. Use gtid for now.
4032 //
4033 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4034 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4035 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4036 }
4037 }
4038# endif
4039
4040# if OMP_40_ENABLED
4041 th->th.th_current_place = i;
4042 if (isa_root) {
4043 th->th.th_new_place = i;
4044 th->th.th_first_place = 0;
4045 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4046 }
4047
4048 if (i == KMP_PLACE_ALL) {
4049 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
4050 gtid));
4051 }
4052 else {
4053 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
4054 gtid, i));
4055 }
4056# else
4057 if (i == -1) {
4058 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
4059 gtid));
4060 }
4061 else {
4062 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
4063 gtid, i));
4064 }
4065# endif /* OMP_40_ENABLED */
4066
4067 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4068
4069 if (__kmp_affinity_verbose) {
4070 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4071 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4072 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004073 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
4074 buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004075 }
4076
4077# if KMP_OS_WINDOWS
4078 //
4079 // On Windows* OS, the process affinity mask might have changed.
4080 // If the user didn't request affinity and this call fails,
4081 // just continue silently. See CQ171393.
4082 //
4083 if ( __kmp_affinity_type == affinity_none ) {
4084 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
4085 }
4086 else
4087# endif
4088 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4089}
4090
4091
4092# if OMP_40_ENABLED
4093
4094void
4095__kmp_affinity_set_place(int gtid)
4096{
4097 int retval;
4098
4099 if (! KMP_AFFINITY_CAPABLE()) {
4100 return;
4101 }
4102
4103 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4104
4105 KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
4106 gtid, th->th.th_new_place, th->th.th_current_place));
4107
4108 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00004109 // Check that the new place is within this thread's partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004110 //
4111 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004112 KMP_ASSERT(th->th.th_new_place >= 0);
4113 KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004114 if (th->th.th_first_place <= th->th.th_last_place) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004115 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004116 && (th->th.th_new_place <= th->th.th_last_place));
4117 }
4118 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004119 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004120 || (th->th.th_new_place >= th->th.th_last_place));
4121 }
4122
4123 //
4124 // Copy the thread mask to the kmp_info_t structure,
4125 // and set this thread's affinity.
4126 //
4127 kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
4128 th->th.th_new_place);
4129 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4130 th->th.th_current_place = th->th.th_new_place;
4131
4132 if (__kmp_affinity_verbose) {
4133 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4134 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4135 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004136 KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
4137 gtid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004138 }
4139 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4140}
4141
4142# endif /* OMP_40_ENABLED */
4143
4144
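//
// __kmp_aux_set_affinity() backs the kmp_set_affinity() entry point. A minimal usage
// sketch from user code, assuming the affinity API extensions declared in kmp.h
// (shown for illustration only, not as a normative example):
//
//     kmp_affinity_mask_t m;
//     kmp_create_affinity_mask(&m);
//     kmp_set_affinity_mask_proc(0, &m);   // add OS proc 0 to the mask
//     if (kmp_set_affinity(&m) != 0) {
//         // not affinity capable, or the mask was rejected
//     }
//     kmp_destroy_affinity_mask(&m);
//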
4145int
4146__kmp_aux_set_affinity(void **mask)
4147{
4148 int gtid;
4149 kmp_info_t *th;
4150 int retval;
4151
4152 if (! KMP_AFFINITY_CAPABLE()) {
4153 return -1;
4154 }
4155
4156 gtid = __kmp_entry_gtid();
4157 KA_TRACE(1000, ;{
4158 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4159 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4160 (kmp_affin_mask_t *)(*mask));
4161 __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
4162 gtid, buf);
4163 });
4164
4165 if (__kmp_env_consistency_check) {
4166 if ((mask == NULL) || (*mask == NULL)) {
4167 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4168 }
4169 else {
4170 unsigned proc;
4171 int num_procs = 0;
4172
4173 for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
4174 if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
4175 continue;
4176 }
4177 num_procs++;
4178 if (! KMP_CPU_ISSET(proc, fullMask)) {
4179 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4180 break;
4181 }
4182 }
4183 if (num_procs == 0) {
4184 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4185 }
4186
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004187# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004188 if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
4189 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4190 }
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004191# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004192
4193 }
4194 }
4195
4196 th = __kmp_threads[gtid];
4197 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4198 retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4199 if (retval == 0) {
4200 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
4201 }
4202
4203# if OMP_40_ENABLED
4204 th->th.th_current_place = KMP_PLACE_UNDEFINED;
4205 th->th.th_new_place = KMP_PLACE_UNDEFINED;
4206 th->th.th_first_place = 0;
4207 th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004208
4209 //
4210 // Turn off 4.0 affinity for the current thread at this parallel level.
4211 //
4212 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004213# endif
4214
4215 return retval;
4216}
4217
4218
4219int
4220__kmp_aux_get_affinity(void **mask)
4221{
4222 int gtid;
4223 int retval;
4224 kmp_info_t *th;
4225
4226 if (! KMP_AFFINITY_CAPABLE()) {
4227 return -1;
4228 }
4229
4230 gtid = __kmp_entry_gtid();
4231 th = __kmp_threads[gtid];
4232 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4233
4234 KA_TRACE(1000, ;{
4235 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4236 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4237 th->th.th_affin_mask);
4238 __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
4239 });
4240
4241 if (__kmp_env_consistency_check) {
4242 if ((mask == NULL) || (*mask == NULL)) {
4243 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
4244 }
4245 }
4246
4247# if !KMP_OS_WINDOWS
4248
4249 retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4250 KA_TRACE(1000, ;{
4251 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4252 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4253 (kmp_affin_mask_t *)(*mask));
4254 __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
4255 });
4256 return retval;
4257
4258# else
4259
4260 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
4261 return 0;
4262
4263# endif /* KMP_OS_WINDOWS */
4264
4265}
4266
Jim Cownie5e8470a2013-09-27 10:38:44 +00004267int
4268__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
4269{
4270 int retval;
4271
4272 if (! KMP_AFFINITY_CAPABLE()) {
4273 return -1;
4274 }
4275
4276 KA_TRACE(1000, ;{
4277 int gtid = __kmp_entry_gtid();
4278 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4279 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4280 (kmp_affin_mask_t *)(*mask));
4281 __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
4282 proc, gtid, buf);
4283 });
4284
4285 if (__kmp_env_consistency_check) {
4286 if ((mask == NULL) || (*mask == NULL)) {
4287 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
4288 }
4289 }
4290
4291 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4292 return -1;
4293 }
4294 if (! KMP_CPU_ISSET(proc, fullMask)) {
4295 return -2;
4296 }
4297
4298 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
4299 return 0;
4300}
4301
4302
4303int
4304__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
4305{
4306 int retval;
4307
4308 if (! KMP_AFFINITY_CAPABLE()) {
4309 return -1;
4310 }
4311
4312 KA_TRACE(1000, ;{
4313 int gtid = __kmp_entry_gtid();
4314 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4315 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4316 (kmp_affin_mask_t *)(*mask));
4317 __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
4318 proc, gtid, buf);
4319 });
4320
4321 if (__kmp_env_consistency_check) {
4322 if ((mask == NULL) || (*mask == NULL)) {
4323 KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
4324 }
4325 }
4326
4327 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4328 return -1;
4329 }
4330 if (! KMP_CPU_ISSET(proc, fullMask)) {
4331 return -2;
4332 }
4333
4334 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
4335 return 0;
4336}
4337
4338
4339int
4340__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
4341{
4342 int retval;
4343
4344 if (! KMP_AFFINITY_CAPABLE()) {
4345 return -1;
4346 }
4347
4348 KA_TRACE(1000, ;{
4349 int gtid = __kmp_entry_gtid();
4350 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4351 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4352 (kmp_affin_mask_t *)(*mask));
4353 __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
4354 proc, gtid, buf);
4355 });
4356
4357 if (__kmp_env_consistency_check) {
4358 if ((mask == NULL) || (*mask == NULL)) {
Andrey Churbanov4b2f17a2015-01-29 15:49:22 +00004359 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
Jim Cownie5e8470a2013-09-27 10:38:44 +00004360 }
4361 }
4362
4363 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4364 return 0;
4365 }
4366 if (! KMP_CPU_ISSET(proc, fullMask)) {
4367 return 0;
4368 }
4369
4370 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
4371}
4372
Jim Cownie5e8470a2013-09-27 10:38:44 +00004373
4374// Dynamic affinity settings - Affinity balanced
4375void __kmp_balanced_affinity( int tid, int nthreads )
4376{
4377 if( __kmp_affinity_uniform_topology() ) {
4378 int coreID;
4379 int threadID;
4380 // Number of hyper threads per core in HT machine
4381 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
4382 // Number of cores
4383 int ncores = __kmp_ncores;
4384 // How many threads will be bound to each core
4385 int chunk = nthreads / ncores;
4386 // How many cores will have an additional thread bound to it - "big cores"
4387 int big_cores = nthreads % ncores;
4388 // Number of threads on the big cores
4389 int big_nth = ( chunk + 1 ) * big_cores;
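        // Worked example (illustrative): nthreads == 10 on ncores == 4 gives
        // chunk == 2, big_cores == 2, big_nth == 6; tids 0-5 land on cores 0-1
        // (3 threads each) and tids 6-9 on cores 2-3 (2 threads each).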
4390 if( tid < big_nth ) {
4391 coreID = tid / (chunk + 1 );
4392 threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
4393 } else { //tid >= big_nth
4394 coreID = ( tid - big_cores ) / chunk;
4395 threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
4396 }
4397
4398 KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
4399 "Illegal set affinity operation when not capable");
4400
4401 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
4402 KMP_CPU_ZERO(mask);
4403
4404 // Granularity == thread
4405 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4406 int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
4407 KMP_CPU_SET( osID, mask);
4408 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4409 for( int i = 0; i < __kmp_nth_per_core; i++ ) {
4410 int osID;
4411 osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
4412 KMP_CPU_SET( osID, mask);
4413 }
4414 }
4415 if (__kmp_affinity_verbose) {
4416 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4417 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004418 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4419 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004420 }
4421 __kmp_set_system_affinity( mask, TRUE );
4422 } else { // Non-uniform topology
4423
4424 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
4425 KMP_CPU_ZERO(mask);
4426
4427 // Number of hyper threads per core in HT machine
4428 int nth_per_core = __kmp_nThreadsPerCore;
4429 int core_level;
4430 if( nth_per_core > 1 ) {
4431 core_level = __kmp_aff_depth - 2;
4432 } else {
4433 core_level = __kmp_aff_depth - 1;
4434 }
4435
4436 // Number of cores - maximum value; it does not count trailing cores with 0 processors
4437 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
4438
4439 // As a performance optimization, handle the special case nthreads == __kmp_avail_proc separately
4440 if( nthreads == __kmp_avail_proc ) {
4441 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4442 int osID = address2os[ tid ].second;
4443 KMP_CPU_SET( osID, mask);
4444 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4445 int coreID = address2os[ tid ].first.labels[ core_level ];
4446 // Count the osIDs found for the current core; there can be at most nth_per_core of them.
4447 // Since address2os is sorted, we can break as soon as cnt == nth_per_core.
4448 int cnt = 0;
4449 for( int i = 0; i < __kmp_avail_proc; i++ ) {
4450 int osID = address2os[ i ].second;
4451 int core = address2os[ i ].first.labels[ core_level ];
4452 if( core == coreID ) {
4453 KMP_CPU_SET( osID, mask);
4454 cnt++;
4455 if( cnt == nth_per_core ) {
4456 break;
4457 }
4458 }
4459 }
4460 }
4461 } else if( nthreads <= __kmp_ncores ) {
4462
4463 int core = 0;
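            // Sketch of the intent (as read from the loop below): walk the cores in
            // procarr order, counting only cores that still expose at least one
            // available proc; thread tid binds to the tid-th such core, taking either
            // its first available context (granularity thread) or all of them
            // (granularity core).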
4464 for( int i = 0; i < ncores; i++ ) {
4465 // Check whether this core has any available procs recorded in procarr[]
4466 int in_mask = 0;
4467 for( int j = 0; j < nth_per_core; j++ ) {
4468 if( procarr[ i * nth_per_core + j ] != - 1 ) {
4469 in_mask = 1;
4470 break;
4471 }
4472 }
4473 if( in_mask ) {
4474 if( tid == core ) {
4475 for( int j = 0; j < nth_per_core; j++ ) {
4476 int osID = procarr[ i * nth_per_core + j ];
4477 if( osID != -1 ) {
4478 KMP_CPU_SET( osID, mask );
4479 // For granularity=thread it is enough to set the first available osID for this core
4480 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4481 break;
4482 }
4483 }
4484 }
4485 break;
4486 } else {
4487 core++;
4488 }
4489 }
4490 }
4491
4492 } else { // nthreads > __kmp_ncores
4493
4494 // Array to save the number of processors at each core
4495 int nproc_at_core[ ncores ];
4496 // Array to save the number of cores with "x" available processors
4497 int ncores_with_x_procs[ nth_per_core + 1 ];
4498 // Array to save the number of cores with at least x (up to nth_per_core) available procs
4499 int ncores_with_x_to_max_procs[ nth_per_core + 1 ];
4500
4501 for( int i = 0; i <= nth_per_core; i++ ) {
4502 ncores_with_x_procs[ i ] = 0;
4503 ncores_with_x_to_max_procs[ i ] = 0;
4504 }
4505
4506 for( int i = 0; i < ncores; i++ ) {
4507 int cnt = 0;
4508 for( int j = 0; j < nth_per_core; j++ ) {
4509 if( procarr[ i * nth_per_core + j ] != -1 ) {
4510 cnt++;
4511 }
4512 }
4513 nproc_at_core[ i ] = cnt;
4514 ncores_with_x_procs[ cnt ]++;
4515 }
4516
4517 for( int i = 0; i <= nth_per_core; i++ ) {
4518 for( int j = i; j <= nth_per_core; j++ ) {
4519 ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
4520 }
4521 }
4522
4523 // Max number of processors
4524 int nproc = nth_per_core * ncores;
4525 // An array to keep the number of threads assigned to each thread context
4526 int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
4527 for( int i = 0; i < nproc; i++ ) {
4528 newarr[ i ] = 0;
4529 }
4530
4531 int nth = nthreads;
4532 int flag = 0;
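            // Roughly, per the loop below: the first pass hands out one thread per
            // hardware context, filling the first context of every usable core, then
            // the second context of cores that have one, and so on
            // (ncores_with_x_to_max_procs[j] bounds how many cores can still take a
            // j-th thread). Once flag != 0, later passes stack extra threads onto
            // contexts that are already occupied. newarr[ctx] ends up with the number
            // of threads assigned to each context and is consumed by the prefix-sum
            // walk further down.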
4533 while( nth > 0 ) {
4534 for( int j = 1; j <= nth_per_core; j++ ) {
4535 int cnt = ncores_with_x_to_max_procs[ j ];
4536 for( int i = 0; i < ncores; i++ ) {
4537 // Skip the core with 0 processors
4538 if( nproc_at_core[ i ] == 0 ) {
4539 continue;
4540 }
4541 for( int k = 0; k < nth_per_core; k++ ) {
4542 if( procarr[ i * nth_per_core + k ] != -1 ) {
4543 if( newarr[ i * nth_per_core + k ] == 0 ) {
4544 newarr[ i * nth_per_core + k ] = 1;
4545 cnt--;
4546 nth--;
4547 break;
4548 } else {
4549 if( flag != 0 ) {
4550 newarr[ i * nth_per_core + k ] ++;
4551 cnt--;
4552 nth--;
4553 break;
4554 }
4555 }
4556 }
4557 }
4558 if( cnt == 0 || nth == 0 ) {
4559 break;
4560 }
4561 }
4562 if( nth == 0 ) {
4563 break;
4564 }
4565 }
4566 flag = 1;
4567 }
4568 int sum = 0;
4569 for( int i = 0; i < nproc; i++ ) {
4570 sum += newarr[ i ];
4571 if( sum > tid ) {
4572 // Granularity == thread
4573 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4574 int osID = procarr[ i ];
4575 KMP_CPU_SET( osID, mask);
4576 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4577 int coreID = i / nth_per_core;
4578 for( int ii = 0; ii < nth_per_core; ii++ ) {
4579 int osID = procarr[ coreID * nth_per_core + ii ];
4580 if( osID != -1 ) {
4581 KMP_CPU_SET( osID, mask);
4582 }
4583 }
4584 }
4585 break;
4586 }
4587 }
4588 __kmp_free( newarr );
4589 }
4590
4591 if (__kmp_affinity_verbose) {
4592 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4593 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004594 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4595 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004596 }
4597 __kmp_set_system_affinity( mask, TRUE );
4598 }
4599}
4600
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004601#else
4602 // affinity not supported
4603
4604kmp_uint32 mac_skipPerLevel[7];
4605kmp_uint32 mac_depth;
4606kmp_uint8 mac_leaf_kids;
4607void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
4608 static int first = 1;
4609 if (first) {
4610 const kmp_uint32 maxLevels = 7;
4611 kmp_uint32 numPerLevel[maxLevels];
4612
4613 for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
4614 numPerLevel[i] = 1;
4615 mac_skipPerLevel[i] = 1;
4616 }
4617
4618 mac_depth = 2;
4619 numPerLevel[0] = nproc;
4620
4621 kmp_uint32 branch = 4;
4622 if (numPerLevel[0] == 1) branch = nproc/4;
4623 if (branch<4) branch=4;
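 // Rough idea of the width optimization below (description only): repeatedly halve an
 // over-wide level (rounding odd counts up) and push the factor of two into the level
 // above, growing mac_depth when a level first becomes non-trivial; level 0 is capped
 // at a fan-out of 4. mac_skipPerLevel[i] then becomes the cumulative product of the
 // level widths below level i.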
4624 for (kmp_uint32 d=0; d<mac_depth-1; ++d) { // optimize hierarchy width
4625 while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
4626 if (numPerLevel[d] & 1) numPerLevel[d]++;
4627 numPerLevel[d] = numPerLevel[d] >> 1;
4628 if (numPerLevel[d+1] == 1) mac_depth++;
4629 numPerLevel[d+1] = numPerLevel[d+1] << 1;
4630 }
4631 if(numPerLevel[0] == 1) {
4632 branch = branch >> 1;
4633 if (branch<4) branch = 4;
4634 }
4635 }
4636
4637 for (kmp_uint32 i=1; i<mac_depth; ++i)
4638 mac_skipPerLevel[i] = numPerLevel[i-1] * mac_skipPerLevel[i-1];
4639 mac_leaf_kids = (kmp_uint8)numPerLevel[0]-1;
4640 first=0;
4641 }
4642 thr_bar->depth = mac_depth;
4643 thr_bar->base_leaf_kids = mac_leaf_kids;
4644 thr_bar->skip_per_level = mac_skipPerLevel;
4645}
4646
Alp Toker763b9392014-02-28 09:42:41 +00004647#endif // KMP_AFFINITY_SUPPORTED