/*
 * kmp_affinity.cpp -- affinity management
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_str.h"
#include "kmp_wrapper_getpid.h"

#if KMP_AFFINITY_SUPPORTED
24//
25// Print the affinity mask to the character array in a pretty format.
26//
27char *
28__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
29{
30 KMP_ASSERT(buf_len >= 40);
31 char *scan = buf;
32 char *end = buf + buf_len - 1;
33
34 //
35 // Find first element / check for empty set.
36 //
37 size_t i;
38 for (i = 0; i < KMP_CPU_SETSIZE; i++) {
39 if (KMP_CPU_ISSET(i, mask)) {
40 break;
41 }
42 }
43 if (i == KMP_CPU_SETSIZE) {
44 sprintf(scan, "{<empty>}");
45 while (*scan != '\0') scan++;
46 KMP_ASSERT(scan <= end);
47 return buf;
48 }
49
Jim Cownie4cc4bb42014-10-07 16:25:50 +000050 sprintf(scan, "{%ld", (long)i);
Jim Cownie5e8470a2013-09-27 10:38:44 +000051 while (*scan != '\0') scan++;
52 i++;
53 for (; i < KMP_CPU_SETSIZE; i++) {
54 if (! KMP_CPU_ISSET(i, mask)) {
55 continue;
56 }
57
58 //
59 // Check for buffer overflow. A string of the form ",<n>" will have
60 // at most 10 characters, plus we want to leave room to print ",...}"
61 // if the set is too large to print for a total of 15 characters.
62 // We already left room for '\0' in setting end.
63 //
64 if (end - scan < 15) {
65 break;
66 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +000067 sprintf(scan, ",%-ld", (long)i);
Jim Cownie5e8470a2013-09-27 10:38:44 +000068 while (*scan != '\0') scan++;
69 }
70 if (i < KMP_CPU_SETSIZE) {
71 sprintf(scan, ",...");
72 while (*scan != '\0') scan++;
73 }
74 sprintf(scan, "}");
75 while (*scan != '\0') scan++;
76 KMP_ASSERT(scan <= end);
77 return buf;
78}


void
__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
{
    KMP_CPU_ZERO(mask);

# if KMP_GROUP_AFFINITY

    if (__kmp_num_proc_groups > 1) {
        int group;
        KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
        for (group = 0; group < __kmp_num_proc_groups; group++) {
            int i;
            int num = __kmp_GetActiveProcessorCount(group);
            for (i = 0; i < num; i++) {
                KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
            }
        }
    }
    else

# endif /* KMP_GROUP_AFFINITY */

    {
        int proc;
        for (proc = 0; proc < __kmp_xproc; proc++) {
            KMP_CPU_SET(proc, mask);
        }
    }
}


//
// In Linux* OS debug & cover (-O0) builds, we need to avoid inline member
// functions.
//
// The icc codegen emits sections with extremely long names, of the form
// ".gnu.linkonce.<mangled_name>".  There seems to have been a linker bug
// introduced between GNU ld version 2.14.90.0.4 and 2.15.92.0.2 involving
// some sort of memory corruption or table overflow that is triggered by
// these long strings.  I checked the latest version of the linker -
// GNU ld (Linux* OS/GNU Binutils) 2.18.50.0.7.20080422 - and the bug is not
// fixed.
//
// Unfortunately, my attempts to reproduce it in a smaller example have
// failed - I'm not sure what the prospects are of getting it fixed
// properly - but we need a reproducer smaller than all of libiomp.
//
// Work around the problem by avoiding inline constructors in such builds.
// We do this for all platforms, not just Linux* OS - non-inline functions
// are more debuggable and provide better coverage than inline functions.
// Use inline functions in shipping libs, for performance.
//
# if !defined(KMP_DEBUG) && !defined(COVER)

class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth)
      : depth(_depth), leader(FALSE) {
    }
    Address &operator=(const Address &b) {
        depth = b.depth;
        for (unsigned i = 0; i < depth; i++) {
            labels[i] = b.labels[i];
            childNums[i] = b.childNums[i];
        }
        leader = FALSE;
        return *this;
    }
    bool operator==(const Address &b) const {
        if (depth != b.depth)
            return false;
        for (unsigned i = 0; i < depth; i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool isClose(const Address &b, int level) const {
        if (depth != b.depth)
            return false;
        if ((unsigned)level >= depth)
            return true;
        for (unsigned i = 0; i < (depth - level); i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool operator!=(const Address &b) const {
        return !operator==(b);
    }
};

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {
    }
    AddrUnsPair &operator=(const AddrUnsPair &b)
    {
        first = b.first;
        second = b.second;
        return *this;
    }
};

# else

class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth);
    Address &operator=(const Address &b);
    bool operator==(const Address &b) const;
    bool isClose(const Address &b, int level) const;
    bool operator!=(const Address &b) const;
};

Address::Address(unsigned _depth)
{
    depth = _depth;
    leader = FALSE;
}

Address &Address::operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
        labels[i] = b.labels[i];
        childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
}

bool Address::operator==(const Address &b) const {
    if (depth != b.depth)
        return false;
    for (unsigned i = 0; i < depth; i++)
        if (labels[i] != b.labels[i])
            return false;
    return true;
}

bool Address::isClose(const Address &b, int level) const {
    if (depth != b.depth)
        return false;
    if ((unsigned)level >= depth)
        return true;
    for (unsigned i = 0; i < (depth - level); i++)
        if (labels[i] != b.labels[i])
            return false;
    return true;
}

bool Address::operator!=(const Address &b) const {
    return !operator==(b);
}

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second);
    AddrUnsPair &operator=(const AddrUnsPair &b);
};

AddrUnsPair::AddrUnsPair(Address _first, unsigned _second)
  : first(_first), second(_second)
{
}

AddrUnsPair &AddrUnsPair::operator=(const AddrUnsPair &b)
{
    first = b.first;
    second = b.second;
    return *this;
}

# endif /* !defined(KMP_DEBUG) && !defined(COVER) */
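
//
// Illustrative example (assumed values, not from the original source): on a
// machine with 2 packages, 2 cores/package and 2 threads/core, the OS proc
// bound to package 1, core 0, thread 1 is described by an Address with
// depth == 3 and labels[] == { 1, 0, 1 }.  After
// __kmp_affinity_assign_child_nums() runs, childNums[] holds the ordinal
// position of each label among its siblings, which is what the "compact"
// comparator below actually sorts on.
//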


static int
__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)
      ->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)
      ->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    for (i = 0; i < depth; i++) {
        if (aa->labels[i] < bb->labels[i]) return -1;
        if (aa->labels[i] > bb->labels[i]) return 1;
    }
    return 0;
}


static int
__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)
      ->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)
      ->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
    KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
    for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
        int j = depth - i - 1;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    for (; i < depth; i++) {
        int j = i - __kmp_affinity_compact;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    return 0;
}
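
//
// Worked example (illustrative): with depth == 3 (package, core, thread) and
// __kmp_affinity_compact == 1, the first loop above compares the thread-level
// childNums, and the second loop then compares package and core.  Sorting
// this way groups all thread-0 contexts ahead of all thread-1 contexts, so
// consecutive entries in the sorted table land on different cores before
// doubling up on the hardware threads of any one core.
//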

/** A structure for holding machine-specific hierarchy info to be computed once at init. */
class hierarchy_info {
public:
    /** Typical levels are threads/core, cores/package or socket, packages/node, nodes/machine,
        etc.  We don't want to get specific with nomenclature */
    static const kmp_uint32 maxLevels=7;

    /** This is specifically the depth of the machine configuration hierarchy, in terms of the
        number of levels along the longest path from root to any leaf. It corresponds to the
        number of entries in numPerLevel if we exclude all but one trailing 1. */
    kmp_uint32 depth;
    kmp_uint32 base_depth;
    kmp_uint32 base_num_threads;
    bool uninitialized;

    /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
        node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
        and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
    kmp_uint32 numPerLevel[maxLevels];
    kmp_uint32 skipPerLevel[maxLevels];

    void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
        int hier_depth = adr2os[0].first.depth;
        int level = 0;
        for (int i=hier_depth-1; i>=0; --i) {
            int max = -1;
            for (int j=0; j<num_addrs; ++j) {
                int next = adr2os[j].first.childNums[i];
                if (next > max) max = next;
            }
            numPerLevel[level] = max+1;
            ++level;
        }
    }

    hierarchy_info() : depth(1), uninitialized(true) {}
    void init(AddrUnsPair *adr2os, int num_addrs)
    {
        /* Added explicit initialization of the depth here to prevent usage of dirty value
           observed when static library is re-initialized multiple times (e.g. when
           non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
        depth = 1;
        uninitialized = false;
        for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        // Sort table by physical ID
        if (adr2os) {
            qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
            deriveLevels(adr2os, num_addrs);
        }
        else {
            numPerLevel[0] = 4;
            numPerLevel[1] = num_addrs/4;
            if (num_addrs%4) numPerLevel[1]++;
        }

        base_num_threads = num_addrs;
        for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
            if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
                depth++;

        kmp_uint32 branch = 4;
        if (numPerLevel[0] == 1) branch = num_addrs/4;
        if (branch<4) branch=4;
        for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
            while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
                if (numPerLevel[d] & 1) numPerLevel[d]++;
                numPerLevel[d] = numPerLevel[d] >> 1;
                if (numPerLevel[d+1] == 1) depth++;
                numPerLevel[d+1] = numPerLevel[d+1] << 1;
            }
            if (numPerLevel[0] == 1) {
                branch = branch >> 1;
                if (branch<4) branch = 4;
            }
        }

        for (kmp_uint32 i=1; i<depth; ++i)
            skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];

        base_depth = depth;
    }
};
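
//
// Worked example (illustrative): for numPerLevel = {2, 4, 4, 1, 1, 1, 1}
// (2 threads/core, 4 cores/package, 4 packages), the final loop in init()
// yields skipPerLevel = {1, 2, 8, 32, ...}; i.e. skipPerLevel[i] is the
// number of leaves spanned by one subtree rooted at level i.
//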

static hierarchy_info machine_hierarchy;

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
    if (machine_hierarchy.uninitialized)
        machine_hierarchy.init(NULL, nproc);

    if (nproc <= machine_hierarchy.base_num_threads)
        machine_hierarchy.depth = machine_hierarchy.base_depth;
    KMP_DEBUG_ASSERT(machine_hierarchy.depth > 0);
    while (nproc > machine_hierarchy.skipPerLevel[machine_hierarchy.depth-1]) {
        machine_hierarchy.depth++;
        machine_hierarchy.skipPerLevel[machine_hierarchy.depth-1] = 2*machine_hierarchy.skipPerLevel[machine_hierarchy.depth-2];
    }
    thr_bar->depth = machine_hierarchy.depth;
    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}
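
//
// Illustrative note (assumed numbers): if the hierarchy was initialized for
// 32 leaves with skipPerLevel = {1, 2, 8, 32} and a later call asks for
// nproc == 64, the while loop above appends one artificial level, doubling
// the top skip value to 64 so the barrier tree still covers every thread.
//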

//
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the child_num
// vector of the address object.  This is done in case the labels used for
// the children at one node of the hierarchy differ from those used for
// another node at the same level.  Example: suppose the machine has 2 nodes
// with 2 packages each.  The first node contains packages 601 and 602, and
// the second node contains packages 603 and 604.  If we try to sort the table
// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
// because we are paying attention to the labels themselves, not the ordinal
// child numbers.  By using the child numbers in the sort, the result is
// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
//
static void
__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
  int numAddrs)
{
    KMP_DEBUG_ASSERT(numAddrs > 0);
    int depth = address2os->first.depth;
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
      * sizeof(unsigned));
    int labCt;
    for (labCt = 0; labCt < depth; labCt++) {
        address2os[0].first.childNums[labCt] = counts[labCt] = 0;
        lastLabel[labCt] = address2os[0].first.labels[labCt];
    }
    int i;
    for (i = 1; i < numAddrs; i++) {
        for (labCt = 0; labCt < depth; labCt++) {
            if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
                int labCt2;
                for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
                    counts[labCt2] = 0;
                    lastLabel[labCt2] = address2os[i].first.labels[labCt2];
                }
                counts[labCt]++;
                lastLabel[labCt] = address2os[i].first.labels[labCt];
                break;
            }
        }
        for (labCt = 0; labCt < depth; labCt++) {
            address2os[i].first.childNums[labCt] = counts[labCt];
        }
        for (; labCt < (int)Address::maxDepth; labCt++) {
            address2os[i].first.childNums[labCt] = 0;
        }
    }
}
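
//
// Worked example (illustrative), continuing the node/package example above:
// for the sorted label vectors {0,601}, {0,602}, {1,603}, {1,604} the loop
// assigns childNums {0,0}, {0,1}, {1,0}, {1,1} - package 603 becomes child 0
// of node 1 even though its raw label is larger than 602's.
//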


//
// All of the __kmp_affinity_create_*_map() routines should set
// __kmp_affinity_masks to a vector of affinity mask objects of length
// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
// return the number of levels in the machine topology tree (zero if
// __kmp_affinity_type == affinity_none).
//
// All of the __kmp_affinity_create_*_map() routines should set *fullMask
// to the affinity mask for the initialization thread.  They need to save and
// restore the mask, and it could be needed later, so saving it is just an
// optimization to avoid calling kmp_get_system_affinity() again.
//
static kmp_affin_mask_t *fullMask = NULL;

kmp_affin_mask_t *
__kmp_affinity_get_fullMask() { return fullMask; }


static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif
//
// __kmp_affinity_uniform_topology() doesn't work when called from
// places which support arbitrarily many levels in the machine topology
// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
// and __kmp_affinity_create_x2apicid_map().
//
inline static bool
__kmp_affinity_uniform_topology()
{
    return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}


//
// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc.
//
static void
__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
  int pkgLevel, int coreLevel, int threadLevel)
{
    int proc;

    KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
    for (proc = 0; proc < len; proc++) {
        int level;
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);
        for (level = 0; level < depth; level++) {
            if (level == threadLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
            }
            else if (level == coreLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
            }
            else if (level == pkgLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
            }
            else if (level > pkgLevel) {
                __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                  level - pkgLevel - 1);
            }
            else {
                __kmp_str_buf_print(&buf, "L%d ", level);
            }
            __kmp_str_buf_print(&buf, "%d ",
              address2os[proc].first.labels[level]);
        }
        KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
          buf.str);
        __kmp_str_buf_free(&buf);
    }
}


//
// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread id's <-> processor id's.
//
static int
__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Even if __kmp_affinity_type == affinity_none, this routine might still
    // be called to set __kmp_ncores, as well as
    // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(__kmp_affinity_type == affinity_none);
        __kmp_ncores = nPackages = __kmp_xproc;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nPackages = __kmp_avail_proc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);

        KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }
    if (__kmp_affinity_type == affinity_none) {
        return 0;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    unsigned int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(1);
        addr.labels[0] = i;
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
    }
    if (__kmp_affinity_verbose) {
        KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Only the package level is modeled in the machine topology map,
        // so the #levels of granularity is either 0 or 1.
        //
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels = 1;
        }
        else {
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 1;
}


# if KMP_GROUP_AFFINITY

//
// If multiple Windows* OS processor groups exist, we can create a 2-level
// topology map with the groups at level 0 and the individual procs at
// level 1.
//
// This facilitates letting the threads float among all procs in a group,
// if granularity=group (the default when there are multiple groups).
//
static int
__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // If we don't have multiple processor groups, return now.
    // The flat mapping will be used.
    //
    if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
        // FIXME set *msg_id
        return -1;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(2);
        addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
        addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
              addr.labels[1]);
        }
    }

    if (__kmp_affinity_gran_levels < 0) {
        if (__kmp_affinity_gran == affinity_gran_group) {
            __kmp_affinity_gran_levels = 1;
        }
        else if ((__kmp_affinity_gran == affinity_gran_fine)
          || (__kmp_affinity_gran == affinity_gran_thread)) {
            __kmp_affinity_gran_levels = 0;
        }
        else {
            const char *gran_str = NULL;
            if (__kmp_affinity_gran == affinity_gran_core) {
                gran_str = "core";
            }
            else if (__kmp_affinity_gran == affinity_gran_package) {
                gran_str = "package";
            }
            else if (__kmp_affinity_gran == affinity_gran_node) {
                gran_str = "node";
            }
            else {
                KMP_ASSERT(0);
            }

            // Warning: can't use affinity granularity \"gran\" with group topology method, using "thread"
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 2;
}

# endif /* KMP_GROUP_AFFINITY */


# if KMP_ARCH_X86 || KMP_ARCH_X86_64

static int
__kmp_cpuid_mask_width(int count) {
    int r = 0;

    while ((1<<r) < count)
        ++r;
    return r;
}
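
//
// Illustrative example: __kmp_cpuid_mask_width(6) == 3, since 2^3 = 8 is the
// smallest power of two >= 6, i.e. 3 bits are needed to encode 6 distinct
// ids in an Apic ID bit field.
//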


class apicThreadInfo {
public:
    unsigned osId;              // param to __kmp_affinity_bind_thread
    unsigned apicId;            // from cpuid after binding
    unsigned maxCoresPerPkg;    //      ""
    unsigned maxThreadsPerPkg;  //      ""
    unsigned pkgId;             // inferred from above values
    unsigned coreId;            //      ""
    unsigned threadId;          //      ""
};


static int
__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->osId < bb->osId) return -1;
    if (aa->osId > bb->osId) return 1;
    return 0;
}


static int
__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->pkgId < bb->pkgId) return -1;
    if (aa->pkgId > bb->pkgId) return 1;
    if (aa->coreId < bb->coreId) return -1;
    if (aa->coreId > bb->coreId) return 1;
    if (aa->threadId < bb->threadId) return -1;
    if (aa->threadId > bb->threadId) return 1;
    return 0;
}


//
// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
// an algorithm which cycles through the available os threads, setting
// the current thread's affinity mask to each of them in turn, and then
// retrieving the Apic Id for each thread context using the cpuid instruction.
//
static int
__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;
    int rc;
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check if cpuid leaf 4 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 4) {
        *msg_id = kmp_i18n_str_NoLeaf4Support;
        return -1;
    }

    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
    // the defaults that we calculated from issuing cpuid without binding
    // to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        //
        // Get an upper bound on the number of threads per package using
        // cpuid(1).
        //
        // On some OS/chip combinations where HT is supported by the chip
        // but is disabled, this value will be 2 on a single core chip.
        // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
        //
        __kmp_x86_cpuid(1, 0, &buf);
        int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (maxThreadsPerPkg == 0) {
            maxThreadsPerPkg = 1;
        }

        //
        // The num cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // The author of cpu_count.cpp treated this as only an upper bound
        // on the number of cores, but I haven't seen any cases where it
        // was greater than the actual number of cores, so we will treat
        // it as exact in this block of code.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            nCoresPerPkg = 1;
        }

        //
        // There is no way to reliably tell if HT is enabled without issuing
        // the cpuid instruction from every thread, and correlating the cpuid
        // info, so if the machine is not affinity capable, we assume that HT
        // is off.  We have seen quite a few machines where maxThreadsPerPkg
        // is 2, yet the machine does not support HT.
        //
        // - Older OSes are usually found on machines with older chips, which
        //   do not support HT.
        //
        // - The performance penalty for mistakenly identifying a machine as
        //   HT when it isn't (which results in blocktime being incorrectly set
        //   to 0) is greater than the penalty for mistakenly identifying
        //   a machine as being 1 thread/core when it is really HT enabled
        //   (which results in blocktime being incorrectly set to a positive
        //   value).
        //
        __kmp_ncores = __kmp_xproc;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        __kmp_nThreadsPerCore = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    KMP_ASSERT(oldMask != NULL);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    // The relevant information is:
    //
    // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
    //     has a unique Apic Id, which is of the form pkg# : core# : thread#.
    //
    // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1).  The
    //     value of this field determines the width of the core# + thread#
    //     fields in the Apic Id.  It is also an upper bound on the number
    //     of threads per package, but it has been verified that situations
    //     happen where it is not exact.  In particular, on certain OS/chip
    //     combinations where Intel(R) Hyper-Threading Technology is supported
    //     by the chip but has been disabled, the value of this field will be
    //     2 (for a single core chip).  On other OS/chip combinations
    //     supporting Intel(R) Hyper-Threading Technology, the value of this
    //     field will be 1 when Intel(R) Hyper-Threading Technology is
    //     disabled and 2 when it is enabled.
    //
    // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4).  The
    //     value of this field (+1) determines the width of the core# field in
    //     the Apic Id.  The comments in "cpucount.cpp" say that this value is
    //     an upper bound, but the IA-32 architecture manual says that it is
    //     exactly the number of cores per package, and I haven't seen any
    //     case where it wasn't.
    //
    // From this information, deduce the package Id, core Id, and thread Id,
    // and set the corresponding fields in the apicThreadInfo struct.
    //
    unsigned i;
    apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
    unsigned nApics = 0;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(i);
        threadInfo[nApics].osId = i;

        //
        // The apic id and max threads per pkg come from cpuid(1).
        //
        __kmp_x86_cpuid(1, 0, &buf);
        if (! ((buf.edx >> 9) & 1)) {   // test edx bit 9: APIC on-chip
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_ApicNotPresent;
            return -1;
        }
        threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
        threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (threadInfo[nApics].maxThreadsPerPkg == 0) {
            threadInfo[nApics].maxThreadsPerPkg = 1;
        }

        //
        // Max cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            threadInfo[nApics].maxCoresPerPkg = 1;
        }

        //
        // Infer the pkgId / coreId / threadId using only the info
        // obtained locally.
        //
        int widthCT = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxThreadsPerPkg);
        threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

        int widthC = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxCoresPerPkg);
        int widthT = widthCT - widthC;
        if (widthT < 0) {
            //
            // I've never seen this one happen, but I suppose it could, if
            // the cpuid instruction on a chip was really screwed up.
            // Make sure to restore the affinity mask before the tail call.
            //
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }
        int maskC = (1 << widthC) - 1;
        threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
          & maskC;

        int maskT = (1 << widthT) - 1;
        threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;
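
        //
        // Worked example (assumed values, for illustration only): with
        // maxThreadsPerPkg = 16 and maxCoresPerPkg = 8, widthCT = 4,
        // widthC = 3 and widthT = 1.  An apicId of 0x1b (binary 1|101|1)
        // then decomposes as pkgId = 0x1b >> 4 = 1,
        // coreId = (0x1b >> 1) & 0x7 = 5, and threadId = 0x1b & 0x1 = 1.
        //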

        nApics++;
    }

    //
    // We've collected all the info we need.
    // Restore the old affinity mask for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, form an Address object
    // with depth 1 and return immediately (or, if affinity is off, set
    // address2os to NULL and return).
    //
    // If it is configured to omit the package level when there is only a
    // single package, the logic at the end of this routine won't work if
    // there is only a single thread - it would try to form an Address
    // object with depth 0.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
        Address addr(1);
        addr.labels[0] = threadInfo[0].pkgId;
        (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
        }

        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the threadInfo table by physical Id.
    //
    qsort(threadInfo, nApics, sizeof(*threadInfo),
      __kmp_affinity_cmp_apicThreadInfo_phys_id);

    //
    // The table is now sorted by pkgId / coreId / threadId, but we really
    // don't know the radix of any of the fields.  pkgId's may be sparsely
    // assigned among the chips on a system.  Although coreId's are usually
    // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
    // [0..threadsPerCore-1], we don't want to make any such assumptions.
    //
    // For that matter, we don't know what coresPerPkg and threadsPerCore
    // (or the total # packages) are at this point - we want to determine
    // that now.  We only have an upper bound on the first two figures.
    //
    // We also perform a consistency check at this point: the values returned
    // by the cpuid instruction for any thread bound to a given package had
    // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
    //
    nPackages = 1;
    nCoresPerPkg = 1;
    __kmp_nThreadsPerCore = 1;
    unsigned nCores = 1;

    unsigned pkgCt = 1;                         // to determine radii
    unsigned lastPkgId = threadInfo[0].pkgId;
    unsigned coreCt = 1;
    unsigned lastCoreId = threadInfo[0].coreId;
    unsigned threadCt = 1;
    unsigned lastThreadId = threadInfo[0].threadId;

    // intra-pkg consist checks
    unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
    unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

    for (i = 1; i < nApics; i++) {
        if (threadInfo[i].pkgId != lastPkgId) {
            nCores++;
            pkgCt++;
            lastPkgId = threadInfo[i].pkgId;
            if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
            coreCt = 1;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;

            //
            // This is a different package, so go on to the next iteration
            // without doing any consistency checks.  Reset the consistency
            // check vars, though.
            //
            prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
            prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
            continue;
        }

        if (threadInfo[i].coreId != lastCoreId) {
            nCores++;
            coreCt++;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;
        }
        else if (threadInfo[i].threadId != lastThreadId) {
            threadCt++;
            lastThreadId = threadInfo[i].threadId;
        }
        else {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
            return -1;
        }

        //
        // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
        // fields agree between all the threads bound to a given package.
        //
        if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
          || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }
    }
    nPackages = pkgCt;
    if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
    if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nCores;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (__kmp_affinity_uniform_topology()) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Now that we've determined the number of packages, the number of cores
    // per package, and the number of threads per core, we can construct the
    // data structure that is to be returned.
    //
    int pkgLevel = 0;
    int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
    int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
    unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

    KMP_ASSERT(depth > 0);
    *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

    for (i = 0; i < nApics; ++i) {
        Address addr(depth);
        unsigned os = threadInfo[i].osId;
        int d = 0;

        if (pkgLevel >= 0) {
            addr.labels[d++] = threadInfo[i].pkgId;
        }
        if (coreLevel >= 0) {
            addr.labels[d++] = threadInfo[i].coreId;
        }
        if (threadLevel >= 0) {
            addr.labels[d++] = threadInfo[i].threadId;
        }
        (*address2os)[i] = AddrUnsPair(addr, os);
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0)
          && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return depth;
}


//
// Intel(R) microarchitecture code name Nehalem, Dunnington and later
// architectures support a newer interface for specifying the x2APIC Ids,
// based on cpuid leaf 11.
//
static int
__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;

    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check to see if cpuid leaf 11 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 11) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }
    __kmp_x86_cpuid(11, 0, &buf);
    if (buf.ebx == 0) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }

    //
    // Find the number of levels in the machine topology.  While we're at it,
    // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg.  We
    // will try to get more accurate values later by explicitly counting them,
    // but get reasonable defaults now, in case we return early.
    //
    int level;
    int threadLevel = -1;
    int coreLevel = -1;
    int pkgLevel = -1;
    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

    for (level = 0;; level++) {
        if (level > 31) {
            //
            // FIXME: Hack for DPD200163180
            //
            // If level is big then something went wrong -> exiting
            //
            // There could actually be 32 valid levels in the machine topology,
            // but so far, the only machine we have seen which does not exit
            // this loop before iteration 32 has fubar x2APIC settings.
            //
            // For now, just reject this case based upon loop trip count.
            //
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }
        __kmp_x86_cpuid(11, level, &buf);
        if (buf.ebx == 0) {
            if (pkgLevel < 0) {
                //
                // Will infer nPackages from __kmp_xproc
                //
                pkgLevel = level;
                level++;
            }
            break;
        }
        int kind = (buf.ecx >> 8) & 0xff;
        if (kind == 1) {
            //
            // SMT level
            //
            threadLevel = level;
            coreLevel = -1;
            pkgLevel = -1;
            __kmp_nThreadsPerCore = buf.ebx & 0xff;
            if (__kmp_nThreadsPerCore == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else if (kind == 2) {
            //
            // core level
            //
            coreLevel = level;
            pkgLevel = -1;
            nCoresPerPkg = buf.ebx & 0xff;
            if (nCoresPerPkg == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else {
            if (level <= 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
            if (pkgLevel >= 0) {
                continue;
            }
            pkgLevel = level;
            nPackages = buf.ebx & 0xff;
            if (nPackages == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
    }
    int depth = level;

    //
    // In the above loop, "level" was counted from the finest level (usually
    // thread) to the coarsest.  The caller expects that we will place the
    // labels in (*address2os)[].first.labels[] in the inverse order, so
    // we need to invert the vars saying which level means what.
    //
    if (threadLevel >= 0) {
        threadLevel = depth - threadLevel - 1;
    }
    if (coreLevel >= 0) {
        coreLevel = depth - coreLevel - 1;
    }
    KMP_DEBUG_ASSERT(pkgLevel >= 0);
    pkgLevel = depth - pkgLevel - 1;
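
    //
    // Illustrative example (assumed values): with depth == 3 and the cpuid
    // loop reporting threadLevel == 0, coreLevel == 1, pkgLevel == 2, the
    // inversion above yields threadLevel == 2, coreLevel == 1, pkgLevel == 0,
    // matching labels[] ordered from package (coarsest) down to thread.
    //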

    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
    // the defaults that we calculated from issuing cpuid without binding
    // to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE())
    {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Allocate the data structure to be returned.
    //
    AddrUnsPair *retval = (AddrUnsPair *)
      __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    unsigned int proc;
    int nApics = 0;
    for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(proc, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(proc);

        //
        // Extract the labels for each level in the machine topology map
        // from the Apic ID.
        //
        Address addr(depth);
        int prev_shift = 0;

        for (level = 0; level < depth; level++) {
            __kmp_x86_cpuid(11, level, &buf);
            unsigned apicId = buf.edx;
            if (buf.ebx == 0) {
                if (level != depth - 1) {
                    KMP_CPU_FREE(oldMask);
                    *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
                    return -1;
                }
                addr.labels[depth - level - 1] = apicId >> prev_shift;
                level++;
                break;
            }
            int shift = buf.eax & 0x1f;
            int mask = (1 << shift) - 1;
            addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
            prev_shift = shift;
        }
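
        //
        // Worked example (assumed values): suppose depth == 3 and cpuid(11)
        // reports a shift of 1 at the SMT level and 5 at the core level.
        // For an x2APIC id of 0x2d (binary 1|0110|1) the loop stores
        // thread = 0x2d & 0x1 = 1, core = (0x2d & 0x1f) >> 1 = 6, and
        // package = 0x2d >> 5 = 1 into labels[2], labels[1] and labels[0].
        //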
        if (level != depth) {
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }

        retval[nApics] = AddrUnsPair(addr, proc);
        nApics++;
    }

    //
    // We've collected all the info we need.
    // Restore the old affinity mask for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, return now.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(retval);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        //
        // Form an Address object which only includes the package level.
        //
        Address addr(1);
        addr.labels[0] = retval[0].first.labels[pkgLevel];
        retval[0].first = addr;

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
        }

        *address2os = retval;
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the table by physical Id.
    //
    qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

    //
    // Find the radix at each of the levels.
    //
    unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    for (level = 0; level < depth; level++) {
        totals[level] = 1;
        maxCt[level] = 1;
        counts[level] = 1;
        last[level] = retval[0].first.labels[level];
    }

    //
    // From here on, the iteration variable "level" runs from the finest
    // level to the coarsest, i.e. we iterate forward through
    // (*address2os)[].first.labels[] - in the previous loops, we iterated
    // backwards.
    //
    for (proc = 1; (int)proc < nApics; proc++) {
        int level;
        for (level = 0; level < depth; level++) {
            if (retval[proc].first.labels[level] != last[level]) {
                int j;
                for (j = level + 1; j < depth; j++) {
                    totals[j]++;
                    counts[j] = 1;
                    // The line below causes printing of incorrect topology
                    // information when the max value for some level
                    // (maxCt[level]) is encountered earlier in the array than
                    // some smaller value.  For example, suppose pkg0 has 4
                    // cores and pkg1 has 2 cores; then maxCt[1] ends up as 2,
                    // whereas it must be 4.
                    // TODO!!! Check if it can be commented safely
                    //maxCt[j] = 1;
                    last[j] = retval[proc].first.labels[j];
                }
                totals[level]++;
                counts[level]++;
                if (counts[level] > maxCt[level]) {
                    maxCt[level] = counts[level];
                }
                last[level] = retval[proc].first.labels[level];
                break;
            }
            else if (level == depth - 1) {
                __kmp_free(last);
                __kmp_free(maxCt);
                __kmp_free(counts);
                __kmp_free(totals);
                __kmp_free(retval);
                KMP_CPU_FREE(oldMask);
                *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
                return -1;
            }
        }
    }

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return if affinity is not enabled.
    //
    if (threadLevel >= 0) {
        __kmp_nThreadsPerCore = maxCt[threadLevel];
    }
    else {
        __kmp_nThreadsPerCore = 1;
    }
    nPackages = totals[pkgLevel];

    if (coreLevel >= 0) {
        __kmp_ncores = totals[coreLevel];
        nCoresPerPkg = maxCt[coreLevel];
    }
    else {
        __kmp_ncores = nPackages;
        nCoresPerPkg = 1;
    }

    //
    // Check to see if the machine topology is uniform
    //
    unsigned prod = maxCt[0];
    for (level = 1; level < depth; level++) {
        prod *= maxCt[level];
    }
    bool uniform = (prod == totals[level - 1]);
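
    //
    // Illustrative example (assumed counts): with maxCt == {2, 4, 2} from
    // package down to thread, prod == 16; the topology is uniform only if
    // the leaf total totals[depth - 1] is also 16, i.e. every package really
    // has 4 cores and every core really has 2 threads.
    //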

    //
    // Print the machine topology summary.
    //
    if (__kmp_affinity_verbose) {
        char mask[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (uniform) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }

        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);

        __kmp_str_buf_print(&buf, "%d", totals[0]);
        for (level = 1; level <= pkgLevel; level++) {
            __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
        }
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);

        __kmp_str_buf_free(&buf);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(last);
        __kmp_free(maxCt);
        __kmp_free(counts);
        __kmp_free(totals);
        __kmp_free(retval);
        KMP_CPU_FREE(oldMask);
        return 0;
    }
1691
1692 //
1693 // Find any levels with radiix 1, and remove them from the map
1694 // (except for the package level).
1695 //
1696 int new_depth = 0;
1697 for (level = 0; level < depth; level++) {
1698 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1699 continue;
1700 }
1701 new_depth++;
1702 }
1703
1704 //
1705 // If we are removing any levels, allocate a new vector to return,
1706 // and copy the relevant information to it.
1707 //
1708 if (new_depth != depth) {
1709 AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
1710 sizeof(AddrUnsPair) * nApics);
1711 for (proc = 0; (int)proc < nApics; proc++) {
1712 Address addr(new_depth);
1713 new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
1714 }
1715 int new_level = 0;
1716 for (level = 0; level < depth; level++) {
1717 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1718 if (level == threadLevel) {
1719 threadLevel = -1;
1720 }
1721 else if ((threadLevel >= 0) && (level < threadLevel)) {
1722 threadLevel--;
1723 }
1724 if (level == coreLevel) {
1725 coreLevel = -1;
1726 }
1727 else if ((coreLevel >= 0) && (level < coreLevel)) {
1728 coreLevel--;
1729 }
1730 if (level < pkgLevel) {
1731 pkgLevel--;
1732 }
1733 continue;
1734 }
1735 for (proc = 0; (int)proc < nApics; proc++) {
1736 new_retval[proc].first.labels[new_level]
1737 = retval[proc].first.labels[level];
1738 }
1739 new_level++;
1740 }
1741
1742 __kmp_free(retval);
1743 retval = new_retval;
1744 depth = new_depth;
1745 }
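    //
    // For example (hypothetical): on a part without SMT, maxCt[threadLevel]
    // is 1, so the thread level is dropped, depth shrinks from 3 to 2, and
    // threadLevel is reset to -1 so that later code treats the machine as
    // having no distinct thread level.
    //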
1746
1747 if (__kmp_affinity_gran_levels < 0) {
1748 //
1749 // Set the granularity level based on what levels are modeled
1750 // in the machine topology map.
1751 //
1752 __kmp_affinity_gran_levels = 0;
1753 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
1754 __kmp_affinity_gran_levels++;
1755 }
1756 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1757 __kmp_affinity_gran_levels++;
1758 }
1759 if (__kmp_affinity_gran > affinity_gran_package) {
1760 __kmp_affinity_gran_levels++;
1761 }
1762 }
1763
1764 if (__kmp_affinity_verbose) {
1765 __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
1766 coreLevel, threadLevel);
1767 }
1768
1769 __kmp_free(last);
1770 __kmp_free(maxCt);
1771 __kmp_free(counts);
1772 __kmp_free(totals);
1773 KMP_CPU_FREE(oldMask);
1774 *address2os = retval;
1775 return depth;
1776}
1777
1778
1779# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1780
1781
1782#define osIdIndex 0
1783#define threadIdIndex 1
1784#define coreIdIndex 2
1785#define pkgIdIndex 3
1786#define nodeIdIndex 4
1787
1788typedef unsigned *ProcCpuInfo;
1789static unsigned maxIndex = pkgIdIndex;
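//
// A sketch of how one record in the cpuinfo format maps onto these indices
// (hypothetical values; the "thread id" and "node_<n> id" fields appear only
// in alternate files in this format, not in stock Linux* OS /proc/cpuinfo):
//
//     processor   : 4      -> threadInfo[i][osIdIndex]
//     thread id   : 0      -> threadInfo[i][threadIdIndex]
//     core id     : 2      -> threadInfo[i][coreIdIndex]
//     physical id : 1      -> threadInfo[i][pkgIdIndex]
//     node_0 id   : 0      -> threadInfo[i][nodeIdIndex + 0]
//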
1790
1791
1792static int
1793__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
1794{
1795 const unsigned *aa = (const unsigned *)a;
1796 const unsigned *bb = (const unsigned *)b;
1797 if (aa[osIdIndex] < bb[osIdIndex]) return -1;
1798 if (aa[osIdIndex] > bb[osIdIndex]) return 1;
1799 return 0;
1800};
1801
1802
1803static int
1804__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
1805{
1806 unsigned i;
1807 const unsigned *aa = *((const unsigned **)a);
1808 const unsigned *bb = *((const unsigned **)b);
1809 for (i = maxIndex; ; i--) {
1810 if (aa[i] < bb[i]) return -1;
1811 if (aa[i] > bb[i]) return 1;
1812 if (i == osIdIndex) break;
1813 }
1814 return 0;
1815}
1816
1817
1818//
1819// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
1820// affinity map.
1821//
1822static int
1823__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
1824 kmp_i18n_id_t *const msg_id, FILE *f)
1825{
1826 *address2os = NULL;
1827 *msg_id = kmp_i18n_null;
1828
1829 //
1830 // Scan the file and count the number of "processor" (osId) fields,
Alp Toker8f2d3f02014-02-24 10:40:15 +00001831 // and find the highest value of <n> for a node_<n> field.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001832 //
1833 char buf[256];
1834 unsigned num_records = 0;
1835 while (! feof(f)) {
1836 buf[sizeof(buf) - 1] = 1;
1837 if (! fgets(buf, sizeof(buf), f)) {
1838 //
1839 // Read errors are presumably due to EOF
1840 //
1841 break;
1842 }
1843
1844 char s1[] = "processor";
1845 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1846 num_records++;
1847 continue;
1848 }
1849
1850 //
1851 // FIXME - this will match "node_<n> <garbage>"
1852 //
1853 unsigned level;
1854 if (sscanf(buf, "node_%u id", &level) == 1) {
1855 if (nodeIdIndex + level >= maxIndex) {
1856 maxIndex = nodeIdIndex + level;
1857 }
1858 continue;
1859 }
1860 }
1861
1862 //
1863 // Check for empty file / no valid processor records, or too many.
1864 // The number of records can't exceed the number of valid bits in the
1865 // affinity mask.
1866 //
1867 if (num_records == 0) {
1868 *line = 0;
1869 *msg_id = kmp_i18n_str_NoProcRecords;
1870 return -1;
1871 }
1872 if (num_records > (unsigned)__kmp_xproc) {
1873 *line = 0;
1874 *msg_id = kmp_i18n_str_TooManyProcRecords;
1875 return -1;
1876 }
1877
1878 //
1879 // Set the file pointer back to the beginning, so that we can scan the
1880 // file again, this time performing a full parse of the data.
1881 // Allocate a vector of ProcCpuInfo objects, where we will place the data.
1882 // Adding an extra element at the end allows us to remove a lot of extra
1883 // checks for termination conditions.
1884 //
1885 if (fseek(f, 0, SEEK_SET) != 0) {
1886 *line = 0;
1887 *msg_id = kmp_i18n_str_CantRewindCpuinfo;
1888 return -1;
1889 }
1890
1891 //
1892 // Allocate the array of records to store the proc info in. The dummy
1893 // element at the end makes the logic in filling them out easier to code.
1894 //
1895 unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
1896 * sizeof(unsigned *));
1897 unsigned i;
1898 for (i = 0; i <= num_records; i++) {
1899 threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
1900 * sizeof(unsigned));
1901 }
1902
1903#define CLEANUP_THREAD_INFO \
1904 for (i = 0; i <= num_records; i++) { \
1905 __kmp_free(threadInfo[i]); \
1906 } \
1907 __kmp_free(threadInfo);
1908
1909 //
1910 // A value of UINT_MAX means that we didn't find the field
1911 //
1912 unsigned __index;
1913
1914#define INIT_PROC_INFO(p) \
1915 for (__index = 0; __index <= maxIndex; __index++) { \
1916 (p)[__index] = UINT_MAX; \
1917 }
1918
1919 for (i = 0; i <= num_records; i++) {
1920 INIT_PROC_INFO(threadInfo[i]);
1921 }
1922
1923 unsigned num_avail = 0;
1924 *line = 0;
1925 while (! feof(f)) {
1926 //
1927 // Create an inner scoping level, so that all the goto targets at the
1928 // end of the loop appear in an outer scoping level. This avoids
1929 // warnings about jumping past an initialization to a target in the
1930 // same block.
1931 //
1932 {
1933 buf[sizeof(buf) - 1] = 1;
1934 bool long_line = false;
1935 if (! fgets(buf, sizeof(buf), f)) {
1936 //
1937 // Read errors are presumably due to EOF
1938 //
1939 // If there is valid data in threadInfo[num_avail], then fake
1940 // a blank line to ensure that the last address gets parsed.
1941 //
1942 bool valid = false;
1943 for (i = 0; i <= maxIndex; i++) {
1944 if (threadInfo[num_avail][i] != UINT_MAX) {
1945 valid = true;
1946 }
1947 }
1948 if (! valid) {
1949 break;
1950 }
1951 buf[0] = 0;
1952 } else if (!buf[sizeof(buf) - 1]) {
1953 //
1954 // The line is longer than the buffer. Set a flag and don't
1955 // emit an error if we were going to ignore the line anyway.
1956 //
1957 long_line = true;
1958
1959#define CHECK_LINE \
1960 if (long_line) { \
1961 CLEANUP_THREAD_INFO; \
1962 *msg_id = kmp_i18n_str_LongLineCpuinfo; \
1963 return -1; \
1964 }
1965 }
1966 (*line)++;
1967
1968 char s1[] = "processor";
1969 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1970 CHECK_LINE;
1971 char *p = strchr(buf + sizeof(s1) - 1, ':');
1972 unsigned val;
1973 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
1974 if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
1975 threadInfo[num_avail][osIdIndex] = val;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001976#if KMP_OS_LINUX && USE_SYSFS_INFO
1977 char path[256];
1978 snprintf(path, sizeof(path),
1979 "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
1980 threadInfo[num_avail][osIdIndex]);
1981 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
1982
1983 snprintf(path, sizeof(path),
1984 "/sys/devices/system/cpu/cpu%u/topology/core_id",
1985 threadInfo[num_avail][osIdIndex]);
1986 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001987 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001988#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001989 }
1990 char s2[] = "physical id";
1991 if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
1992 CHECK_LINE;
1993 char *p = strchr(buf + sizeof(s2) - 1, ':');
1994 unsigned val;
1995 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
1996 if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
1997 threadInfo[num_avail][pkgIdIndex] = val;
1998 continue;
1999 }
2000 char s3[] = "core id";
2001 if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
2002 CHECK_LINE;
2003 char *p = strchr(buf + sizeof(s3) - 1, ':');
2004 unsigned val;
2005 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2006 if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
2007 threadInfo[num_avail][coreIdIndex] = val;
2008 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002009#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jim Cownie5e8470a2013-09-27 10:38:44 +00002010 }
2011 char s4[] = "thread id";
2012 if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
2013 CHECK_LINE;
2014 char *p = strchr(buf + sizeof(s4) - 1, ':');
2015 unsigned val;
2016 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2017 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
2018 threadInfo[num_avail][threadIdIndex] = val;
2019 continue;
2020 }
2021 unsigned level;
2022 if (sscanf(buf, "node_%u id", &level) == 1) {
2023 CHECK_LINE;
2024 char *p = strchr(buf + sizeof(s4) - 1, ':');
2025 unsigned val;
2026 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2027 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
2028 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
2029 threadInfo[num_avail][nodeIdIndex + level] = val;
2030 continue;
2031 }
2032
2033 //
2034 // We didn't recognize the leading token on the line.
2035 // There are lots of leading tokens that we don't recognize -
2036 // if the line isn't empty, go on to the next line.
2037 //
2038 if ((*buf != 0) && (*buf != '\n')) {
2039 //
2040 // If the line is longer than the buffer, read characters
2041 // until we find a newline.
2042 //
2043 if (long_line) {
2044 int ch;
2045 while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
2046 }
2047 continue;
2048 }
2049
2050 //
2051 // A newline has signalled the end of the processor record.
2052 // Check that there aren't too many procs specified.
2053 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002054 if ((int)num_avail == __kmp_xproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002055 CLEANUP_THREAD_INFO;
2056 *msg_id = kmp_i18n_str_TooManyEntries;
2057 return -1;
2058 }
2059
2060 //
2061 // Check for missing fields. The osId field must be there, and we
2062 // currently require that the physical id field is specified, also.
2063 //
2064 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
2065 CLEANUP_THREAD_INFO;
2066 *msg_id = kmp_i18n_str_MissingProcField;
2067 return -1;
2068 }
2069 if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
2070 CLEANUP_THREAD_INFO;
2071 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
2072 return -1;
2073 }
2074
2075 //
2076 // Skip this proc if it is not included in the machine model.
2077 //
2078 if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
2079 INIT_PROC_INFO(threadInfo[num_avail]);
2080 continue;
2081 }
2082
2083 //
2084 // We have a successful parse of this proc's info.
2085 // Increment the counter, and prepare for the next proc.
2086 //
2087 num_avail++;
2088 KMP_ASSERT(num_avail <= num_records);
2089 INIT_PROC_INFO(threadInfo[num_avail]);
2090 }
2091 continue;
2092
2093 no_val:
2094 CLEANUP_THREAD_INFO;
2095 *msg_id = kmp_i18n_str_MissingValCpuinfo;
2096 return -1;
2097
2098 dup_field:
2099 CLEANUP_THREAD_INFO;
2100 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
2101 return -1;
2102 }
2103 *line = 0;
2104
2105# if KMP_MIC && REDUCE_TEAM_SIZE
2106 unsigned teamSize = 0;
2107# endif // KMP_MIC && REDUCE_TEAM_SIZE
2108
2109 // check for num_records == __kmp_xproc ???
2110
2111 //
2112 // If there's only one thread context to bind to, form an Address object
2113 // with depth 1 and return immediately (or, if affinity is off, set
2114 // address2os to NULL and return).
2115 //
2116 // If it is configured to omit the package level when there is only a
2117 // single package, the logic at the end of this routine won't work if
2118 // there is only a single thread - it would try to form an Address
2119 // object with depth 0.
2120 //
2121 KMP_ASSERT(num_avail > 0);
2122 KMP_ASSERT(num_avail <= num_records);
2123 if (num_avail == 1) {
2124 __kmp_ncores = 1;
2125 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002126 if (__kmp_affinity_verbose) {
2127 if (! KMP_AFFINITY_CAPABLE()) {
2128 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2129 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2130 KMP_INFORM(Uniform, "KMP_AFFINITY");
2131 }
2132 else {
2133 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2134 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
2135 fullMask);
2136 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2137 if (__kmp_affinity_respect_mask) {
2138 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2139 } else {
2140 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2141 }
2142 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2143 KMP_INFORM(Uniform, "KMP_AFFINITY");
2144 }
2145 int index;
2146 kmp_str_buf_t buf;
2147 __kmp_str_buf_init(&buf);
2148 __kmp_str_buf_print(&buf, "1");
2149 for (index = maxIndex - 1; index > pkgIdIndex; index--) {
2150 __kmp_str_buf_print(&buf, " x 1");
2151 }
2152 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
2153 __kmp_str_buf_free(&buf);
2154 }
2155
2156 if (__kmp_affinity_type == affinity_none) {
2157 CLEANUP_THREAD_INFO;
2158 return 0;
2159 }
2160
2161 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
2162 Address addr(1);
2163 addr.labels[0] = threadInfo[0][pkgIdIndex];
2164 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
2165
2166 if (__kmp_affinity_gran_levels < 0) {
2167 __kmp_affinity_gran_levels = 0;
2168 }
2169
2170 if (__kmp_affinity_verbose) {
2171 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
2172 }
2173
2174 CLEANUP_THREAD_INFO;
2175 return 1;
2176 }
2177
2178 //
2179 // Sort the threadInfo table by physical Id.
2180 //
2181 qsort(threadInfo, num_avail, sizeof(*threadInfo),
2182 __kmp_affinity_cmp_ProcCpuInfo_phys_id);
2183
2184 //
2185 // The table is now sorted by pkgId / coreId / threadId, but we really
2186 // don't know the radix of any of the fields. pkgId's may be sparsely
2187 // assigned among the chips on a system. Although coreId's are usually
2188 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
2189 // [0..threadsPerCore-1], we don't want to make any such assumptions.
2190 //
2191 // For that matter, we don't know what coresPerPkg and threadsPerCore
2192 // (or the total # packages) are at this point - we want to determine
2193 // that now. We only have an upper bound on the first two figures.
2194 //
2195 unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
2196 * sizeof(unsigned));
2197 unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
2198 * sizeof(unsigned));
2199 unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
2200 * sizeof(unsigned));
2201 unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
2202 * sizeof(unsigned));
2203
2204 bool assign_thread_ids = false;
2205 unsigned threadIdCt;
2206 unsigned index;
2207
2208 restart_radix_check:
2209 threadIdCt = 0;
2210
2211 //
2212 // Initialize the counter arrays with data from threadInfo[0].
2213 //
2214 if (assign_thread_ids) {
2215 if (threadInfo[0][threadIdIndex] == UINT_MAX) {
2216 threadInfo[0][threadIdIndex] = threadIdCt++;
2217 }
2218 else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
2219 threadIdCt = threadInfo[0][threadIdIndex] + 1;
2220 }
2221 }
2222 for (index = 0; index <= maxIndex; index++) {
2223 counts[index] = 1;
2224 maxCt[index] = 1;
2225 totals[index] = 1;
2226 lastId[index] = threadInfo[0][index];
2227 }
2228
2229 //
2230 // Run through the rest of the OS procs.
2231 //
2232 for (i = 1; i < num_avail; i++) {
2233 //
2234 // Find the most significant index whose id differs
2235 // from the id for the previous OS proc.
2236 //
2237 for (index = maxIndex; index >= threadIdIndex; index--) {
2238 if (assign_thread_ids && (index == threadIdIndex)) {
2239 //
2240 // Auto-assign the thread id field if it wasn't specified.
2241 //
2242 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2243 threadInfo[i][threadIdIndex] = threadIdCt++;
2244 }
2245
2246 //
2247 // Apparently the thread id field was specified for some
2248 // entries and not others. Start the thread id counter
2249 // off at the next higher thread id.
2250 //
2251 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2252 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2253 }
2254 }
2255 if (threadInfo[i][index] != lastId[index]) {
2256 //
2257 // Run through all indices which are less significant,
2258 // and reset the counts to 1.
2259 //
2260 // At all levels up to and including index, we need to
2261 // increment the totals and record the last id.
2262 //
2263 unsigned index2;
2264 for (index2 = threadIdIndex; index2 < index; index2++) {
2265 totals[index2]++;
2266 if (counts[index2] > maxCt[index2]) {
2267 maxCt[index2] = counts[index2];
2268 }
2269 counts[index2] = 1;
2270 lastId[index2] = threadInfo[i][index2];
2271 }
2272 counts[index]++;
2273 totals[index]++;
2274 lastId[index] = threadInfo[i][index];
2275
2276 if (assign_thread_ids && (index > threadIdIndex)) {
2277
2278# if KMP_MIC && REDUCE_TEAM_SIZE
2279 //
2280 // The default team size is the total #threads in the machine
2281 // minus 1 thread for every core that has 3 or more threads.
2282 //
2283 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2284# endif // KMP_MIC && REDUCE_TEAM_SIZE
2285
2286 //
2287 // Restart the thread counter, as we are on a new core.
2288 //
2289 threadIdCt = 0;
2290
2291 //
2292 // Auto-assign the thread id field if it wasn't specified.
2293 //
2294 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2295 threadInfo[i][threadIdIndex] = threadIdCt++;
2296 }
2297
2298 //
2299 // Apparently the thread id field was specified for some
2300 // entries and not others. Start the thread id counter
2301 // off at the next higher thread id.
2302 //
2303 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2304 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2305 }
2306 }
2307 break;
2308 }
2309 }
2310 if (index < threadIdIndex) {
2311 //
2312 // If thread ids were specified, it is an error if they are not
2313 // unique. Also, check that we haven't already restarted the
2314 // loop (to be safe - shouldn't need to).
2315 //
2316 if ((threadInfo[i][threadIdIndex] != UINT_MAX)
2317 || assign_thread_ids) {
2318 __kmp_free(lastId);
2319 __kmp_free(totals);
2320 __kmp_free(maxCt);
2321 __kmp_free(counts);
2322 CLEANUP_THREAD_INFO;
2323 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2324 return -1;
2325 }
2326
2327 //
2328 // If the thread ids were not specified and we see entries
2329 // that are duplicates, start the loop over and
2330 // assign the thread ids manually.
2331 //
2332 assign_thread_ids = true;
2333 goto restart_radix_check;
2334 }
2335 }
2336
2337# if KMP_MIC && REDUCE_TEAM_SIZE
2338 //
2339 // The default team size is the total #threads in the machine
2340 // minus 1 thread for every core that has 3 or more threads.
2341 //
2342 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2343# endif // KMP_MIC && REDUCE_TEAM_SIZE
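    //
    // For example (hypothetical): on a 60-core part with 4 thread contexts
    // per core, each core contributes threadIdCt - 1 = 3, so teamSize ends
    // up as 60 * 3 = 180 and the default team leaves one context per core
    // unused.
    //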
2344
2345 for (index = threadIdIndex; index <= maxIndex; index++) {
2346 if (counts[index] > maxCt[index]) {
2347 maxCt[index] = counts[index];
2348 }
2349 }
2350
2351 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2352 nCoresPerPkg = maxCt[coreIdIndex];
2353 nPackages = totals[pkgIdIndex];
2354
2355 //
2356 // Check to see if the machine topology is uniform
2357 //
2358 unsigned prod = totals[maxIndex];
2359 for (index = threadIdIndex; index < maxIndex; index++) {
2360 prod *= maxCt[index];
2361 }
2362 bool uniform = (prod == totals[threadIdIndex]);
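    //
    // Worked example (hypothetical): 8 records from a 2-pkg x 2-core x
    // 2-thread machine leave totals[pkgIdIndex] = 2, totals[coreIdIndex] = 4,
    // totals[threadIdIndex] = 8, and maxCt[coreIdIndex] =
    // maxCt[threadIdIndex] = 2. With no node_<n> fields, maxIndex is
    // pkgIdIndex, so prod = 2 * 2 * 2 = 8 == totals[threadIdIndex], and the
    // topology is uniform.
    //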
2363
2364 //
2365 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00002366 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002367 // nCoresPerPkg, & nPackages. Make sure all these vars are set
2368 // correctly, and return now if affinity is not enabled.
2369 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00002370 __kmp_ncores = totals[coreIdIndex];
2371
2372 if (__kmp_affinity_verbose) {
2373 if (! KMP_AFFINITY_CAPABLE()) {
2374 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2375 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2376 if (uniform) {
2377 KMP_INFORM(Uniform, "KMP_AFFINITY");
2378 } else {
2379 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2380 }
2381 }
2382 else {
2383 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2384 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
2385 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2386 if (__kmp_affinity_respect_mask) {
2387 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2388 } else {
2389 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2390 }
2391 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2392 if (uniform) {
2393 KMP_INFORM(Uniform, "KMP_AFFINITY");
2394 } else {
2395 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2396 }
2397 }
2398 kmp_str_buf_t buf;
2399 __kmp_str_buf_init(&buf);
2400
2401 __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
2402 for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2403 __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
2404 }
2405 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
2406 maxCt[threadIdIndex], __kmp_ncores);
2407
2408 __kmp_str_buf_free(&buf);
2409 }
2410
2411# if KMP_MIC && REDUCE_TEAM_SIZE
2412 //
2413 // Set the default team size.
2414 //
2415 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2416 __kmp_dflt_team_nth = teamSize;
2417 KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
2418 __kmp_dflt_team_nth));
2419 }
2420# endif // KMP_MIC && REDUCE_TEAM_SIZE
2421
2422 if (__kmp_affinity_type == affinity_none) {
2423 __kmp_free(lastId);
2424 __kmp_free(totals);
2425 __kmp_free(maxCt);
2426 __kmp_free(counts);
2427 CLEANUP_THREAD_INFO;
2428 return 0;
2429 }
2430
2431 //
2432 // Count the number of levels which have more nodes at that level than
2433 // at the parent's level (with an implicit root node above the top
2434 // level). This is equivalent to saying that there is at least
2435 // one node at this level which has a sibling. These levels are in the
2436 // map, and the package level is always in the map.
2437 //
2438 bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
2439 int level = 0;
2440 for (index = threadIdIndex; index < maxIndex; index++) {
2441 KMP_ASSERT(totals[index] >= totals[index + 1]);
2442 inMap[index] = (totals[index] > totals[index + 1]);
2443 }
2444 inMap[maxIndex] = (totals[maxIndex] > 1);
2445 inMap[pkgIdIndex] = true;
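    //
    // For example (hypothetical): with 2 packages x 2 cores and no SMT,
    // totals[threadIdIndex] = totals[coreIdIndex] = 4 and
    // totals[pkgIdIndex] = 2, so inMap[threadIdIndex] is false (every thread
    // is an only child) while the core and package levels stay in the map,
    // giving depth == 2.
    //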
2446
2447 int depth = 0;
2448 for (index = threadIdIndex; index <= maxIndex; index++) {
2449 if (inMap[index]) {
2450 depth++;
2451 }
2452 }
2453 KMP_ASSERT(depth > 0);
2454
2455 //
2456 // Construct the data structure that is to be returned.
2457 //
2458 *address2os = (AddrUnsPair*)
2459 __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
2460 int pkgLevel = -1;
2461 int coreLevel = -1;
2462 int threadLevel = -1;
2463
2464 for (i = 0; i < num_avail; ++i) {
2465 Address addr(depth);
2466 unsigned os = threadInfo[i][osIdIndex];
2467 int src_index;
2468 int dst_index = 0;
2469
2470 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
2471 if (! inMap[src_index]) {
2472 continue;
2473 }
2474 addr.labels[dst_index] = threadInfo[i][src_index];
2475 if (src_index == pkgIdIndex) {
2476 pkgLevel = dst_index;
2477 }
2478 else if (src_index == coreIdIndex) {
2479 coreLevel = dst_index;
2480 }
2481 else if (src_index == threadIdIndex) {
2482 threadLevel = dst_index;
2483 }
2484 dst_index++;
2485 }
2486 (*address2os)[i] = AddrUnsPair(addr, os);
2487 }
2488
2489 if (__kmp_affinity_gran_levels < 0) {
2490 //
2491 // Set the granularity level based on what levels are modeled
2492 // in the machine topology map.
2493 //
2494 unsigned src_index;
2495 __kmp_affinity_gran_levels = 0;
2496 for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2497 if (! inMap[src_index]) {
2498 continue;
2499 }
2500 switch (src_index) {
2501 case threadIdIndex:
2502 if (__kmp_affinity_gran > affinity_gran_thread) {
2503 __kmp_affinity_gran_levels++;
2504 }
2505
2506 break;
2507 case coreIdIndex:
2508 if (__kmp_affinity_gran > affinity_gran_core) {
2509 __kmp_affinity_gran_levels++;
2510 }
2511 break;
2512
2513 case pkgIdIndex:
2514 if (__kmp_affinity_gran > affinity_gran_package) {
2515 __kmp_affinity_gran_levels++;
2516 }
2517 break;
2518 }
2519 }
2520 }
2521
2522 if (__kmp_affinity_verbose) {
2523 __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2524 coreLevel, threadLevel);
2525 }
2526
2527 __kmp_free(inMap);
2528 __kmp_free(lastId);
2529 __kmp_free(totals);
2530 __kmp_free(maxCt);
2531 __kmp_free(counts);
2532 CLEANUP_THREAD_INFO;
2533 return depth;
2534}
2535
2536
2537//
2538// Create and return a table of affinity masks, indexed by OS thread ID.
2539// This routine handles OR'ing together all the affinity masks of threads
2540// that are sufficiently close, if granularity > fine.
2541//
2542static kmp_affin_mask_t *
2543__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
2544 AddrUnsPair *address2os, unsigned numAddrs)
2545{
2546 //
2547 // First form a table of affinity masks in order of OS thread id.
2548 //
2549 unsigned depth;
2550 unsigned maxOsId;
2551 unsigned i;
2552
2553 KMP_ASSERT(numAddrs > 0);
2554 depth = address2os[0].first.depth;
2555
2556 maxOsId = 0;
2557 for (i = 0; i < numAddrs; i++) {
2558 unsigned osId = address2os[i].second;
2559 if (osId > maxOsId) {
2560 maxOsId = osId;
2561 }
2562 }
2563 kmp_affin_mask_t *osId2Mask = (kmp_affin_mask_t *)__kmp_allocate(
2564 (maxOsId + 1) * __kmp_affin_mask_size);
2565
2566 //
2567 // Sort the address2os table according to physical order. Doing so
2568 // will put all threads on the same core/package/node in consecutive
2569 // locations.
2570 //
2571 qsort(address2os, numAddrs, sizeof(*address2os),
2572 __kmp_affinity_cmp_Address_labels);
2573
2574 KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2575 if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
2576 KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
2577 }
2578 if (__kmp_affinity_gran_levels >= (int)depth) {
2579 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2580 && (__kmp_affinity_type != affinity_none))) {
2581 KMP_WARNING(AffThreadsMayMigrate);
2582 }
2583 }
2584
2585 //
2586 // Run through the table, forming the masks for all threads on each
2587 // core. Threads on the same core will have identical "Address"
2588 // objects, not considering the last level, which must be the thread
2589 // id. All threads on a core will appear consecutively.
2590 //
2591 unsigned unique = 0;
2592 unsigned j = 0; // index of 1st thread on core
2593 unsigned leader = 0;
2594 Address *leaderAddr = &(address2os[0].first);
2595 kmp_affin_mask_t *sum
2596 = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
2597 KMP_CPU_ZERO(sum);
2598 KMP_CPU_SET(address2os[0].second, sum);
2599 for (i = 1; i < numAddrs; i++) {
2600 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002601 // If this thread is sufficiently close to the leader (within the
Jim Cownie5e8470a2013-09-27 10:38:44 +00002602 // granularity setting), then set the bit for this os thread in the
2603 // affinity mask for this group, and go on to the next thread.
2604 //
2605 if (leaderAddr->isClose(address2os[i].first,
2606 __kmp_affinity_gran_levels)) {
2607 KMP_CPU_SET(address2os[i].second, sum);
2608 continue;
2609 }
2610
2611 //
2612 // For every thread in this group, copy the mask to the thread's
2613 // entry in the osId2Mask table. Mark the first address as a
2614 // leader.
2615 //
2616 for (; j < i; j++) {
2617 unsigned osId = address2os[j].second;
2618 KMP_DEBUG_ASSERT(osId <= maxOsId);
2619 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2620 KMP_CPU_COPY(mask, sum);
2621 address2os[j].first.leader = (j == leader);
2622 }
2623 unique++;
2624
2625 //
2626 // Start a new mask.
2627 //
2628 leader = i;
2629 leaderAddr = &(address2os[i].first);
2630 KMP_CPU_ZERO(sum);
2631 KMP_CPU_SET(address2os[i].second, sum);
2632 }
2633
2634 //
2635 // For every thread in last group, copy the mask to the thread's
2636 // entry in the osId2Mask table.
2637 //
2638 for (; j < i; j++) {
2639 unsigned osId = address2os[j].second;
2640 KMP_DEBUG_ASSERT(osId <= maxOsId);
2641 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2642 KMP_CPU_COPY(mask, sum);
2643 address2os[j].first.leader = (j == leader);
2644 }
2645 unique++;
2646
2647 *maxIndex = maxOsId;
2648 *numUnique = unique;
2649 return osId2Mask;
2650}
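//
// Usage sketch (hypothetical): with granularity=core on a 2-way SMT machine,
// __kmp_affinity_gran_levels is 1, so two OS procs such as 6 and 7 that share
// a core compare as "close"; both of their osId2Mask entries become the mask
// {6,7}, and numUnique counts one mask per core rather than one per thread
// context.
//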
2651
2652
2653//
2654// Stuff for the affinity proclist parsers. It's easier to declare these vars
2655 // as file-static than to try to pass them through the calling sequence of
2656// the recursive-descent OMP_PLACES parser.
2657//
2658static kmp_affin_mask_t *newMasks;
2659static int numNewMasks;
2660static int nextNewMask;
2661
2662#define ADD_MASK(_mask) \
2663 { \
2664 if (nextNewMask >= numNewMasks) { \
2665 numNewMasks *= 2; \
2666 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
2667 numNewMasks * __kmp_affin_mask_size); \
2668 } \
2669 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
2670 nextNewMask++; \
2671 }
2672
2673#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
2674 { \
2675 if (((_osId) > _maxOsId) || \
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002676 (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002677 if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
2678 && (__kmp_affinity_type != affinity_none))) { \
2679 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
2680 } \
2681 } \
2682 else { \
2683 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
2684 } \
2685 }
2686
2687
2688//
2689// Re-parse the proclist (for the explicit affinity type), and form the list
2690// of affinity newMasks indexed by gtid.
2691//
2692static void
2693__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
2694 unsigned int *out_numMasks, const char *proclist,
2695 kmp_affin_mask_t *osId2Mask, int maxOsId)
2696{
2697 const char *scan = proclist;
2698 const char *next = proclist;
2699
2700 //
2701 // We use malloc() for the temporary mask vector,
2702 // so that we can use realloc() to extend it.
2703 //
2704 numNewMasks = 2;
2705 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
2706 * __kmp_affin_mask_size);
2707 nextNewMask = 0;
2708 kmp_affin_mask_t *sumMask = (kmp_affin_mask_t *)__kmp_allocate(
2709 __kmp_affin_mask_size);
2710 int setSize = 0;
2711
2712 for (;;) {
2713 int start, end, stride;
2714
2715 SKIP_WS(scan);
2716 next = scan;
2717 if (*next == '\0') {
2718 break;
2719 }
2720
2721 if (*next == '{') {
2722 int num;
2723 setSize = 0;
2724 next++; // skip '{'
2725 SKIP_WS(next);
2726 scan = next;
2727
2728 //
2729 // Read the first integer in the set.
2730 //
2731 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2732 "bad proclist");
2733 SKIP_DIGITS(next);
2734 num = __kmp_str_to_int(scan, *next);
2735 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2736
2737 //
2738 // Copy the mask for that osId to the sum (union) mask.
2739 //
2740 if ((num > maxOsId) ||
2741 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2742 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2743 && (__kmp_affinity_type != affinity_none))) {
2744 KMP_WARNING(AffIgnoreInvalidProcID, num);
2745 }
2746 KMP_CPU_ZERO(sumMask);
2747 }
2748 else {
2749 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2750 setSize = 1;
2751 }
2752
2753 for (;;) {
2754 //
2755 // Check for end of set.
2756 //
2757 SKIP_WS(next);
2758 if (*next == '}') {
2759 next++; // skip '}'
2760 break;
2761 }
2762
2763 //
2764 // Skip optional comma.
2765 //
2766 if (*next == ',') {
2767 next++;
2768 }
2769 SKIP_WS(next);
2770
2771 //
2772 // Read the next integer in the set.
2773 //
2774 scan = next;
2775 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2776 "bad explicit proc list");
2777
2778 SKIP_DIGITS(next);
2779 num = __kmp_str_to_int(scan, *next);
2780 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2781
2782 //
2783 // Add the mask for that osId to the sum mask.
2784 //
2785 if ((num > maxOsId) ||
2786 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2787 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2788 && (__kmp_affinity_type != affinity_none))) {
2789 KMP_WARNING(AffIgnoreInvalidProcID, num);
2790 }
2791 }
2792 else {
2793 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2794 setSize++;
2795 }
2796 }
2797 if (setSize > 0) {
2798 ADD_MASK(sumMask);
2799 }
2800
2801 SKIP_WS(next);
2802 if (*next == ',') {
2803 next++;
2804 }
2805 scan = next;
2806 continue;
2807 }
2808
2809 //
2810 // Read the first integer.
2811 //
2812 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2813 SKIP_DIGITS(next);
2814 start = __kmp_str_to_int(scan, *next);
2815 KMP_ASSERT2(start >= 0, "bad explicit proc list");
2816 SKIP_WS(next);
2817
2818 //
2819 // If this isn't a range, then add a mask to the list and go on.
2820 //
2821 if (*next != '-') {
2822 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2823
2824 //
2825 // Skip optional comma.
2826 //
2827 if (*next == ',') {
2828 next++;
2829 }
2830 scan = next;
2831 continue;
2832 }
2833
2834 //
2835 // This is a range. Skip over the '-' and read in the 2nd int.
2836 //
2837 next++; // skip '-'
2838 SKIP_WS(next);
2839 scan = next;
2840 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2841 SKIP_DIGITS(next);
2842 end = __kmp_str_to_int(scan, *next);
2843 KMP_ASSERT2(end >= 0, "bad explicit proc list");
2844
2845 //
2846 // Check for a stride parameter
2847 //
2848 stride = 1;
2849 SKIP_WS(next);
2850 if (*next == ':') {
2851 //
2852 // A stride is specified. Skip over the ':' and read the 3rd int.
2853 //
2854 int sign = +1;
2855 next++; // skip ':'
2856 SKIP_WS(next);
2857 scan = next;
2858 if (*next == '-') {
2859 sign = -1;
2860 next++;
2861 SKIP_WS(next);
2862 scan = next;
2863 }
2864 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2865 "bad explicit proc list");
2866 SKIP_DIGITS(next);
2867 stride = __kmp_str_to_int(scan, *next);
2868 KMP_ASSERT2(stride >= 0, "bad explicit proc list");
2869 stride *= sign;
2870 }
2871
2872 //
2873 // Do some range checks.
2874 //
2875 KMP_ASSERT2(stride != 0, "bad explicit proc list");
2876 if (stride > 0) {
2877 KMP_ASSERT2(start <= end, "bad explicit proc list");
2878 }
2879 else {
2880 KMP_ASSERT2(start >= end, "bad explicit proc list");
2881 }
2882 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
2883
2884 //
2885 // Add the mask for each OS proc # to the list.
2886 //
2887 if (stride > 0) {
2888 do {
2889 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2890 start += stride;
2891 } while (start <= end);
2892 }
2893 else {
2894 do {
2895 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2896 start += stride;
2897 } while (start >= end);
2898 }
2899
2900 //
2901 // Skip optional comma.
2902 //
2903 SKIP_WS(next);
2904 if (*next == ',') {
2905 next++;
2906 }
2907 scan = next;
2908 }
2909
2910 *out_numMasks = nextNewMask;
2911 if (nextNewMask == 0) {
2912 *out_masks = NULL;
2913 KMP_INTERNAL_FREE(newMasks);
2914 return;
2915 }
2916 *out_masks
2917 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
2918 memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
2919 __kmp_free(sumMask);
2920 KMP_INTERNAL_FREE(newMasks);
2921}
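//
// Example (hypothetical, assuming all of the ids below are valid OS procs):
// the proclist "0-6:2,9,{10,11}" yields the masks {0}, {2}, {4}, {6}, {9},
// and {10,11} - a range with a stride adds one mask per proc, while a {...}
// set is OR'd into a single mask. Invalid ids are skipped with a warning.
//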
2922
2923
2924# if OMP_40_ENABLED
2925
2926/*-----------------------------------------------------------------------------
2927
2928Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
2929 places. Again, here is the grammar:
2930
2931place_list := place
2932place_list := place , place_list
2933place := num
2934place := place : num
2935place := place : num : signed
2936place := { subplacelist }
2937place := ! place // (lowest priority)
2938subplace_list := subplace
2939subplace_list := subplace , subplace_list
2940subplace := num
2941subplace := num : num
2942subplace := num : num : signed
2943signed := num
2944signed := + signed
2945signed := - signed
2946
2947-----------------------------------------------------------------------------*/
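//
// Two hypothetical examples of strings this grammar accepts:
// OMP_PLACES="{0,1},{2,3},{4,5},{6,7}" names four explicit places, and
// OMP_PLACES="{0,1}:4:2" names the same four places by replicating the
// subplace {0,1} four times, shifting its proc ids up by 2 each time.
//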
2948
2949static void
2950__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
2951 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
2952{
2953 const char *next;
2954
2955 for (;;) {
2956 int start, count, stride, i;
2957
2958 //
2959 // Read in the starting proc id
2960 //
2961 SKIP_WS(*scan);
2962 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2963 "bad explicit places list");
2964 next = *scan;
2965 SKIP_DIGITS(next);
2966 start = __kmp_str_to_int(*scan, *next);
2967 KMP_ASSERT(start >= 0);
2968 *scan = next;
2969
2970 //
2971 // valid follow sets are ',' ':' and '}'
2972 //
2973 SKIP_WS(*scan);
2974 if (**scan == '}' || **scan == ',') {
2975 if ((start > maxOsId) ||
2976 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
2977 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2978 && (__kmp_affinity_type != affinity_none))) {
2979 KMP_WARNING(AffIgnoreInvalidProcID, start);
2980 }
2981 }
2982 else {
2983 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
2984 (*setSize)++;
2985 }
2986 if (**scan == '}') {
2987 break;
2988 }
2989 (*scan)++; // skip ','
2990 continue;
2991 }
2992 KMP_ASSERT2(**scan == ':', "bad explicit places list");
2993 (*scan)++; // skip ':'
2994
2995 //
2996 // Read count parameter
2997 //
2998 SKIP_WS(*scan);
2999 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3000 "bad explicit places list");
3001 next = *scan;
3002 SKIP_DIGITS(next);
3003 count = __kmp_str_to_int(*scan, *next);
3004 KMP_ASSERT(count >= 0);
3005 *scan = next;
3006
3007 //
3008 // valid follow sets are ',' ':' and '}'
3009 //
3010 SKIP_WS(*scan);
3011 if (**scan == '}' || **scan == ',') {
3012 for (i = 0; i < count; i++) {
3013 if ((start > maxOsId) ||
3014 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3015 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3016 && (__kmp_affinity_type != affinity_none))) {
3017 KMP_WARNING(AffIgnoreInvalidProcID, start);
3018 }
3019 break; // don't proliferate warnings for large count
3020 }
3021 else {
3022 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3023 start++;
3024 (*setSize)++;
3025 }
3026 }
3027 if (**scan == '}') {
3028 break;
3029 }
3030 (*scan)++; // skip ','
3031 continue;
3032 }
3033 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3034 (*scan)++; // skip ':'
3035
3036 //
3037 // Read stride parameter
3038 //
3039 int sign = +1;
3040 for (;;) {
3041 SKIP_WS(*scan);
3042 if (**scan == '+') {
3043 (*scan)++; // skip '+'
3044 continue;
3045 }
3046 if (**scan == '-') {
3047 sign *= -1;
3048 (*scan)++; // skip '-'
3049 continue;
3050 }
3051 break;
3052 }
3053 SKIP_WS(*scan);
3054 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3055 "bad explicit places list");
3056 next = *scan;
3057 SKIP_DIGITS(next);
3058 stride = __kmp_str_to_int(*scan, *next);
3059 KMP_ASSERT(stride >= 0);
3060 *scan = next;
3061 stride *= sign;
3062
3063 //
3064 // valid follow sets are ',' and '}'
3065 //
3066 SKIP_WS(*scan);
3067 if (**scan == '}' || **scan == ',') {
3068 for (i = 0; i < count; i++) {
3069 if ((start > maxOsId) ||
3070 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3071 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3072 && (__kmp_affinity_type != affinity_none))) {
3073 KMP_WARNING(AffIgnoreInvalidProcID, start);
3074 }
3075 break; // don't proliferate warnings for large count
3076 }
3077 else {
3078 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3079 start += stride;
3080 (*setSize)++;
3081 }
3082 }
3083 if (**scan == '}') {
3084 break;
3085 }
3086 (*scan)++; // skip ','
3087 continue;
3088 }
3089
3090 KMP_ASSERT2(0, "bad explicit places list");
3091 }
3092}
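//
// For example (hypothetical): the subplace "0:4" unions procs {0,1,2,3} into
// tempMask, while "0:4:2" - start 0, count 4, stride 2 - unions {0,2,4,6}.
//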
3093
3094
3095static void
3096__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
3097 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3098{
3099 const char *next;
3100
3101 //
3102 // valid follow sets are '{' '!' and num
3103 //
3104 SKIP_WS(*scan);
3105 if (**scan == '{') {
3106 (*scan)++; // skip '{'
3107 __kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask,
3108 setSize);
3109 KMP_ASSERT2(**scan == '}', "bad explicit places list");
3110 (*scan)++; // skip '}'
3111 }
3112 else if (**scan == '!') {
3113 (*scan)++; // skip '!' before recursing, or the parser would never advance
3114 __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
3115 KMP_CPU_COMPLEMENT(tempMask);
3116 }
3117 else if ((**scan >= '0') && (**scan <= '9')) {
3118 next = *scan;
3119 SKIP_DIGITS(next);
3120 int num = __kmp_str_to_int(*scan, *next);
3121 KMP_ASSERT(num >= 0);
3122 if ((num > maxOsId) ||
3123 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3124 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3125 && (__kmp_affinity_type != affinity_none))) {
3126 KMP_WARNING(AffIgnoreInvalidProcID, num);
3127 }
3128 }
3129 else {
3130 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
3131 (*setSize)++;
3132 }
3133 *scan = next; // skip num
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003134 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003135 else {
3136 KMP_ASSERT2(0, "bad explicit places list");
3137 }
3138}
3139
3140
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003141//static void
3142void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003143__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
3144 unsigned int *out_numMasks, const char *placelist,
3145 kmp_affin_mask_t *osId2Mask, int maxOsId)
3146{
3147 const char *scan = placelist;
3148 const char *next = placelist;
3149
3150 numNewMasks = 2;
3151 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
3152 * __kmp_affin_mask_size);
3153 nextNewMask = 0;
3154
3155 kmp_affin_mask_t *tempMask = (kmp_affin_mask_t *)__kmp_allocate(
3156 __kmp_affin_mask_size);
3157 KMP_CPU_ZERO(tempMask);
3158 int setSize = 0;
3159
3160 for (;;) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003161 __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
3162
3163 //
3164 // valid follow sets are ',' ':' and EOL
3165 //
3166 SKIP_WS(scan);
3167 if (*scan == '\0' || *scan == ',') {
3168 if (setSize > 0) {
3169 ADD_MASK(tempMask);
3170 }
3171 KMP_CPU_ZERO(tempMask);
3172 setSize = 0;
3173 if (*scan == '\0') {
3174 break;
3175 }
3176 scan++; // skip ','
3177 continue;
3178 }
3179
3180 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3181 scan++; // skip ':'
3182
3183 //
3184 // Read count parameter
3185 //
3186 SKIP_WS(scan);
3187 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3188 "bad explicit places list");
3189 next = scan;
3190 SKIP_DIGITS(next);
Jim Cownie181b4bb2013-12-23 17:28:57 +00003191 int count = __kmp_str_to_int(scan, *next);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003192 KMP_ASSERT(count >= 0);
3193 scan = next;
3194
3195 //
3196 // valid follow sets are ',' ':' and EOL
3197 //
3198 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003199 int stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003200 if (*scan == '\0' || *scan == ',') {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003201 stride = +1;
3202 }
3203 else {
3204 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3205 scan++; // skip ':'
Jim Cownie5e8470a2013-09-27 10:38:44 +00003206
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003207 //
3208 // Read stride parameter
3209 //
3210 int sign = +1;
3211 for (;;) {
3212 SKIP_WS(scan);
3213 if (*scan == '+') {
3214 scan++; // skip '+'
3215 continue;
3216 }
3217 if (*scan == '-') {
3218 sign *= -1;
3219 scan++; // skip '-'
3220 continue;
3221 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003222 break;
3223 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003224 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003225 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3226 "bad explicit places list");
3227 next = scan;
3228 SKIP_DIGITS(next);
3229 stride = __kmp_str_to_int(scan, *next);
3230 KMP_DEBUG_ASSERT(stride >= 0);
3231 scan = next;
3232 stride *= sign;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003233 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003234
3235 if (stride > 0) {
3236 int i;
3237 for (i = 0; i < count; i++) {
3238 int j;
3239 if (setSize == 0) {
3240 break;
3241 }
3242 ADD_MASK(tempMask);
3243 setSize = 0;
3244 for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003245 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3246 KMP_CPU_CLR(j, tempMask);
3247 }
3248 else if ((j > maxOsId) ||
3249 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
3250 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3251 && (__kmp_affinity_type != affinity_none))) {
3252 KMP_WARNING(AffIgnoreInvalidProcID, j);
3253 }
3254 KMP_CPU_CLR(j, tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003255 }
3256 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003257 KMP_CPU_SET(j, tempMask);
3258 setSize++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003259 }
3260 }
3261 for (; j >= 0; j--) {
3262 KMP_CPU_CLR(j, tempMask);
3263 }
3264 }
3265 }
3266 else {
3267 int i;
3268 for (i = 0; i < count; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003269 int j;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003270 if (setSize == 0) {
3271 break;
3272 }
3273 ADD_MASK(tempMask);
3274 setSize = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003275 for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003276 j++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003277 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3278 KMP_CPU_CLR(j, tempMask);
3279 }
3280 else if ((j > maxOsId) ||
3281 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
3282 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3283 && (__kmp_affinity_type != affinity_none))) {
3284 KMP_WARNING(AffIgnoreInvalidProcID, j);
3285 }
3286 KMP_CPU_CLR(j, tempMask);
3287 }
3288 else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003289 KMP_CPU_SET(j, tempMask);
3290 setSize++;
3291 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003292 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003293 for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003294 KMP_CPU_CLR(j, tempMask);
3295 }
3296 }
3297 }
3298 KMP_CPU_ZERO(tempMask);
3299 setSize = 0;
3300
3301 //
3302 // valid follow sets are ',' and EOL
3303 //
3304 SKIP_WS(scan);
3305 if (*scan == '\0') {
3306 break;
3307 }
3308 if (*scan == ',') {
3309 scan++; // skip ','
3310 continue;
3311 }
3312
3313 KMP_ASSERT2(0, "bad explicit places list");
3314 }
3315
3316 *out_numMasks = nextNewMask;
3317 if (nextNewMask == 0) {
3318 *out_masks = NULL;
3319 KMP_INTERNAL_FREE(newMasks);
3320 return;
3321 }
3322 *out_masks
3323 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
3324 memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
3325 __kmp_free(tempMask);
3326 KMP_INTERNAL_FREE(newMasks);
3327}
3328
3329# endif /* OMP_40_ENABLED */
3330
3331#undef ADD_MASK
3332#undef ADD_MASK_OSID
3333
Jim Cownie5e8470a2013-09-27 10:38:44 +00003334static void
3335__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
3336{
3337 if ( __kmp_place_num_cores == 0 ) {
3338 if ( __kmp_place_num_threads_per_core == 0 ) {
3339 return; // no cores limiting actions requested, exit
3340 }
3341 __kmp_place_num_cores = nCoresPerPkg; // use all available cores
3342 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003343 if ( !__kmp_affinity_uniform_topology() ) {
3344 KMP_WARNING( AffThrPlaceNonUniform );
3345 return; // don't support non-uniform topology
3346 }
3347 if ( depth != 3 ) {
3348 KMP_WARNING( AffThrPlaceNonThreeLevel );
3349 return; // don't support non-3-level topology
Jim Cownie5e8470a2013-09-27 10:38:44 +00003350 }
3351 if ( __kmp_place_num_threads_per_core == 0 ) {
3352 __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
3353 }
Andrey Churbanov12875572015-03-10 09:00:36 +00003354 if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003355 KMP_WARNING( AffThrPlaceManyCores );
3356 return;
3357 }
3358
3359 AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
3360 nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
3361 int i, j, k, n_old = 0, n_new = 0;
3362 for ( i = 0; i < nPackages; ++i ) {
3363 for ( j = 0; j < nCoresPerPkg; ++j ) {
Andrey Churbanov12875572015-03-10 09:00:36 +00003364 if ( j < __kmp_place_core_offset || j >= __kmp_place_core_offset + __kmp_place_num_cores ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003365 n_old += __kmp_nThreadsPerCore; // skip not-requested core
3366 } else {
3367 for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) {
Andrey Churbanov12875572015-03-10 09:00:36 +00003368 if ( k < __kmp_place_num_threads_per_core ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003369 newAddr[n_new] = (*pAddr)[n_old]; // copy requested core's data to new location
3370 n_new++;
3371 }
3372 n_old++;
3373 }
3374 }
3375 }
3376 }
3377 nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
3378 __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
3379 __kmp_avail_proc = n_new; // correct avail_proc
3380 __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
3381
3382 __kmp_free( *pAddr );
3383 *pAddr = newAddr; // replace old topology with new one
3384}
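//
// Worked example (hypothetical): on 2 packages x 8 cores x 2 threads with
// __kmp_place_core_offset = 2 and __kmp_place_num_cores = 4, the copy loop
// above keeps cores 2..5 of each package, so __kmp_avail_proc becomes
// 2 * 4 * 2 = 16 and __kmp_ncores becomes 2 * 4 = 8.
//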
3385
Jim Cownie5e8470a2013-09-27 10:38:44 +00003386
3387static AddrUnsPair *address2os = NULL;
3388static int * procarr = NULL;
3389static int __kmp_aff_depth = 0;
3390
3391static void
3392__kmp_aux_affinity_initialize(void)
3393{
3394 if (__kmp_affinity_masks != NULL) {
3395 KMP_ASSERT(fullMask != NULL);
3396 return;
3397 }
3398
3399 //
3400 // Create the "full" mask - this defines all of the processors that we
3401 // consider to be in the machine model. If respect is set, then it is
3402 // the initialization thread's affinity mask. Otherwise, it is all
3403 // processors that we know about on the machine.
3404 //
3405 if (fullMask == NULL) {
3406 fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
3407 }
3408 if (KMP_AFFINITY_CAPABLE()) {
3409 if (__kmp_affinity_respect_mask) {
3410 __kmp_get_system_affinity(fullMask, TRUE);
3411
3412 //
3413 // Count the number of available processors.
3414 //
3415 unsigned i;
3416 __kmp_avail_proc = 0;
3417 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
3418 if (! KMP_CPU_ISSET(i, fullMask)) {
3419 continue;
3420 }
3421 __kmp_avail_proc++;
3422 }
3423 if (__kmp_avail_proc > __kmp_xproc) {
3424 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3425 && (__kmp_affinity_type != affinity_none))) {
3426 KMP_WARNING(ErrorInitializeAffinity);
3427 }
3428 __kmp_affinity_type = affinity_none;
3429 __kmp_affin_mask_size = 0;
3430 return;
3431 }
3432 }
3433 else {
3434 __kmp_affinity_entire_machine_mask(fullMask);
3435 __kmp_avail_proc = __kmp_xproc;
3436 }
3437 }
3438
3439 int depth = -1;
3440 kmp_i18n_id_t msg_id = kmp_i18n_null;
3441
3442 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00003443 // For backward compatibility, setting KMP_CPUINFO_FILE =>
Jim Cownie5e8470a2013-09-27 10:38:44 +00003444 // KMP_TOPOLOGY_METHOD=cpuinfo
3445 //
3446 if ((__kmp_cpuinfo_file != NULL) &&
3447 (__kmp_affinity_top_method == affinity_top_method_all)) {
3448 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
3449 }
3450
3451 if (__kmp_affinity_top_method == affinity_top_method_all) {
3452 //
3453 // In the default code path, errors are not fatal - we just try using
3454 // another method. We only emit a warning message if affinity is on,
3455 // or the verbose flag is set, and the nowarnings flag was not set.
3456 //
3457 const char *file_name = NULL;
3458 int line = 0;
3459
3460# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3461
3462 if (__kmp_affinity_verbose) {
3463 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
3464 }
3465
3466 file_name = NULL;
3467 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3468 if (depth == 0) {
3469 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3470 KMP_ASSERT(address2os == NULL);
3471 return;
3472 }
3473
3474 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003475 if (__kmp_affinity_verbose) {
3476 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003477 KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
3478 KMP_I18N_STR(DecodingLegacyAPIC));
3479 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003480 else {
3481 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
3482 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003483 }
3484
3485 file_name = NULL;
3486 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3487 if (depth == 0) {
3488 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3489 KMP_ASSERT(address2os == NULL);
3490 return;
3491 }
3492 }
3493
3494# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3495
3496# if KMP_OS_LINUX
3497
3498 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003499 if (__kmp_affinity_verbose) {
3500 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003501 KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
3502 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003503 else {
3504 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
3505 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003506 }
3507
3508 FILE *f = fopen("/proc/cpuinfo", "r");
3509 if (f == NULL) {
3510 msg_id = kmp_i18n_str_CantOpenCpuinfo;
3511 }
3512 else {
3513 file_name = "/proc/cpuinfo";
3514 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3515 fclose(f);
3516 if (depth == 0) {
3517 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3518 KMP_ASSERT(address2os == NULL);
3519 return;
3520 }
3521 }
3522 }
3523
3524# endif /* KMP_OS_LINUX */
3525
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003526# if KMP_GROUP_AFFINITY
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003527
3528 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
3529 if (__kmp_affinity_verbose) {
3530 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3531 }
3532
3533 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3534 KMP_ASSERT(depth != 0);
3535 }
3536
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003537# endif /* KMP_GROUP_AFFINITY */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003538
Jim Cownie5e8470a2013-09-27 10:38:44 +00003539 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003540 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003541 if (file_name == NULL) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003542 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003543 }
3544 else if (line == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003545 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003546 }
3547 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003548 KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003549 }
3550 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003551 // FIXME - print msg if msg_id = kmp_i18n_null ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00003552
3553 file_name = "";
3554 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3555 if (depth == 0) {
3556 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3557 KMP_ASSERT(address2os == NULL);
3558 return;
3559 }
3560 KMP_ASSERT(depth > 0);
3561 KMP_ASSERT(address2os != NULL);
3562 }
3563 }
3564
3565 //
3566 // If the user has specified that a particular topology discovery method
3567 // is to be used, then we abort if that method fails. The exception is
3568 // group affinity, which might have been implicitly set.
3569 //
3570
3571# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3572
3573 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
3574 if (__kmp_affinity_verbose) {
3575 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3576 KMP_I18N_STR(Decodingx2APIC));
3577 }
3578
3579 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3580 if (depth == 0) {
3581 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3582 KMP_ASSERT(address2os == NULL);
3583 return;
3584 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003585 if (depth < 0) {
3586 KMP_ASSERT(msg_id != kmp_i18n_null);
3587 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3588 }
3589 }
3590 else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
3591 if (__kmp_affinity_verbose) {
3592 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3593 KMP_I18N_STR(DecodingLegacyAPIC));
3594 }
3595
3596 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3597 if (depth == 0) {
3598 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3599 KMP_ASSERT(address2os == NULL);
3600 return;
3601 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003602 if (depth < 0) {
3603 KMP_ASSERT(msg_id != kmp_i18n_null);
3604 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3605 }
3606 }
3607
3608# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3609
3610 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
3611 const char *filename;
3612 if (__kmp_cpuinfo_file != NULL) {
3613 filename = __kmp_cpuinfo_file;
3614 }
3615 else {
3616 filename = "/proc/cpuinfo";
3617 }
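        // Note: __kmp_cpuinfo_file is normally populated from the
        // KMP_CPUINFO_FILE environment variable (cf. the
        // NameComesFrom_CPUINFO_FILE hint below), which lets a cpuinfo-format
        // snapshot be supplied on systems without a live /proc/cpuinfo.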
3618
3619 if (__kmp_affinity_verbose) {
3620 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
3621 }
3622
3623 FILE *f = fopen(filename, "r");
3624 if (f == NULL) {
3625 int code = errno;
3626 if (__kmp_cpuinfo_file != NULL) {
3627 __kmp_msg(
3628 kmp_ms_fatal,
3629 KMP_MSG(CantOpenFileForReading, filename),
3630 KMP_ERR(code),
3631 KMP_HNT(NameComesFrom_CPUINFO_FILE),
3632 __kmp_msg_null
3633 );
3634 }
3635 else {
3636 __kmp_msg(
3637 kmp_ms_fatal,
3638 KMP_MSG(CantOpenFileForReading, filename),
3639 KMP_ERR(code),
3640 __kmp_msg_null
3641 );
3642 }
3643 }
3644 int line = 0;
3645 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3646 fclose(f);
3647 if (depth < 0) {
3648 KMP_ASSERT(msg_id != kmp_i18n_null);
3649 if (line > 0) {
3650 KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
3651 }
3652 else {
3653 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
3654 }
3655 }
3656 if (__kmp_affinity_type == affinity_none) {
3657 KMP_ASSERT(depth == 0);
3658 KMP_ASSERT(address2os == NULL);
3659 return;
3660 }
3661 }
3662
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003663# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00003664
3665 else if (__kmp_affinity_top_method == affinity_top_method_group) {
3666 if (__kmp_affinity_verbose) {
3667 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3668 }
3669
3670 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3671 KMP_ASSERT(depth != 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003672 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003673 KMP_ASSERT(msg_id != kmp_i18n_null);
3674 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003675 }
3676 }
3677
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003678# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003679
3680 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
3681 if (__kmp_affinity_verbose) {
3682 KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
3683 }
3684
3685 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3686 if (depth == 0) {
3687 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3688 KMP_ASSERT(address2os == NULL);
3689 return;
3690 }
3691 // should not fail
3692 KMP_ASSERT(depth > 0);
3693 KMP_ASSERT(address2os != NULL);
3694 }
3695
3696 if (address2os == NULL) {
3697 if (KMP_AFFINITY_CAPABLE()
3698 && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3699 && (__kmp_affinity_type != affinity_none)))) {
3700 KMP_WARNING(ErrorInitializeAffinity);
3701 }
3702 __kmp_affinity_type = affinity_none;
3703 __kmp_affin_mask_size = 0;
3704 return;
3705 }
3706
Jim Cownie5e8470a2013-09-27 10:38:44 +00003707 __kmp_apply_thread_places(&address2os, depth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003708
3709 //
3710 // Create the table of masks, indexed by OS processor id.
3711 //
3712 unsigned maxIndex;
3713 unsigned numUnique;
3714 kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
3715 address2os, __kmp_avail_proc);
3716 if (__kmp_affinity_gran_levels == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003717 KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003718 }
3719
3720 //
3721 // Set the childNums vector in all Address objects. This must be done
3722 // before we can sort using __kmp_affinity_cmp_Address_child_num(),
3723 // which takes into account the setting of __kmp_affinity_compact.
3724 //
3725 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
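    // (Descriptive note: each Address records, per topology level, which
    // child it is under its parent; the comparison routine used at
    // sortAddresses permutes these per-level child numbers according to
    // __kmp_affinity_compact before comparing, which is what turns a single
    // sort into the compact/scatter/logical/physical orderings.)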
3726
3727 switch (__kmp_affinity_type) {
3728
3729 case affinity_explicit:
3730 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
3731# if OMP_40_ENABLED
3732 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3733# endif
3734 {
3735 __kmp_affinity_process_proclist(&__kmp_affinity_masks,
3736 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3737 maxIndex);
3738 }
3739# if OMP_40_ENABLED
3740 else {
3741 __kmp_affinity_process_placelist(&__kmp_affinity_masks,
3742 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3743 maxIndex);
3744 }
3745# endif
3746 if (__kmp_affinity_num_masks == 0) {
3747 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3748 && (__kmp_affinity_type != affinity_none))) {
3749 KMP_WARNING(AffNoValidProcID);
3750 }
3751 __kmp_affinity_type = affinity_none;
3752 return;
3753 }
3754 break;
3755
3756 //
3757 // The other affinity types rely on sorting the Addresses according
3758 // to some permutation of the machine topology tree. Set
3759 // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
3760 // then jump to a common code fragment to do the sort and create
3761 // the array of affinity masks.
3762 //
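    // User-visible effect (illustrative): with KMP_AFFINITY=compact,
    // consecutive OpenMP threads land as close together in the topology as
    // possible (fill one core's contexts, then move to the next core);
    // with KMP_AFFINITY=scatter, they are spread as far apart as possible
    // (round-robin across packages and cores before reusing sibling
    // hardware threads).
    //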
3763
3764 case affinity_logical:
3765 __kmp_affinity_compact = 0;
3766 if (__kmp_affinity_offset) {
3767 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3768 % __kmp_avail_proc;
3769 }
3770 goto sortAddresses;
3771
3772 case affinity_physical:
3773 if (__kmp_nThreadsPerCore > 1) {
3774 __kmp_affinity_compact = 1;
3775 if (__kmp_affinity_compact >= depth) {
3776 __kmp_affinity_compact = 0;
3777 }
3778 } else {
3779 __kmp_affinity_compact = 0;
3780 }
3781 if (__kmp_affinity_offset) {
3782 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3783 % __kmp_avail_proc;
3784 }
3785 goto sortAddresses;
3786
3787 case affinity_scatter:
3788 if (__kmp_affinity_compact >= depth) {
3789 __kmp_affinity_compact = 0;
3790 }
3791 else {
3792 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
3793 }
3794 goto sortAddresses;
3795
3796 case affinity_compact:
3797 if (__kmp_affinity_compact >= depth) {
3798 __kmp_affinity_compact = depth - 1;
3799 }
3800 goto sortAddresses;
3801
Jim Cownie5e8470a2013-09-27 10:38:44 +00003802 case affinity_balanced:
Andrey Churbanove4b92132015-03-05 17:46:50 +00003803 // Balanced affinity works only within a single package; the uniform and non-uniform cases are handled separately below
Jim Cownie5e8470a2013-09-27 10:38:44 +00003804 if( nPackages > 1 ) {
3805 if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
3806 KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
3807 }
3808 __kmp_affinity_type = affinity_none;
3809 return;
3810 } else if( __kmp_affinity_uniform_topology() ) {
3811 break;
3812 } else { // Non-uniform topology
3813
3814 // Save the depth for further usage
3815 __kmp_aff_depth = depth;
3816
3817 // Number of hyper threads per core in HT machine
3818 int nth_per_core = __kmp_nThreadsPerCore;
3819
3820 int core_level;
3821 if( nth_per_core > 1 ) {
3822 core_level = depth - 2;
3823 } else {
3824 core_level = depth - 1;
3825 }
3826 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
3827 int nproc = nth_per_core * ncores;
3828
3829 procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
3830 for( int i = 0; i < nproc; i++ ) {
3831 procarr[ i ] = -1;
3832 }
3833
3834 for( int i = 0; i < __kmp_avail_proc; i++ ) {
3835 int proc = address2os[ i ].second;
3836 // If depth == 3 then level=0 - package, level=1 - core, level=2 - thread.
3837 // If there is only one thread per core then depth == 2: level 0 - package,
3838 // level 1 - core.
3839 int level = depth - 1;
3840
3841 // Defaults for the case of one thread context per core (__kmp_nth_per_core == 1)
3842 int thread = 0;
3843 int core = address2os[ i ].first.labels[ level ];
3844 // If the thread level exists, that is, we have more than one thread context per core
3845 if( nth_per_core > 1 ) {
3846 thread = address2os[ i ].first.labels[ level ] % nth_per_core;
3847 core = address2os[ i ].first.labels[ level - 1 ];
3848 }
3849 procarr[ core * nth_per_core + thread ] = proc;
3850 }
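            // Illustrative layout of procarr (assumed machine: 2 cores with
            // 2 thread contexts each, the second context of core 1 unused):
            //     procarr = { os0, os1, os2, -1 }
            // i.e. procarr[ core * nth_per_core + thread ] holds the OS proc
            // id bound to that (core, thread) slot, or -1 if the slot is
            // empty.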
3851
3852 break;
3853 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003854
3855 sortAddresses:
3856 //
3857 // Allocate the gtid->affinity mask table.
3858 //
3859 if (__kmp_affinity_dups) {
3860 __kmp_affinity_num_masks = __kmp_avail_proc;
3861 }
3862 else {
3863 __kmp_affinity_num_masks = numUnique;
3864 }
3865
3866# if OMP_40_ENABLED
3867 if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
3868 && ( __kmp_affinity_num_places > 0 )
3869 && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
3870 __kmp_affinity_num_masks = __kmp_affinity_num_places;
3871 }
3872# endif
3873
3874 __kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
3875 __kmp_affinity_num_masks * __kmp_affin_mask_size);
3876
3877 //
3878 // Sort the address2os table according to the current setting of
3879 // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
3880 //
3881 qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
3882 __kmp_affinity_cmp_Address_child_num);
3883 {
3884 int i;
3885 unsigned j;
3886 for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
3887 if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
3888 continue;
3889 }
3890 unsigned osId = address2os[i].second;
3891 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
3892 kmp_affin_mask_t *dest
3893 = KMP_CPU_INDEX(__kmp_affinity_masks, j);
3894 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
3895 KMP_CPU_COPY(dest, src);
3896 if (++j >= __kmp_affinity_num_masks) {
3897 break;
3898 }
3899 }
3900 KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
3901 }
3902 break;
3903
3904 default:
3905 KMP_ASSERT2(0, "Unexpected affinity setting");
3906 }
3907
3908 __kmp_free(osId2Mask);
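    // Hand the final topology table to the hierarchical-barrier machinery.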
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003909 machine_hierarchy.init(address2os, __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003910}
3911
3912
3913void
3914__kmp_affinity_initialize(void)
3915{
3916 //
3917 // Much of the code above was written assuming that if a machine was not
3918 // affinity capable, then __kmp_affinity_type == affinity_none. We now
3919 // explicitly represent this as __kmp_affinity_type == affinity_disabled.
3920 //
3921 // There are too many checks for __kmp_affinity_type == affinity_none
3922 // in this code. Instead of trying to change them all, check if
3923 // __kmp_affinity_type == affinity_disabled, and if so, slam it with
3924 // affinity_none, call the real initialization routine, then restore
3925 // __kmp_affinity_type to affinity_disabled.
3926 //
3927 int disabled = (__kmp_affinity_type == affinity_disabled);
3928 if (! KMP_AFFINITY_CAPABLE()) {
3929 KMP_ASSERT(disabled);
3930 }
3931 if (disabled) {
3932 __kmp_affinity_type = affinity_none;
3933 }
3934 __kmp_aux_affinity_initialize();
3935 if (disabled) {
3936 __kmp_affinity_type = affinity_disabled;
3937 }
3938}
3939
3940
3941void
3942__kmp_affinity_uninitialize(void)
3943{
3944 if (__kmp_affinity_masks != NULL) {
3945 __kmp_free(__kmp_affinity_masks);
3946 __kmp_affinity_masks = NULL;
3947 }
3948 if (fullMask != NULL) {
3949 KMP_CPU_FREE(fullMask);
3950 fullMask = NULL;
3951 }
3952 __kmp_affinity_num_masks = 0;
3953# if OMP_40_ENABLED
3954 __kmp_affinity_num_places = 0;
3955# endif
3956 if (__kmp_affinity_proclist != NULL) {
3957 __kmp_free(__kmp_affinity_proclist);
3958 __kmp_affinity_proclist = NULL;
3959 }
3960 if( address2os != NULL ) {
3961 __kmp_free( address2os );
3962 address2os = NULL;
3963 }
3964 if( procarr != NULL ) {
3965 __kmp_free( procarr );
3966 procarr = NULL;
3967 }
3968}
3969
3970
3971void
3972__kmp_affinity_set_init_mask(int gtid, int isa_root)
3973{
3974 if (! KMP_AFFINITY_CAPABLE()) {
3975 return;
3976 }
3977
3978 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
3979 if (th->th.th_affin_mask == NULL) {
3980 KMP_CPU_ALLOC(th->th.th_affin_mask);
3981 }
3982 else {
3983 KMP_CPU_ZERO(th->th.th_affin_mask);
3984 }
3985
3986 //
3987 // Copy the thread mask to the kmp_info_t structure. If
3988 // __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
3989 // that has all of the OS proc ids set; if __kmp_affinity_respect_mask
3990 // is set, the full mask is instead the mask of the initialization
3991 // thread.
3992 //
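    // Illustrative example (assumed values): with __kmp_affinity_num_masks
    // == 4 and __kmp_affinity_offset == 1, gtid 0 binds to place 1, gtid 1
    // to place 2, and gtid 3 wraps around to place 0, via
    //     i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
    //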
3993 kmp_affin_mask_t *mask;
3994 int i;
3995
3996# if OMP_40_ENABLED
3997 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3998# endif
3999 {
Andrey Churbanovf28f6132015-01-13 14:54:00 +00004000 if ((__kmp_affinity_type == affinity_none) || (__kmp_affinity_type == affinity_balanced)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004001 ) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004002# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004003 if (__kmp_num_proc_groups > 1) {
4004 return;
4005 }
4006# endif
4007 KMP_ASSERT(fullMask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004008 i = KMP_PLACE_ALL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004009 mask = fullMask;
4010 }
4011 else {
4012 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4013 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4014 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4015 }
4016 }
4017# if OMP_40_ENABLED
4018 else {
4019 if ((! isa_root)
4020 || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004021# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004022 if (__kmp_num_proc_groups > 1) {
4023 return;
4024 }
4025# endif
4026 KMP_ASSERT(fullMask != NULL);
4027 i = KMP_PLACE_ALL;
4028 mask = fullMask;
4029 }
4030 else {
4031 //
4032 // int i = some hash function or just a counter that doesn't
4033 // always start at 0. Use gtid for now.
4034 //
4035 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4036 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4037 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4038 }
4039 }
4040# endif
4041
4042# if OMP_40_ENABLED
4043 th->th.th_current_place = i;
4044 if (isa_root) {
4045 th->th.th_new_place = i;
4046 th->th.th_first_place = 0;
4047 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4048 }
4049
4050 if (i == KMP_PLACE_ALL) {
4051 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
4052 gtid));
4053 }
4054 else {
4055 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
4056 gtid, i));
4057 }
4058# else
4059 if (i == -1) {
4060 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
4061 gtid));
4062 }
4063 else {
4064 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
4065 gtid, i));
4066 }
4067# endif /* OMP_40_ENABLED */
4068
4069 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4070
4071 if (__kmp_affinity_verbose) {
4072 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4073 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4074 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004075 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
4076 buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004077 }
4078
4079# if KMP_OS_WINDOWS
4080 //
4081 // On Windows* OS, the process affinity mask might have changed.
4082 // If the user didn't request affinity and this call fails,
4083 // just continue silently. See CQ171393.
4084 //
4085 if ( __kmp_affinity_type == affinity_none ) {
4086 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
4087 }
4088 else
4089# endif
4090 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4091}
4092
4093
4094# if OMP_40_ENABLED
4095
4096void
4097__kmp_affinity_set_place(int gtid)
4098{
4099 int retval;
4100
4101 if (! KMP_AFFINITY_CAPABLE()) {
4102 return;
4103 }
4104
4105 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4106
4107 KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
4108 gtid, th->th.th_new_place, th->th.th_current_place));
4109
4110 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00004111 // Check that the new place is within this thread's partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004112 //
4113 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004114 KMP_ASSERT(th->th.th_new_place >= 0);
4115 KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004116 if (th->th.th_first_place <= th->th.th_last_place) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004117 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004118 && (th->th.th_new_place <= th->th.th_last_place));
4119 }
4120 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004121 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004122 || (th->th.th_new_place >= th->th.th_last_place));
4123 }
4124
4125 //
4126 // Copy the thread mask to the kmp_info_t structure,
4127 // and set this thread's affinity.
4128 //
4129 kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
4130 th->th.th_new_place);
4131 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4132 th->th.th_current_place = th->th.th_new_place;
4133
4134 if (__kmp_affinity_verbose) {
4135 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4136 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4137 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004138 KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
4139 gtid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004140 }
4141 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4142}
4143
4144# endif /* OMP_40_ENABLED */
4145
4146
4147int
4148__kmp_aux_set_affinity(void **mask)
4149{
4150 int gtid;
4151 kmp_info_t *th;
4152 int retval;
4153
4154 if (! KMP_AFFINITY_CAPABLE()) {
4155 return -1;
4156 }
4157
4158 gtid = __kmp_entry_gtid();
4159 KA_TRACE(1000, ;{
4160 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4161 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4162 (kmp_affin_mask_t *)(*mask));
4163 __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
4164 gtid, buf);
4165 });
4166
4167 if (__kmp_env_consistency_check) {
4168 if ((mask == NULL) || (*mask == NULL)) {
4169 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4170 }
4171 else {
4172 unsigned proc;
4173 int num_procs = 0;
4174
4175 for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
4176 if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
4177 continue;
4178 }
4179 num_procs++;
4180 if (! KMP_CPU_ISSET(proc, fullMask)) {
4181 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4182 break;
4183 }
4184 }
4185 if (num_procs == 0) {
4186 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4187 }
4188
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004189# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004190 if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
4191 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4192 }
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004193# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004194
4195 }
4196 }
4197
4198 th = __kmp_threads[gtid];
4199 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4200 retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4201 if (retval == 0) {
4202 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
4203 }
4204
4205# if OMP_40_ENABLED
4206 th->th.th_current_place = KMP_PLACE_UNDEFINED;
4207 th->th.th_new_place = KMP_PLACE_UNDEFINED;
4208 th->th.th_first_place = 0;
4209 th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004210
4211 //
4212 // Turn off 4.0 affinity for the current thread at this parallel level.
4213 //
4214 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004215# endif
4216
4217 return retval;
4218}
4219
4220
4221int
4222__kmp_aux_get_affinity(void **mask)
4223{
4224 int gtid;
4225 int retval;
4226 kmp_info_t *th;
4227
4228 if (! KMP_AFFINITY_CAPABLE()) {
4229 return -1;
4230 }
4231
4232 gtid = __kmp_entry_gtid();
4233 th = __kmp_threads[gtid];
4234 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4235
4236 KA_TRACE(1000, ;{
4237 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4238 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4239 th->th.th_affin_mask);
4240 __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
4241 });
4242
4243 if (__kmp_env_consistency_check) {
4244 if ((mask == NULL) || (*mask == NULL)) {
4245 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
4246 }
4247 }
4248
4249# if !KMP_OS_WINDOWS
4250
4251 retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4252 KA_TRACE(1000, ;{
4253 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4254 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4255 (kmp_affin_mask_t *)(*mask));
4256 __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
4257 });
4258 return retval;
4259
4260# else
4261
4262 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
4263 return 0;
4264
4265# endif /* KMP_OS_WINDOWS */
4266
4267}
4268
Jim Cownie5e8470a2013-09-27 10:38:44 +00004269int
4270__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
4271{
4272 int retval;
4273
4274 if (! KMP_AFFINITY_CAPABLE()) {
4275 return -1;
4276 }
4277
4278 KA_TRACE(1000, ;{
4279 int gtid = __kmp_entry_gtid();
4280 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4281 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4282 (kmp_affin_mask_t *)(*mask));
4283 __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
4284 proc, gtid, buf);
4285 });
4286
4287 if (__kmp_env_consistency_check) {
4288 if ((mask == NULL) || (*mask == NULL)) {
4289 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
4290 }
4291 }
4292
4293 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4294 return -1;
4295 }
4296 if (! KMP_CPU_ISSET(proc, fullMask)) {
4297 return -2;
4298 }
4299
4300 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
4301 return 0;
4302}
4303
4304
4305int
4306__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
4307{
4308 int retval;
4309
4310 if (! KMP_AFFINITY_CAPABLE()) {
4311 return -1;
4312 }
4313
4314 KA_TRACE(1000, ;{
4315 int gtid = __kmp_entry_gtid();
4316 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4317 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4318 (kmp_affin_mask_t *)(*mask));
4319 __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
4320 proc, gtid, buf);
4321 });
4322
4323 if (__kmp_env_consistency_check) {
4324 if ((mask == NULL) || (*mask == NULL)) {
4325 KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
4326 }
4327 }
4328
4329 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4330 return -1;
4331 }
4332 if (! KMP_CPU_ISSET(proc, fullMask)) {
4333 return -2;
4334 }
4335
4336 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
4337 return 0;
4338}
4339
4340
4341int
4342__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
4343{
4344 int retval;
4345
4346 if (! KMP_AFFINITY_CAPABLE()) {
4347 return -1;
4348 }
4349
4350 KA_TRACE(1000, ;{
4351 int gtid = __kmp_entry_gtid();
4352 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4353 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4354 (kmp_affin_mask_t *)(*mask));
4355 __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
4356 proc, gtid, buf);
4357 });
4358
4359 if (__kmp_env_consistency_check) {
4360 if ((mask == NULL) || (*mask == NULL)) {
Andrey Churbanov4b2f17a2015-01-29 15:49:22 +00004361 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
Jim Cownie5e8470a2013-09-27 10:38:44 +00004362 }
4363 }
4364
4365 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4366 return 0;
4367 }
4368 if (! KMP_CPU_ISSET(proc, fullMask)) {
4369 return 0;
4370 }
4371
4372 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
4373}
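
//
// Usage sketch for the user-level API that reaches the __kmp_aux_* entry
// points above (illustrative user code, assuming this runtime's omp.h;
// error handling omitted):
//
//     kmp_affinity_mask_t mask;
//     kmp_create_affinity_mask(&mask);
//     kmp_set_affinity_mask_proc(3, &mask);          // add OS proc 3
//     if (kmp_set_affinity(&mask) != 0) { /* not affinity capable */ }
//     int on = kmp_get_affinity_mask_proc(3, &mask); // nonzero if proc 3 set
//     kmp_destroy_affinity_mask(&mask);
//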
4374
Jim Cownie5e8470a2013-09-27 10:38:44 +00004375
4376// Dynamic affinity settings - Affinity balanced
4377void __kmp_balanced_affinity( int tid, int nthreads )
4378{
4379 if( __kmp_affinity_uniform_topology() ) {
4380 int coreID;
4381 int threadID;
4382 // Number of hyper threads per core in HT machine
4383 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
4384 // Number of cores
4385 int ncores = __kmp_ncores;
4386 // How many threads will be bound to each core
4387 int chunk = nthreads / ncores;
4388 // How many cores will have an additional thread bound to it - "big cores"
4389 int big_cores = nthreads % ncores;
4390 // Number of threads on the big cores
4391 int big_nth = ( chunk + 1 ) * big_cores;
4392 if( tid < big_nth ) {
4393 coreID = tid / (chunk + 1 );
4394 threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
4395 } else { //tid >= big_nth
4396 coreID = ( tid - big_cores ) / chunk;
4397 threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
4398 }
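        // Worked example (assumed machine: 10 threads on 4 cores with 2
        // contexts per core): chunk = 2, big_cores = 2, big_nth = 6;
        // tid 5 -> coreID = 5/3 = 1, threadID = (5%3)%2 = 0;
        // tid 7 -> coreID = (7-2)/2 = 2, threadID = ((7-2)%2)%2 = 1.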
4399
4400 KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
4401 "Illegal set affinity operation when not capable");
4402
4403 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
4404 KMP_CPU_ZERO(mask);
4405
4406 // Granularity == thread
4407 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4408 int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
4409 KMP_CPU_SET( osID, mask);
4410 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4411 for( int i = 0; i < __kmp_nth_per_core; i++ ) {
4412 int osID;
4413 osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
4414 KMP_CPU_SET( osID, mask);
4415 }
4416 }
4417 if (__kmp_affinity_verbose) {
4418 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4419 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004420 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4421 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004422 }
4423 __kmp_set_system_affinity( mask, TRUE );
4424 } else { // Non-uniform topology
4425
4426 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
4427 KMP_CPU_ZERO(mask);
4428
4429 // Number of hyper threads per core in HT machine
4430 int nth_per_core = __kmp_nThreadsPerCore;
4431 int core_level;
4432 if( nth_per_core > 1 ) {
4433 core_level = __kmp_aff_depth - 2;
4434 } else {
4435 core_level = __kmp_aff_depth - 1;
4436 }
4437
4438 // Number of cores - maximum value; it does not count trailing cores with 0 processors
4439 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
4440
4441 // As a performance optimization, handle the special case nthreads == __kmp_avail_proc separately
4442 if( nthreads == __kmp_avail_proc ) {
4443 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4444 int osID = address2os[ tid ].second;
4445 KMP_CPU_SET( osID, mask);
4446 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4447 int coreID = address2os[ tid ].first.labels[ core_level ];
4448 // Count the osIDs found for the current core; there can be no more than nth_per_core of them;
4449 // since address2os is sorted we can break when cnt == nth_per_core
4450 int cnt = 0;
4451 for( int i = 0; i < __kmp_avail_proc; i++ ) {
4452 int osID = address2os[ i ].second;
4453 int core = address2os[ i ].first.labels[ core_level ];
4454 if( core == coreID ) {
4455 KMP_CPU_SET( osID, mask);
4456 cnt++;
4457 if( cnt == nth_per_core ) {
4458 break;
4459 }
4460 }
4461 }
4462 }
4463 } else if( nthreads <= __kmp_ncores ) {
4464
4465 int core = 0;
4466 for( int i = 0; i < ncores; i++ ) {
4467 // Check if this core from procarr[] is in the mask
4468 int in_mask = 0;
4469 for( int j = 0; j < nth_per_core; j++ ) {
4470 if( procarr[ i * nth_per_core + j ] != -1 ) {
4471 in_mask = 1;
4472 break;
4473 }
4474 }
4475 if( in_mask ) {
4476 if( tid == core ) {
4477 for( int j = 0; j < nth_per_core; j++ ) {
4478 int osID = procarr[ i * nth_per_core + j ];
4479 if( osID != -1 ) {
4480 KMP_CPU_SET( osID, mask );
4481 // For granularity=thread it is enough to set the first available osID for this core
4482 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4483 break;
4484 }
4485 }
4486 }
4487 break;
4488 } else {
4489 core++;
4490 }
4491 }
4492 }
4493
4494 } else { // nthreads > __kmp_ncores
4495
4496 // Array to save the number of processors at each core
4497 int nproc_at_core[ ncores ];
4498 // Array to save the number of cores with "x" available processors;
4499 int ncores_with_x_procs[ nth_per_core + 1 ];
4500 // Array to save the number of cores with # procs from x to nth_per_core
4501 int ncores_with_x_to_max_procs[ nth_per_core + 1 ];
4502
4503 for( int i = 0; i <= nth_per_core; i++ ) {
4504 ncores_with_x_procs[ i ] = 0;
4505 ncores_with_x_to_max_procs[ i ] = 0;
4506 }
4507
4508 for( int i = 0; i < ncores; i++ ) {
4509 int cnt = 0;
4510 for( int j = 0; j < nth_per_core; j++ ) {
4511 if( procarr[ i * nth_per_core + j ] != -1 ) {
4512 cnt++;
4513 }
4514 }
4515 nproc_at_core[ i ] = cnt;
4516 ncores_with_x_procs[ cnt ]++;
4517 }
4518
4519 for( int i = 0; i <= nth_per_core; i++ ) {
4520 for( int j = i; j <= nth_per_core; j++ ) {
4521 ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
4522 }
4523 }
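            // Illustrative values (assumed: 3 cores exposing 2, 1 and 0
            // usable contexts, nth_per_core = 2):
            //     nproc_at_core              = { 2, 1, 0 }
            //     ncores_with_x_procs        = { 1, 1, 1 }  // exactly x procs
            //     ncores_with_x_to_max_procs = { 3, 2, 1 }  // at least x procs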
4524
4525 // Max number of processors
4526 int nproc = nth_per_core * ncores;
4527 // An array to keep the number of threads assigned to each hardware context
4528 int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
4529 for( int i = 0; i < nproc; i++ ) {
4530 newarr[ i ] = 0;
4531 }
4532
4533 int nth = nthreads;
4534 int flag = 0;
4535 while( nth > 0 ) {
4536 for( int j = 1; j <= nth_per_core; j++ ) {
4537 int cnt = ncores_with_x_to_max_procs[ j ];
4538 for( int i = 0; i < ncores; i++ ) {
4539 // Skip the core with 0 processors
4540 if( nproc_at_core[ i ] == 0 ) {
4541 continue;
4542 }
4543 for( int k = 0; k < nth_per_core; k++ ) {
4544 if( procarr[ i * nth_per_core + k ] != -1 ) {
4545 if( newarr[ i * nth_per_core + k ] == 0 ) {
4546 newarr[ i * nth_per_core + k ] = 1;
4547 cnt--;
4548 nth--;
4549 break;
4550 } else {
4551 if( flag != 0 ) {
4552 newarr[ i * nth_per_core + k ] ++;
4553 cnt--;
4554 nth--;
4555 break;
4556 }
4557 }
4558 }
4559 }
4560 if( cnt == 0 || nth == 0 ) {
4561 break;
4562 }
4563 }
4564 if( nth == 0 ) {
4565 break;
4566 }
4567 }
4568 flag = 1;
4569 }
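            // At this point newarr[ i ] holds how many of the nthreads
            // threads were assigned to hardware context i: the first pass
            // (flag == 0) gives every usable context at most one thread,
            // and subsequent passes (flag == 1) keep topping contexts up
            // round-robin until all nthreads are placed.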
4570 int sum = 0;
4571 for( int i = 0; i < nproc; i++ ) {
4572 sum += newarr[ i ];
4573 if( sum > tid ) {
4574 // Granularity == thread
4575 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4576 int osID = procarr[ i ];
4577 KMP_CPU_SET( osID, mask);
4578 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4579 int coreID = i / nth_per_core;
4580 for( int ii = 0; ii < nth_per_core; ii++ ) {
4581 int osID = procarr[ coreID * nth_per_core + ii ];
4582 if( osID != -1 ) {
4583 KMP_CPU_SET( osID, mask);
4584 }
4585 }
4586 }
4587 break;
4588 }
4589 }
4590 __kmp_free( newarr );
4591 }
4592
4593 if (__kmp_affinity_verbose) {
4594 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4595 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004596 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4597 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004598 }
4599 __kmp_set_system_affinity( mask, TRUE );
4600 }
4601}
4602
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004603#else
4604 // affinity not supported
4605
4606kmp_uint32 mac_skipPerLevel[7];
4607kmp_uint32 mac_depth;
4608kmp_uint8 mac_leaf_kids;
4609void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
4610 static int first = 1;
4611 if (first) {
4612 const kmp_uint32 maxLevels = 7;
4613 kmp_uint32 numPerLevel[maxLevels];
4614
4615 for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
4616 numPerLevel[i] = 1;
4617 mac_skipPerLevel[i] = 1;
4618 }
4619
4620 mac_depth = 2;
4621 numPerLevel[0] = nproc;
4622
4623 kmp_uint32 branch = 4;
4624 if (numPerLevel[0] == 1) branch = nproc/4;
4625 if (branch<4) branch=4;
4626 for (kmp_uint32 d=0; d<mac_depth-1; ++d) { // optimize hierarchy width
4627 while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
4628 if (numPerLevel[d] & 1) numPerLevel[d]++;
4629 numPerLevel[d] = numPerLevel[d] >> 1;
4630 if (numPerLevel[d+1] == 1) mac_depth++;
4631 numPerLevel[d+1] = numPerLevel[d+1] << 1;
4632 }
4633 if(numPerLevel[0] == 1) {
4634 branch = branch >> 1;
4635 if (branch<4) branch = 4;
4636 }
4637 }
4638
4639 for (kmp_uint32 i=1; i<mac_depth; ++i)
4640 mac_skipPerLevel[i] = numPerLevel[i-1] * mac_skipPerLevel[i-1];
4641 mac_leaf_kids = (kmp_uint8)numPerLevel[0]-1;
4642 first=0;
4643 }
4644 thr_bar->depth = mac_depth;
4645 thr_bar->base_leaf_kids = mac_leaf_kids;
4646 thr_bar->skip_per_level = mac_skipPerLevel;
4647}
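
// Worked example for the fallback hierarchy (nproc == 8): the loop above
// rebalances numPerLevel from {8,1} to {4,2}, giving mac_depth == 3,
// mac_skipPerLevel == {1,4,8,...} and mac_leaf_kids == 3; the result is
// computed once and cached in the mac_* statics for all later callers.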
4648
Alp Toker763b9392014-02-28 09:42:41 +00004649#endif // KMP_AFFINITY_SUPPORTED