/*
 * kmp_affinity.cpp -- affinity management
 * $Revision: 43473 $
 * $Date: 2014-09-26 15:02:57 -0500 (Fri, 26 Sep 2014) $
 */
6
7
8//===----------------------------------------------------------------------===//
9//
10// The LLVM Compiler Infrastructure
11//
12// This file is dual licensed under the MIT and the University of Illinois Open
13// Source Licenses. See LICENSE.txt for details.
14//
15//===----------------------------------------------------------------------===//
16
17
18#include "kmp.h"
19#include "kmp_i18n.h"
20#include "kmp_io.h"
21#include "kmp_str.h"
#include "kmp_wrapper_getpid.h"

#if KMP_AFFINITY_SUPPORTED

26//
27// Print the affinity mask to the character array in a pretty format.
28//
29char *
30__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
31{
32 KMP_ASSERT(buf_len >= 40);
33 char *scan = buf;
34 char *end = buf + buf_len - 1;
35
36 //
37 // Find first element / check for empty set.
38 //
39 size_t i;
40 for (i = 0; i < KMP_CPU_SETSIZE; i++) {
41 if (KMP_CPU_ISSET(i, mask)) {
42 break;
43 }
44 }
45 if (i == KMP_CPU_SETSIZE) {
46 sprintf(scan, "{<empty>}");
47 while (*scan != '\0') scan++;
48 KMP_ASSERT(scan <= end);
49 return buf;
50 }
51
    sprintf(scan, "{%ld", (long)i);
    while (*scan != '\0') scan++;
54 i++;
55 for (; i < KMP_CPU_SETSIZE; i++) {
56 if (! KMP_CPU_ISSET(i, mask)) {
57 continue;
58 }
59
60 //
61 // Check for buffer overflow. A string of the form ",<n>" will have
62 // at most 10 characters, plus we want to leave room to print ",...}"
63 // if the set is too large to print for a total of 15 characters.
64 // We already left room for '\0' in setting end.
65 //
66 if (end - scan < 15) {
67 break;
68 }
        sprintf(scan, ",%-ld", (long)i);
        while (*scan != '\0') scan++;
71 }
72 if (i < KMP_CPU_SETSIZE) {
73 sprintf(scan, ",...");
74 while (*scan != '\0') scan++;
75 }
76 sprintf(scan, "}");
77 while (*scan != '\0') scan++;
78 KMP_ASSERT(scan <= end);
79 return buf;
80}
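// Illustrative example (not from the original source): for a mask containing
// OS procs 0-3 and 8, the routine above produces "{0,1,2,3,8}"; if the set is
// too large for the remaining buffer space, the output is truncated and ends
// with ",...}".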
81
82
83void
84__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
85{
86 KMP_CPU_ZERO(mask);
87
# if KMP_GROUP_AFFINITY

90 if (__kmp_num_proc_groups > 1) {
91 int group;
        KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
93 for (group = 0; group < __kmp_num_proc_groups; group++) {
94 int i;
95 int num = __kmp_GetActiveProcessorCount(group);
96 for (i = 0; i < num; i++) {
97 KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
98 }
99 }
100 }
101 else
102
# endif /* KMP_GROUP_AFFINITY */

105 {
106 int proc;
107 for (proc = 0; proc < __kmp_xproc; proc++) {
108 KMP_CPU_SET(proc, mask);
109 }
110 }
111}
112
113
114//
115// In Linux* OS debug & cover (-O0) builds, we need to avoid inline member
116// functions.
117//
118// The icc codegen emits sections with extremely long names, of the form
119// ".gnu.linkonce.<mangled_name>". There seems to have been a linker bug
120// introduced between GNU ld version 2.14.90.0.4 and 2.15.92.0.2 involving
121// some sort of memory corruption or table overflow that is triggered by
122// these long strings. I checked the latest version of the linker -
123// GNU ld (Linux* OS/GNU Binutils) 2.18.50.0.7.20080422 - and the bug is not
124// fixed.
125//
126// Unfortunately, my attempts to reproduce it in a smaller example have
127// failed - I'm not sure what the prospects are of getting it fixed
128// properly - but we need a reproducer smaller than all of libiomp.
129//
130// Work around the problem by avoiding inline constructors in such builds.
131// We do this for all platforms, not just Linux* OS - non-inline functions are
// more debuggable and provide better coverage than inline functions.
133// Use inline functions in shipping libs, for performance.
134//
135
136# if !defined(KMP_DEBUG) && !defined(COVER)
137
138class Address {
139public:
140 static const unsigned maxDepth = 32;
141 unsigned labels[maxDepth];
142 unsigned childNums[maxDepth];
143 unsigned depth;
144 unsigned leader;
145 Address(unsigned _depth)
146 : depth(_depth), leader(FALSE) {
147 }
148 Address &operator=(const Address &b) {
149 depth = b.depth;
150 for (unsigned i = 0; i < depth; i++) {
151 labels[i] = b.labels[i];
152 childNums[i] = b.childNums[i];
153 }
154 leader = FALSE;
155 return *this;
156 }
157 bool operator==(const Address &b) const {
158 if (depth != b.depth)
159 return false;
160 for (unsigned i = 0; i < depth; i++)
161 if(labels[i] != b.labels[i])
162 return false;
163 return true;
164 }
165 bool isClose(const Address &b, int level) const {
166 if (depth != b.depth)
167 return false;
168 if ((unsigned)level >= depth)
169 return true;
170 for (unsigned i = 0; i < (depth - level); i++)
171 if(labels[i] != b.labels[i])
172 return false;
173 return true;
174 }
175 bool operator!=(const Address &b) const {
176 return !operator==(b);
177 }
178};
179
180class AddrUnsPair {
181public:
182 Address first;
183 unsigned second;
184 AddrUnsPair(Address _first, unsigned _second)
185 : first(_first), second(_second) {
186 }
187 AddrUnsPair &operator=(const AddrUnsPair &b)
188 {
189 first = b.first;
190 second = b.second;
191 return *this;
192 }
193};
194
195# else
196
197class Address {
198public:
199 static const unsigned maxDepth = 32;
200 unsigned labels[maxDepth];
201 unsigned childNums[maxDepth];
202 unsigned depth;
203 unsigned leader;
204 Address(unsigned _depth);
205 Address &operator=(const Address &b);
206 bool operator==(const Address &b) const;
207 bool isClose(const Address &b, int level) const;
208 bool operator!=(const Address &b) const;
209};
210
211Address::Address(unsigned _depth)
212{
213 depth = _depth;
214 leader = FALSE;
215}
216
217Address &Address::operator=(const Address &b) {
218 depth = b.depth;
219 for (unsigned i = 0; i < depth; i++) {
220 labels[i] = b.labels[i];
221 childNums[i] = b.childNums[i];
222 }
223 leader = FALSE;
224 return *this;
225}
226
227bool Address::operator==(const Address &b) const {
228 if (depth != b.depth)
229 return false;
230 for (unsigned i = 0; i < depth; i++)
231 if(labels[i] != b.labels[i])
232 return false;
233 return true;
234}
235
236bool Address::isClose(const Address &b, int level) const {
237 if (depth != b.depth)
238 return false;
239 if ((unsigned)level >= depth)
240 return true;
241 for (unsigned i = 0; i < (depth - level); i++)
242 if(labels[i] != b.labels[i])
243 return false;
244 return true;
245}
246
247bool Address::operator!=(const Address &b) const {
248 return !operator==(b);
249}
250
251class AddrUnsPair {
252public:
253 Address first;
254 unsigned second;
255 AddrUnsPair(Address _first, unsigned _second);
256 AddrUnsPair &operator=(const AddrUnsPair &b);
257};
258
259AddrUnsPair::AddrUnsPair(Address _first, unsigned _second)
260 : first(_first), second(_second)
261{
262}
263
264AddrUnsPair &AddrUnsPair::operator=(const AddrUnsPair &b)
265{
266 first = b.first;
267 second = b.second;
268 return *this;
269}
270
271# endif /* !defined(KMP_DEBUG) && !defined(COVER) */
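// Illustrative example (not from the original source): on a machine modeled
// with depth 3 (package, core, thread), the OS proc on package 1, core 0,
// thread 1 would carry labels {1, 0, 1}; an AddrUnsPair pairs that Address
// with the corresponding OS proc id.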
272
273
274static int
275__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
276{
277 const Address *aa = (const Address *)&(((AddrUnsPair *)a)
278 ->first);
279 const Address *bb = (const Address *)&(((AddrUnsPair *)b)
280 ->first);
281 unsigned depth = aa->depth;
282 unsigned i;
283 KMP_DEBUG_ASSERT(depth == bb->depth);
284 for (i = 0; i < depth; i++) {
285 if (aa->labels[i] < bb->labels[i]) return -1;
286 if (aa->labels[i] > bb->labels[i]) return 1;
287 }
288 return 0;
289}
290
291
292static int
293__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
294{
295 const Address *aa = (const Address *)&(((AddrUnsPair *)a)
296 ->first);
297 const Address *bb = (const Address *)&(((AddrUnsPair *)b)
298 ->first);
299 unsigned depth = aa->depth;
300 unsigned i;
301 KMP_DEBUG_ASSERT(depth == bb->depth);
302 KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
303 KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
304 for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
305 int j = depth - i - 1;
306 if (aa->childNums[j] < bb->childNums[j]) return -1;
307 if (aa->childNums[j] > bb->childNums[j]) return 1;
308 }
309 for (; i < depth; i++) {
310 int j = i - __kmp_affinity_compact;
311 if (aa->childNums[j] < bb->childNums[j]) return -1;
312 if (aa->childNums[j] > bb->childNums[j]) return 1;
313 }
314 return 0;
315}
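// Illustrative note (not from the original source): with depth == 3 and
// __kmp_affinity_compact == 1, the comparator above compares childNums[2]
// (the innermost level) first, then childNums[0] and childNums[1], so entries
// sharing the same innermost child number end up adjacent in the sorted table.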
316
/** A structure for holding machine-specific hierarchy info to be computed once at init. */
318class hierarchy_info {
319public:
320 /** Typical levels are threads/core, cores/package or socket, packages/node, nodes/machine,
321 etc. We don't want to get specific with nomenclature */
322 static const kmp_uint32 maxLevels=7;
323
324 /** This is specifically the depth of the machine configuration hierarchy, in terms of the
325 number of levels along the longest path from root to any leaf. It corresponds to the
326 number of entries in numPerLevel if we exclude all but one trailing 1. */
327 kmp_uint32 depth;
328 kmp_uint32 base_depth;
329 kmp_uint32 base_num_threads;
330 bool uninitialized;
331
332 /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
333 node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
334 and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
335 kmp_uint32 numPerLevel[maxLevels];
336 kmp_uint32 skipPerLevel[maxLevels];
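    /** Worked example (illustrative, not from the original source): for the
        4-package, 4-core, 2-HT machine above, applying the formula in init()
        below to numPerLevel = {2, 4, 4, 1, 1} yields skipPerLevel[0..3] =
        {1, 2, 8, 32}; skipPerLevel[i] is the number of leaves (threads)
        spanned by one node at level i. */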
337
338 void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
339 int hier_depth = adr2os[0].first.depth;
340 int level = 0;
341 for (int i=hier_depth-1; i>=0; --i) {
342 int max = -1;
343 for (int j=0; j<num_addrs; ++j) {
344 int next = adr2os[j].first.childNums[i];
345 if (next > max) max = next;
346 }
347 numPerLevel[level] = max+1;
348 ++level;
349 }
350 }
351
352 hierarchy_info() : depth(1), uninitialized(true) {}
353 void init(AddrUnsPair *adr2os, int num_addrs)
354 {
355 uninitialized = false;
356 for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
357 numPerLevel[i] = 1;
358 skipPerLevel[i] = 1;
359 }
360
361 // Sort table by physical ID
362 if (adr2os) {
363 qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
364 deriveLevels(adr2os, num_addrs);
365 }
366 else {
367 numPerLevel[0] = 4;
368 numPerLevel[1] = num_addrs/4;
369 if (num_addrs%4) numPerLevel[1]++;
370 }
371
372 base_num_threads = num_addrs;
373 for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
374 if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
375 depth++;
376
377 kmp_uint32 branch = 4;
378 if (numPerLevel[0] == 1) branch = num_addrs/4;
379 if (branch<4) branch=4;
380 for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
381 while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
382 if (numPerLevel[d] & 1) numPerLevel[d]++;
383 numPerLevel[d] = numPerLevel[d] >> 1;
384 if (numPerLevel[d+1] == 1) depth++;
385 numPerLevel[d+1] = numPerLevel[d+1] << 1;
386 }
387 if(numPerLevel[0] == 1) {
388 branch = branch >> 1;
389 if (branch<4) branch = 4;
390 }
391 }
392
393 for (kmp_uint32 i=1; i<depth; ++i)
394 skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
395
396 base_depth = depth;
397 }
398};
399
400static hierarchy_info machine_hierarchy;
401
402void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
403 if (machine_hierarchy.uninitialized)
404 machine_hierarchy.init(NULL, nproc);
405
406 if (nproc <= machine_hierarchy.base_num_threads)
407 machine_hierarchy.depth = machine_hierarchy.base_depth;
408 KMP_DEBUG_ASSERT(machine_hierarchy.depth > 0);
409 while (nproc > machine_hierarchy.skipPerLevel[machine_hierarchy.depth-1]) {
410 machine_hierarchy.depth++;
411 machine_hierarchy.skipPerLevel[machine_hierarchy.depth-1] = 2*machine_hierarchy.skipPerLevel[machine_hierarchy.depth-2];
412 }
413 thr_bar->depth = machine_hierarchy.depth;
414 thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
415 thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
416}
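// Illustrative note (not from the original source): if nproc exceeds the span
// of the current root level, the loop above appends extra levels, each covering
// twice as many threads as the level below it, until the hierarchy spans nproc.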

//
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the child_num
// vector of the address object. This is done in case the labels used for
// the children at one node of the hierarchy differ from those used for
// another node at the same level. Example: suppose the machine has 2 nodes
424// with 2 packages each. The first node contains packages 601 and 602, and
425// second node contains packages 603 and 604. If we try to sort the table
426// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
427// because we are paying attention to the labels themselves, not the ordinal
428// child numbers. By using the child numbers in the sort, the result is
429// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
430//
431static void
432__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
433 int numAddrs)
434{
435 KMP_DEBUG_ASSERT(numAddrs > 0);
436 int depth = address2os->first.depth;
437 unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
438 unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
439 * sizeof(unsigned));
440 int labCt;
441 for (labCt = 0; labCt < depth; labCt++) {
442 address2os[0].first.childNums[labCt] = counts[labCt] = 0;
443 lastLabel[labCt] = address2os[0].first.labels[labCt];
444 }
445 int i;
446 for (i = 1; i < numAddrs; i++) {
447 for (labCt = 0; labCt < depth; labCt++) {
448 if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
449 int labCt2;
450 for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
451 counts[labCt2] = 0;
452 lastLabel[labCt2] = address2os[i].first.labels[labCt2];
453 }
454 counts[labCt]++;
455 lastLabel[labCt] = address2os[i].first.labels[labCt];
456 break;
457 }
458 }
459 for (labCt = 0; labCt < depth; labCt++) {
460 address2os[i].first.childNums[labCt] = counts[labCt];
461 }
462 for (; labCt < (int)Address::maxDepth; labCt++) {
463 address2os[i].first.childNums[labCt] = 0;
464 }
465 }
466}
467
468
469//
470// All of the __kmp_affinity_create_*_map() routines should set
471// __kmp_affinity_masks to a vector of affinity mask objects of length
472// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
473// return the number of levels in the machine topology tree (zero if
474// __kmp_affinity_type == affinity_none).
475//
476// All of the __kmp_affinity_create_*_map() routines should set *fullMask
477// to the affinity mask for the initialization thread. They need to save and
478// restore the mask, and it could be needed later, so saving it is just an
479// optimization to avoid calling kmp_get_system_affinity() again.
480//
481static kmp_affin_mask_t *fullMask = NULL;
482
483kmp_affin_mask_t *
484__kmp_affinity_get_fullMask() { return fullMask; }
485
486
487static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif

493//
494// __kmp_affinity_uniform_topology() doesn't work when called from
495// places which support arbitrarily many levels in the machine topology
496// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
// and __kmp_affinity_create_x2apicid_map().
498//
499inline static bool
500__kmp_affinity_uniform_topology()
501{
502 return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
503}
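// Illustrative example (not from the original source): 2 packages x 8 cores
// x 2 threads with all 32 procs available gives a uniform topology; if some
// procs are excluded from the machine model, __kmp_avail_proc falls below the
// product and the topology is reported as non-uniform.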
504
505
506//
507// Print out the detailed machine topology map, i.e. the physical locations
508// of each OS proc.
509//
510static void
511__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
512 int pkgLevel, int coreLevel, int threadLevel)
513{
514 int proc;
515
516 KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
517 for (proc = 0; proc < len; proc++) {
518 int level;
519 kmp_str_buf_t buf;
520 __kmp_str_buf_init(&buf);
521 for (level = 0; level < depth; level++) {
522 if (level == threadLevel) {
523 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
524 }
525 else if (level == coreLevel) {
526 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
527 }
528 else if (level == pkgLevel) {
529 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
530 }
531 else if (level > pkgLevel) {
532 __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
533 level - pkgLevel - 1);
534 }
535 else {
536 __kmp_str_buf_print(&buf, "L%d ", level);
537 }
538 __kmp_str_buf_print(&buf, "%d ",
539 address2os[proc].first.labels[level]);
540 }
541 KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
542 buf.str);
543 __kmp_str_buf_free(&buf);
544 }
545}
546
547
548//
549// If we don't know how to retrieve the machine's processor topology, or
550// encounter an error in doing so, this routine is called to form a "flat"
551// mapping of os thread id's <-> processor id's.
552//
553static int
554__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
555 kmp_i18n_id_t *const msg_id)
556{
557 *address2os = NULL;
558 *msg_id = kmp_i18n_null;
559
560 //
    // Even if __kmp_affinity_type == affinity_none, this routine might still
    // be called to set __kmp_ncores, as well as
    // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
564 //
565 if (! KMP_AFFINITY_CAPABLE()) {
566 KMP_ASSERT(__kmp_affinity_type == affinity_none);
567 __kmp_ncores = nPackages = __kmp_xproc;
568 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
570 KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
571 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
572 KMP_INFORM(Uniform, "KMP_AFFINITY");
573 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
574 __kmp_nThreadsPerCore, __kmp_ncores);
575 }
576 return 0;
577 }
578
579 //
580 // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages. Make sure all these vars are set
583 // correctly, and return now if affinity is not enabled.
584 //
585 __kmp_ncores = nPackages = __kmp_avail_proc;
586 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
588 char buf[KMP_AFFIN_MASK_PRINT_LEN];
589 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
590
591 KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
592 if (__kmp_affinity_respect_mask) {
593 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
594 } else {
595 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
596 }
597 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
598 KMP_INFORM(Uniform, "KMP_AFFINITY");
599 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
600 __kmp_nThreadsPerCore, __kmp_ncores);
601 }
602 if (__kmp_affinity_type == affinity_none) {
603 return 0;
604 }
605
606 //
    // Construct the data structure to be returned.
608 //
609 *address2os = (AddrUnsPair*)
610 __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
611 int avail_ct = 0;
612 unsigned int i;
613 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
614 //
615 // Skip this proc if it is not included in the machine model.
616 //
617 if (! KMP_CPU_ISSET(i, fullMask)) {
618 continue;
619 }
620
621 Address addr(1);
622 addr.labels[0] = i;
623 (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
624 }
625 if (__kmp_affinity_verbose) {
626 KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
627 }
628
629 if (__kmp_affinity_gran_levels < 0) {
630 //
631 // Only the package level is modeled in the machine topology map,
632 // so the #levels of granularity is either 0 or 1.
633 //
634 if (__kmp_affinity_gran > affinity_gran_package) {
635 __kmp_affinity_gran_levels = 1;
636 }
637 else {
638 __kmp_affinity_gran_levels = 0;
639 }
640 }
641 return 1;
642}
643
644
# if KMP_GROUP_AFFINITY

647//
648// If multiple Windows* OS processor groups exist, we can create a 2-level
649// topology map with the groups at level 0 and the individual procs at
650// level 1.
651//
652// This facilitates letting the threads float among all procs in a group,
653// if granularity=group (the default when there are multiple groups).
654//
655static int
656__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
657 kmp_i18n_id_t *const msg_id)
658{
659 *address2os = NULL;
660 *msg_id = kmp_i18n_null;
661
662 //
663 // If we don't have multiple processor groups, return now.
664 // The flat mapping will be used.
665 //
666 if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
667 // FIXME set *msg_id
668 return -1;
669 }
670
671 //
    // Construct the data structure to be returned.
673 //
674 *address2os = (AddrUnsPair*)
675 __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
676 int avail_ct = 0;
677 int i;
678 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
679 //
680 // Skip this proc if it is not included in the machine model.
681 //
682 if (! KMP_CPU_ISSET(i, fullMask)) {
683 continue;
684 }
685
686 Address addr(2);
687 addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
688 addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
689 (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
690
691 if (__kmp_affinity_verbose) {
692 KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
693 addr.labels[1]);
694 }
695 }
696
697 if (__kmp_affinity_gran_levels < 0) {
698 if (__kmp_affinity_gran == affinity_gran_group) {
699 __kmp_affinity_gran_levels = 1;
700 }
701 else if ((__kmp_affinity_gran == affinity_gran_fine)
702 || (__kmp_affinity_gran == affinity_gran_thread)) {
703 __kmp_affinity_gran_levels = 0;
704 }
705 else {
706 const char *gran_str = NULL;
707 if (__kmp_affinity_gran == affinity_gran_core) {
708 gran_str = "core";
709 }
710 else if (__kmp_affinity_gran == affinity_gran_package) {
711 gran_str = "package";
712 }
713 else if (__kmp_affinity_gran == affinity_gran_node) {
714 gran_str = "node";
715 }
716 else {
717 KMP_ASSERT(0);
718 }
719
720 // Warning: can't use affinity granularity \"gran\" with group topology method, using "thread"
721 __kmp_affinity_gran_levels = 0;
722 }
723 }
724 return 2;
725}
726
# endif /* KMP_GROUP_AFFINITY */

729
730# if KMP_ARCH_X86 || KMP_ARCH_X86_64
731
732static int
733__kmp_cpuid_mask_width(int count) {
734 int r = 0;
735
736 while((1<<r) < count)
737 ++r;
738 return r;
739}
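// Illustrative example (not from the original source): __kmp_cpuid_mask_width(6)
// returns 3, since 1<<3 == 8 is the smallest power of 2 >= 6; the result is the
// number of low-order Apic Id bits reserved for a field with 6 possible values.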
740
741
742class apicThreadInfo {
743public:
744 unsigned osId; // param to __kmp_affinity_bind_thread
745 unsigned apicId; // from cpuid after binding
746 unsigned maxCoresPerPkg; // ""
747 unsigned maxThreadsPerPkg; // ""
748 unsigned pkgId; // inferred from above values
749 unsigned coreId; // ""
750 unsigned threadId; // ""
751};
752
753
754static int
755__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
756{
757 const apicThreadInfo *aa = (const apicThreadInfo *)a;
758 const apicThreadInfo *bb = (const apicThreadInfo *)b;
759 if (aa->osId < bb->osId) return -1;
760 if (aa->osId > bb->osId) return 1;
761 return 0;
762}
763
764
765static int
766__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
767{
768 const apicThreadInfo *aa = (const apicThreadInfo *)a;
769 const apicThreadInfo *bb = (const apicThreadInfo *)b;
770 if (aa->pkgId < bb->pkgId) return -1;
771 if (aa->pkgId > bb->pkgId) return 1;
772 if (aa->coreId < bb->coreId) return -1;
773 if (aa->coreId > bb->coreId) return 1;
774 if (aa->threadId < bb->threadId) return -1;
775 if (aa->threadId > bb->threadId) return 1;
776 return 0;
777}
778
779
780//
781// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
782// an algorithm which cycles through the available os threads, setting
783// the current thread's affinity mask to that thread, and then retrieves
784// the Apic Id for each thread context using the cpuid instruction.
785//
786static int
787__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
788 kmp_i18n_id_t *const msg_id)
789{
790 int rc;
791 *address2os = NULL;
792 *msg_id = kmp_i18n_null;
793
794# if KMP_MIC
795 {
796 // The code below will use cpuid(4).
797 // Check if cpuid(4) is supported.
798 // FIXME? - this really doesn't need to be specific to MIC.
799 kmp_cpuid buf;
800 __kmp_x86_cpuid(0, 0, &buf);
801 if (buf.eax < 4) {
802 *msg_id = kmp_i18n_str_NoLeaf4Support;
803 return -1;
804 }
805 }
806# endif // KMP_MIC
807
808 //
809 // Even if __kmp_affinity_type == affinity_none, this routine is still
810 // called to set __kmp_ht_enabled, & __kmp_ncores, as well as
811 // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
812 //
813 // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are not
815 // capable of calling __kmp_affinity_get_map()/__kmp_affinity_get_map(),
816 // then we need to do something else.
817 //
818 if (! KMP_AFFINITY_CAPABLE()) {
819 //
820 // Hack to try and infer the machine topology using only the data
821 // available from cpuid on the current thread, and __kmp_xproc.
822 //
823 KMP_ASSERT(__kmp_affinity_type == affinity_none);
824
825 //
826 // Get an upper bound on the number of threads per package using
827 // cpuid(1).
828 //
        // On some OS/chip combinations where HT is supported by the chip
830 // but is disabled, this value will be 2 on a single core chip.
831 // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
832 //
833 kmp_cpuid buf;
834 __kmp_x86_cpuid(1, 0, &buf);
835 int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
836 if (maxThreadsPerPkg == 0) {
837 maxThreadsPerPkg = 1;
838 }
839
840 //
841 // The num cores per pkg comes from cpuid(4).
842 // 1 must be added to the encoded value.
843 //
        // The author of cpu_count.cpp treated this as only an upper bound
845 // on the number of cores, but I haven't seen any cases where it
846 // was greater than the actual number of cores, so we will treat
847 // it as exact in this block of code.
848 //
849 // First, we need to check if cpuid(4) is supported on this chip.
850 // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
851 // has the value n or greater.
852 //
853 __kmp_x86_cpuid(0, 0, &buf);
854 if (buf.eax >= 4) {
855 __kmp_x86_cpuid(4, 0, &buf);
856 nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
857 }
858 else {
859 nCoresPerPkg = 1;
860 }
861
862 //
863 // There is no way to reliably tell if HT is enabled without issuing
        // the cpuid instruction from every thread, and correlating the cpuid
865 // info, so if the machine is not affinity capable, we assume that HT
866 // is off. We have seen quite a few machines where maxThreadsPerPkg
867 // is 2, yet the machine does not support HT.
868 //
869 // - Older OSes are usually found on machines with older chips, which
870 // do not support HT.
871 //
872 // - The performance penalty for mistakenly identifying a machine as
        // HT when it isn't (which results in blocktime being incorrectly set
        // to 0) is greater than the penalty for mistakenly identifying
875 // a machine as being 1 thread/core when it is really HT enabled
876 // (which results in blocktime being incorrectly set to a positive
877 // value).
878 //
879 __kmp_ncores = __kmp_xproc;
880 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
881 __kmp_nThreadsPerCore = 1;
        if (__kmp_affinity_verbose) {
883 KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
884 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
885 if (__kmp_affinity_uniform_topology()) {
886 KMP_INFORM(Uniform, "KMP_AFFINITY");
887 } else {
888 KMP_INFORM(NonUniform, "KMP_AFFINITY");
889 }
890 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
891 __kmp_nThreadsPerCore, __kmp_ncores);
892 }
893 return 0;
894 }
895
896 //
897 //
898 // From here on, we can assume that it is safe to call
899 // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
900 // even if __kmp_affinity_type = affinity_none.
901 //
902
903 //
904 // Save the affinity mask for the current thread.
905 //
906 kmp_affin_mask_t *oldMask;
907 KMP_CPU_ALLOC(oldMask);
908 KMP_ASSERT(oldMask != NULL);
909 __kmp_get_system_affinity(oldMask, TRUE);
910
911 //
912 // Run through each of the available contexts, binding the current thread
913 // to it, and obtaining the pertinent information using the cpuid instr.
914 //
915 // The relevant information is:
916 //
917 // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
    // has a unique Apic Id, which is of the form pkg# : core# : thread#.
919 //
920 // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The
921 // value of this field determines the width of the core# + thread#
922 // fields in the Apic Id. It is also an upper bound on the number
923 // of threads per package, but it has been verified that situations
    // happen where it is not exact. In particular, on certain OS/chip
925 // combinations where Intel(R) Hyper-Threading Technology is supported
926 // by the chip but has
927 // been disabled, the value of this field will be 2 (for a single core
928 // chip). On other OS/chip combinations supporting
929 // Intel(R) Hyper-Threading Technology, the value of
930 // this field will be 1 when Intel(R) Hyper-Threading Technology is
931 // disabled and 2 when it is enabled.
932 //
933 // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The
934 // value of this field (+1) determines the width of the core# field in
935 // the Apic Id. The comments in "cpucount.cpp" say that this value is
936 // an upper bound, but the IA-32 architecture manual says that it is
937 // exactly the number of cores per package, and I haven't seen any
938 // case where it wasn't.
939 //
940 // From this information, deduce the package Id, core Id, and thread Id,
941 // and set the corresponding fields in the apicThreadInfo struct.
942 //
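    // Worked example (illustrative, not from the original source): if
    // maxThreadsPerPkg == 16 and maxCoresPerPkg == 8, then widthCT == 4,
    // widthC == 3 and widthT == 1, so an Apic Id of 0x1b decodes to
    // pkgId == 0x1b >> 4 == 1, coreId == (0x1b >> 1) & 0x7 == 5, and
    // threadId == 0x1b & 0x1 == 1.
    //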
943 unsigned i;
944 apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
945 __kmp_avail_proc * sizeof(apicThreadInfo));
946 unsigned nApics = 0;
947 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
948 //
949 // Skip this proc if it is not included in the machine model.
950 //
951 if (! KMP_CPU_ISSET(i, fullMask)) {
952 continue;
953 }
954 KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
955
956 __kmp_affinity_bind_thread(i);
957 threadInfo[nApics].osId = i;
958
959 //
960 // The apic id and max threads per pkg come from cpuid(1).
961 //
962 kmp_cpuid buf;
963 __kmp_x86_cpuid(1, 0, &buf);
        if (! ((buf.edx >> 9) & 1)) {
965 __kmp_set_system_affinity(oldMask, TRUE);
966 __kmp_free(threadInfo);
967 KMP_CPU_FREE(oldMask);
968 *msg_id = kmp_i18n_str_ApicNotPresent;
969 return -1;
970 }
971 threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
972 threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
973 if (threadInfo[nApics].maxThreadsPerPkg == 0) {
974 threadInfo[nApics].maxThreadsPerPkg = 1;
975 }
976
977 //
978 // Max cores per pkg comes from cpuid(4).
979 // 1 must be added to the encoded value.
980 //
981 // First, we need to check if cpuid(4) is supported on this chip.
982 // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
983 // has the value n or greater.
984 //
985 __kmp_x86_cpuid(0, 0, &buf);
986 if (buf.eax >= 4) {
987 __kmp_x86_cpuid(4, 0, &buf);
988 threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
989 }
990 else {
991 threadInfo[nApics].maxCoresPerPkg = 1;
992 }
993
994 //
995 // Infer the pkgId / coreId / threadId using only the info
996 // obtained locally.
997 //
998 int widthCT = __kmp_cpuid_mask_width(
999 threadInfo[nApics].maxThreadsPerPkg);
1000 threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
1001
1002 int widthC = __kmp_cpuid_mask_width(
1003 threadInfo[nApics].maxCoresPerPkg);
1004 int widthT = widthCT - widthC;
1005 if (widthT < 0) {
1006 //
1007 // I've never seen this one happen, but I suppose it could, if
1008 // the cpuid instruction on a chip was really screwed up.
1009 // Make sure to restore the affinity mask before the tail call.
1010 //
1011 __kmp_set_system_affinity(oldMask, TRUE);
1012 __kmp_free(threadInfo);
1013 KMP_CPU_FREE(oldMask);
1014 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1015 return -1;
1016 }
1017
1018 int maskC = (1 << widthC) - 1;
1019 threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
1020 &maskC;
1021
1022 int maskT = (1 << widthT) - 1;
1023 threadInfo[nApics].threadId = threadInfo[nApics].apicId &maskT;
1024
1025 nApics++;
1026 }
1027
1028 //
1029 // We've collected all the info we need.
1030 // Restore the old affinity mask for this thread.
1031 //
1032 __kmp_set_system_affinity(oldMask, TRUE);
1033
1034 //
1035 // If there's only one thread context to bind to, form an Address object
1036 // with depth 1 and return immediately (or, if affinity is off, set
1037 // address2os to NULL and return).
1038 //
1039 // If it is configured to omit the package level when there is only a
1040 // single package, the logic at the end of this routine won't work if
1041 // there is only a single thread - it would try to form an Address
1042 // object with depth 0.
1043 //
1044 KMP_ASSERT(nApics > 0);
1045 if (nApics == 1) {
1046 __kmp_ncores = nPackages = 1;
1047 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
1049 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1050 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1051
1052 KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
1053 if (__kmp_affinity_respect_mask) {
1054 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1055 } else {
1056 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1057 }
1058 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1059 KMP_INFORM(Uniform, "KMP_AFFINITY");
1060 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1061 __kmp_nThreadsPerCore, __kmp_ncores);
1062 }
1063
1064 if (__kmp_affinity_type == affinity_none) {
1065 __kmp_free(threadInfo);
1066 KMP_CPU_FREE(oldMask);
1067 return 0;
1068 }
1069
1070 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
1071 Address addr(1);
1072 addr.labels[0] = threadInfo[0].pkgId;
1073 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
1074
1075 if (__kmp_affinity_gran_levels < 0) {
1076 __kmp_affinity_gran_levels = 0;
1077 }
1078
1079 if (__kmp_affinity_verbose) {
1080 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
1081 }
1082
1083 __kmp_free(threadInfo);
1084 KMP_CPU_FREE(oldMask);
1085 return 1;
1086 }
1087
1088 //
1089 // Sort the threadInfo table by physical Id.
1090 //
1091 qsort(threadInfo, nApics, sizeof(*threadInfo),
1092 __kmp_affinity_cmp_apicThreadInfo_phys_id);
1093
1094 //
1095 // The table is now sorted by pkgId / coreId / threadId, but we really
1096 // don't know the radix of any of the fields. pkgId's may be sparsely
1097 // assigned among the chips on a system. Although coreId's are usually
1098 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
1099 // [0..threadsPerCore-1], we don't want to make any such assumptions.
1100 //
1101 // For that matter, we don't know what coresPerPkg and threadsPerCore
1102 // (or the total # packages) are at this point - we want to determine
1103 // that now. We only have an upper bound on the first two figures.
1104 //
1105 // We also perform a consistency check at this point: the values returned
1106 // by the cpuid instruction for any thread bound to a given package had
1107 // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
1108 //
1109 nPackages = 1;
1110 nCoresPerPkg = 1;
1111 __kmp_nThreadsPerCore = 1;
1112 unsigned nCores = 1;
1113
1114 unsigned pkgCt = 1; // to determine radii
1115 unsigned lastPkgId = threadInfo[0].pkgId;
1116 unsigned coreCt = 1;
1117 unsigned lastCoreId = threadInfo[0].coreId;
1118 unsigned threadCt = 1;
1119 unsigned lastThreadId = threadInfo[0].threadId;
1120
    // intra-pkg consistency checks
1122 unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
1123 unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
1124
1125 for (i = 1; i < nApics; i++) {
1126 if (threadInfo[i].pkgId != lastPkgId) {
1127 nCores++;
1128 pkgCt++;
1129 lastPkgId = threadInfo[i].pkgId;
1130 if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
1131 coreCt = 1;
1132 lastCoreId = threadInfo[i].coreId;
1133 if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1134 threadCt = 1;
1135 lastThreadId = threadInfo[i].threadId;
1136
1137 //
1138 // This is a different package, so go on to the next iteration
1139 // without doing any consistency checks. Reset the consistency
1140 // check vars, though.
1141 //
1142 prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
1143 prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
1144 continue;
1145 }
1146
1147 if (threadInfo[i].coreId != lastCoreId) {
1148 nCores++;
1149 coreCt++;
1150 lastCoreId = threadInfo[i].coreId;
1151 if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1152 threadCt = 1;
1153 lastThreadId = threadInfo[i].threadId;
1154 }
1155 else if (threadInfo[i].threadId != lastThreadId) {
1156 threadCt++;
1157 lastThreadId = threadInfo[i].threadId;
1158 }
1159 else {
1160 __kmp_free(threadInfo);
1161 KMP_CPU_FREE(oldMask);
1162 *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
1163 return -1;
1164 }
1165
1166 //
1167 // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
        // fields agree between all the threads bound to a given package.
1169 //
1170 if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
1171 || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
1172 __kmp_free(threadInfo);
1173 KMP_CPU_FREE(oldMask);
1174 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1175 return -1;
1176 }
1177 }
1178 nPackages = pkgCt;
1179 if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
1180 if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1181
1182 //
1183 // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages. Make sure all these vars are set
1186 // correctly, and return now if affinity is not enabled.
1187 //
    __kmp_ncores = nCores;
1189 if (__kmp_affinity_verbose) {
1190 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1191 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1192
1193 KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
1194 if (__kmp_affinity_respect_mask) {
1195 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1196 } else {
1197 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1198 }
1199 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1200 if (__kmp_affinity_uniform_topology()) {
1201 KMP_INFORM(Uniform, "KMP_AFFINITY");
1202 } else {
1203 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1204 }
1205 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1206 __kmp_nThreadsPerCore, __kmp_ncores);
1207
1208 }
1209
1210 if (__kmp_affinity_type == affinity_none) {
1211 __kmp_free(threadInfo);
1212 KMP_CPU_FREE(oldMask);
1213 return 0;
1214 }
1215
1216 //
1217 // Now that we've determined the number of packages, the number of cores
1218 // per package, and the number of threads per core, we can construct the
1219 // data structure that is to be returned.
1220 //
1221 int pkgLevel = 0;
1222 int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
1223 int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
1224 unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
1225
1226 KMP_ASSERT(depth > 0);
1227 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
1228
1229 for (i = 0; i < nApics; ++i) {
1230 Address addr(depth);
1231 unsigned os = threadInfo[i].osId;
1232 int d = 0;
1233
1234 if (pkgLevel >= 0) {
1235 addr.labels[d++] = threadInfo[i].pkgId;
1236 }
1237 if (coreLevel >= 0) {
1238 addr.labels[d++] = threadInfo[i].coreId;
1239 }
1240 if (threadLevel >= 0) {
1241 addr.labels[d++] = threadInfo[i].threadId;
1242 }
1243 (*address2os)[i] = AddrUnsPair(addr, os);
1244 }
1245
1246 if (__kmp_affinity_gran_levels < 0) {
1247 //
1248 // Set the granularity level based on what levels are modeled
1249 // in the machine topology map.
1250 //
1251 __kmp_affinity_gran_levels = 0;
1252 if ((threadLevel >= 0)
1253 && (__kmp_affinity_gran > affinity_gran_thread)) {
1254 __kmp_affinity_gran_levels++;
1255 }
1256 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1257 __kmp_affinity_gran_levels++;
1258 }
1259 if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
1260 __kmp_affinity_gran_levels++;
1261 }
1262 }
1263
1264 if (__kmp_affinity_verbose) {
1265 __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
1266 coreLevel, threadLevel);
1267 }
1268
1269 __kmp_free(threadInfo);
1270 KMP_CPU_FREE(oldMask);
1271 return depth;
1272}
1273
1274
1275//
1276// Intel(R) microarchitecture code name Nehalem, Dunnington and later
1277// architectures support a newer interface for specifying the x2APIC Ids,
1278// based on cpuid leaf 11.
1279//
1280static int
1281__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
1282 kmp_i18n_id_t *const msg_id)
1283{
1284 kmp_cpuid buf;
1285
1286 *address2os = NULL;
1287 *msg_id = kmp_i18n_null;
1288
1289 //
1290 // Check to see if cpuid leaf 11 is supported.
1291 //
1292 __kmp_x86_cpuid(0, 0, &buf);
1293 if (buf.eax < 11) {
1294 *msg_id = kmp_i18n_str_NoLeaf11Support;
1295 return -1;
1296 }
1297 __kmp_x86_cpuid(11, 0, &buf);
1298 if (buf.ebx == 0) {
1299 *msg_id = kmp_i18n_str_NoLeaf11Support;
1300 return -1;
1301 }
1302
1303 //
1304 // Find the number of levels in the machine topology. While we're at it,
1305 // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will
1306 // try to get more accurate values later by explicitly counting them,
1307 // but get reasonable defaults now, in case we return early.
1308 //
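    // Illustrative walk-through (not from the original source): if sub-leaf 0
    // of cpuid(11) reports kind 1 (SMT), sub-leaf 1 reports kind 2 (core), and
    // sub-leaf 2 returns ebx == 0, the loop below exits with depth == 3 and
    // (before the inversion further down) threadLevel == 0, coreLevel == 1,
    // pkgLevel == 2.
    //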
1309 int level;
1310 int threadLevel = -1;
1311 int coreLevel = -1;
1312 int pkgLevel = -1;
1313 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
1314
1315 for (level = 0;; level++) {
1316 if (level > 31) {
1317 //
1318 // FIXME: Hack for DPD200163180
1319 //
1320 // If level is big then something went wrong -> exiting
1321 //
1322 // There could actually be 32 valid levels in the machine topology,
1323 // but so far, the only machine we have seen which does not exit
1324 // this loop before iteration 32 has fubar x2APIC settings.
1325 //
1326 // For now, just reject this case based upon loop trip count.
1327 //
1328 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1329 return -1;
1330 }
1331 __kmp_x86_cpuid(11, level, &buf);
1332 if (buf.ebx == 0) {
1333 if (pkgLevel < 0) {
1334 //
1335 // Will infer nPackages from __kmp_xproc
1336 //
1337 pkgLevel = level;
1338 level++;
1339 }
1340 break;
1341 }
1342 int kind = (buf.ecx >> 8) & 0xff;
1343 if (kind == 1) {
1344 //
1345 // SMT level
1346 //
1347 threadLevel = level;
1348 coreLevel = -1;
1349 pkgLevel = -1;
1350 __kmp_nThreadsPerCore = buf.ebx & 0xff;
1351 if (__kmp_nThreadsPerCore == 0) {
1352 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1353 return -1;
1354 }
1355 }
1356 else if (kind == 2) {
1357 //
1358 // core level
1359 //
1360 coreLevel = level;
1361 pkgLevel = -1;
1362 nCoresPerPkg = buf.ebx & 0xff;
1363 if (nCoresPerPkg == 0) {
1364 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1365 return -1;
1366 }
1367 }
1368 else {
1369 if (level <= 0) {
1370 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1371 return -1;
1372 }
1373 if (pkgLevel >= 0) {
1374 continue;
1375 }
1376 pkgLevel = level;
1377 nPackages = buf.ebx & 0xff;
1378 if (nPackages == 0) {
1379 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1380 return -1;
1381 }
1382 }
1383 }
1384 int depth = level;
1385
1386 //
1387 // In the above loop, "level" was counted from the finest level (usually
1388 // thread) to the coarsest. The caller expects that we will place the
1389 // labels in (*address2os)[].first.labels[] in the inverse order, so
1390 // we need to invert the vars saying which level means what.
1391 //
1392 if (threadLevel >= 0) {
1393 threadLevel = depth - threadLevel - 1;
1394 }
1395 if (coreLevel >= 0) {
1396 coreLevel = depth - coreLevel - 1;
1397 }
1398 KMP_DEBUG_ASSERT(pkgLevel >= 0);
1399 pkgLevel = depth - pkgLevel - 1;
1400
1401 //
1402 // The algorithm used starts by setting the affinity to each available
1403 // thread and retrieving info from the cpuid instruction, so if we are not
1404 // capable of calling __kmp_affinity_get_map()/__kmp_affinity_get_map(),
1405 // then we need to do something else - use the defaults that we calculated
1406 // from issuing cpuid without binding to each proc.
1407 //
1408 if (! KMP_AFFINITY_CAPABLE())
1409 {
1410 //
1411 // Hack to try and infer the machine topology using only the data
1412 // available from cpuid on the current thread, and __kmp_xproc.
1413 //
1414 KMP_ASSERT(__kmp_affinity_type == affinity_none);
1415
1416 __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
1417 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        if (__kmp_affinity_verbose) {
1419 KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
1420 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1421 if (__kmp_affinity_uniform_topology()) {
1422 KMP_INFORM(Uniform, "KMP_AFFINITY");
1423 } else {
1424 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1425 }
1426 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1427 __kmp_nThreadsPerCore, __kmp_ncores);
1428 }
1429 return 0;
1430 }
1431
1432 //
1433 //
1434 // From here on, we can assume that it is safe to call
1435 // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
1436 // even if __kmp_affinity_type = affinity_none.
1437 //
1438
1439 //
1440 // Save the affinity mask for the current thread.
1441 //
1442 kmp_affin_mask_t *oldMask;
1443 KMP_CPU_ALLOC(oldMask);
1444 __kmp_get_system_affinity(oldMask, TRUE);
1445
1446 //
1447 // Allocate the data structure to be returned.
1448 //
1449 AddrUnsPair *retval = (AddrUnsPair *)
1450 __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
1451
1452 //
1453 // Run through each of the available contexts, binding the current thread
1454 // to it, and obtaining the pertinent information using the cpuid instr.
1455 //
1456 unsigned int proc;
1457 int nApics = 0;
1458 for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
1459 //
1460 // Skip this proc if it is not included in the machine model.
1461 //
1462 if (! KMP_CPU_ISSET(proc, fullMask)) {
1463 continue;
1464 }
1465 KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
1466
1467 __kmp_affinity_bind_thread(proc);
1468
1469 //
        // Extract the labels for each level in the machine topology map
1471 // from the Apic ID.
1472 //
1473 Address addr(depth);
1474 int prev_shift = 0;
1475
1476 for (level = 0; level < depth; level++) {
1477 __kmp_x86_cpuid(11, level, &buf);
1478 unsigned apicId = buf.edx;
1479 if (buf.ebx == 0) {
1480 if (level != depth - 1) {
1481 KMP_CPU_FREE(oldMask);
1482 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1483 return -1;
1484 }
1485 addr.labels[depth - level - 1] = apicId >> prev_shift;
1486 level++;
1487 break;
1488 }
1489 int shift = buf.eax & 0x1f;
1490 int mask = (1 << shift) - 1;
1491 addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
1492 prev_shift = shift;
1493 }
1494 if (level != depth) {
1495 KMP_CPU_FREE(oldMask);
1496 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1497 return -1;
1498 }
1499
1500 retval[nApics] = AddrUnsPair(addr, proc);
1501 nApics++;
1502 }
1503
1504 //
1505 // We've collected all the info we need.
1506 // Restore the old affinity mask for this thread.
1507 //
1508 __kmp_set_system_affinity(oldMask, TRUE);
1509
1510 //
1511 // If there's only one thread context to bind to, return now.
1512 //
1513 KMP_ASSERT(nApics > 0);
1514 if (nApics == 1) {
1515 __kmp_ncores = nPackages = 1;
1516 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
1518 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1519 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1520
1521 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1522 if (__kmp_affinity_respect_mask) {
1523 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1524 } else {
1525 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1526 }
1527 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1528 KMP_INFORM(Uniform, "KMP_AFFINITY");
1529 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1530 __kmp_nThreadsPerCore, __kmp_ncores);
1531 }
1532
1533 if (__kmp_affinity_type == affinity_none) {
1534 __kmp_free(retval);
1535 KMP_CPU_FREE(oldMask);
1536 return 0;
1537 }
1538
1539 //
1540 // Form an Address object which only includes the package level.
1541 //
1542 Address addr(1);
1543 addr.labels[0] = retval[0].first.labels[pkgLevel];
1544 retval[0].first = addr;
1545
1546 if (__kmp_affinity_gran_levels < 0) {
1547 __kmp_affinity_gran_levels = 0;
1548 }
1549
1550 if (__kmp_affinity_verbose) {
1551 __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
1552 }
1553
1554 *address2os = retval;
1555 KMP_CPU_FREE(oldMask);
1556 return 1;
1557 }
1558
1559 //
1560 // Sort the table by physical Id.
1561 //
1562 qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
1563
1564 //
1565 // Find the radix at each of the levels.
1566 //
1567 unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1568 unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1569 unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1570 unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1571 for (level = 0; level < depth; level++) {
1572 totals[level] = 1;
1573 maxCt[level] = 1;
1574 counts[level] = 1;
1575 last[level] = retval[0].first.labels[level];
1576 }
1577
1578 //
1579 // From here on, the iteration variable "level" runs from the finest
1580 // level to the coarsest, i.e. we iterate forward through
1581 // (*address2os)[].first.labels[] - in the previous loops, we iterated
1582 // backwards.
1583 //
1584 for (proc = 1; (int)proc < nApics; proc++) {
1585 int level;
1586 for (level = 0; level < depth; level++) {
1587 if (retval[proc].first.labels[level] != last[level]) {
1588 int j;
1589 for (j = level + 1; j < depth; j++) {
1590 totals[j]++;
1591 counts[j] = 1;
1592 // The line below causes printing incorrect topology information
1593 // in case the max value for some level (maxCt[level]) is encountered earlier than
                    // a smaller value while going through the array.
                    // For example, if pkg0 has 4 cores and pkg1 has 2 cores, then maxCt[1] == 2
1596 // whereas it must be 4.
1597 // TODO!!! Check if it can be commented safely
1598 //maxCt[j] = 1;
1599 last[j] = retval[proc].first.labels[j];
1600 }
1601 totals[level]++;
1602 counts[level]++;
1603 if (counts[level] > maxCt[level]) {
1604 maxCt[level] = counts[level];
1605 }
1606 last[level] = retval[proc].first.labels[level];
1607 break;
1608 }
1609 else if (level == depth - 1) {
1610 __kmp_free(last);
1611 __kmp_free(maxCt);
1612 __kmp_free(counts);
1613 __kmp_free(totals);
1614 __kmp_free(retval);
1615 KMP_CPU_FREE(oldMask);
1616 *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
1617 return -1;
1618 }
1619 }
1620 }
1621
1622 //
1623 // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages. Make sure all these vars are set
1626 // correctly, and return if affinity is not enabled.
1627 //
1628 if (threadLevel >= 0) {
1629 __kmp_nThreadsPerCore = maxCt[threadLevel];
1630 }
1631 else {
1632 __kmp_nThreadsPerCore = 1;
1633 }
    nPackages = totals[pkgLevel];
1635
1636 if (coreLevel >= 0) {
1637 __kmp_ncores = totals[coreLevel];
1638 nCoresPerPkg = maxCt[coreLevel];
1639 }
1640 else {
1641 __kmp_ncores = nPackages;
1642 nCoresPerPkg = 1;
1643 }
1644
1645 //
1646 // Check to see if the machine topology is uniform
1647 //
1648 unsigned prod = maxCt[0];
1649 for (level = 1; level < depth; level++) {
1650 prod *= maxCt[level];
1651 }
1652 bool uniform = (prod == totals[level - 1]);
1653
1654 //
1655 // Print the machine topology summary.
1656 //
1657 if (__kmp_affinity_verbose) {
1658 char mask[KMP_AFFIN_MASK_PRINT_LEN];
1659 __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1660
1661 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1662 if (__kmp_affinity_respect_mask) {
1663 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
1664 } else {
1665 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
1666 }
1667 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1668 if (uniform) {
1669 KMP_INFORM(Uniform, "KMP_AFFINITY");
1670 } else {
1671 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1672 }
1673
1674 kmp_str_buf_t buf;
1675 __kmp_str_buf_init(&buf);
1676
1677 __kmp_str_buf_print(&buf, "%d", totals[0]);
1678 for (level = 1; level <= pkgLevel; level++) {
1679 __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
1680 }
1681 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
1682 __kmp_nThreadsPerCore, __kmp_ncores);
1683
1684 __kmp_str_buf_free(&buf);
1685 }
1686
1687 if (__kmp_affinity_type == affinity_none) {
1688 __kmp_free(last);
1689 __kmp_free(maxCt);
1690 __kmp_free(counts);
1691 __kmp_free(totals);
1692 __kmp_free(retval);
1693 KMP_CPU_FREE(oldMask);
1694 return 0;
1695 }
1696
1697 //
    // Find any levels with radix 1, and remove them from the map
1699 // (except for the package level).
1700 //
1701 int new_depth = 0;
1702 for (level = 0; level < depth; level++) {
1703 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1704 continue;
1705 }
1706 new_depth++;
1707 }
1708
1709 //
1710 // If we are removing any levels, allocate a new vector to return,
1711 // and copy the relevant information to it.
1712 //
1713 if (new_depth != depth) {
1714 AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
1715 sizeof(AddrUnsPair) * nApics);
1716 for (proc = 0; (int)proc < nApics; proc++) {
1717 Address addr(new_depth);
1718 new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
1719 }
1720 int new_level = 0;
1721 for (level = 0; level < depth; level++) {
1722 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1723 if (level == threadLevel) {
1724 threadLevel = -1;
1725 }
1726 else if ((threadLevel >= 0) && (level < threadLevel)) {
1727 threadLevel--;
1728 }
1729 if (level == coreLevel) {
1730 coreLevel = -1;
1731 }
1732 else if ((coreLevel >= 0) && (level < coreLevel)) {
1733 coreLevel--;
1734 }
1735 if (level < pkgLevel) {
1736 pkgLevel--;
1737 }
1738 continue;
1739 }
1740 for (proc = 0; (int)proc < nApics; proc++) {
1741 new_retval[proc].first.labels[new_level]
1742 = retval[proc].first.labels[level];
1743 }
1744 new_level++;
1745 }
1746
1747 __kmp_free(retval);
1748 retval = new_retval;
1749 depth = new_depth;
1750 }
1751
1752 if (__kmp_affinity_gran_levels < 0) {
1753 //
1754 // Set the granularity level based on what levels are modeled
1755 // in the machine topology map.
1756 //
1757 __kmp_affinity_gran_levels = 0;
1758 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
1759 __kmp_affinity_gran_levels++;
1760 }
1761 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1762 __kmp_affinity_gran_levels++;
1763 }
1764 if (__kmp_affinity_gran > affinity_gran_package) {
1765 __kmp_affinity_gran_levels++;
1766 }
1767 }
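    //
    // For example (illustrative): with KMP_AFFINITY=granularity=core on a
    // topology that models thread, core and package levels, only the thread
    // level lies below the requested granularity, so __kmp_affinity_gran_levels
    // ends up as 1 and each thread's mask will later cover every hardware
    // thread of its core.
    //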
1768
1769 if (__kmp_affinity_verbose) {
1770 __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
1771 coreLevel, threadLevel);
1772 }
1773
1774 __kmp_free(last);
1775 __kmp_free(maxCt);
1776 __kmp_free(counts);
1777 __kmp_free(totals);
1778 KMP_CPU_FREE(oldMask);
1779 *address2os = retval;
1780 return depth;
1781}
1782
1783
1784# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1785
1786
1787#define osIdIndex 0
1788#define threadIdIndex 1
1789#define coreIdIndex 2
1790#define pkgIdIndex 3
1791#define nodeIdIndex 4
1792
1793typedef unsigned *ProcCpuInfo;
1794static unsigned maxIndex = pkgIdIndex;
1795
1796
1797static int
1798__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
1799{
1800 const unsigned *aa = (const unsigned *)a;
1801 const unsigned *bb = (const unsigned *)b;
1802 if (aa[osIdIndex] < bb[osIdIndex]) return -1;
1803 if (aa[osIdIndex] > bb[osIdIndex]) return 1;
1804 return 0;
1805}
1806
1807
1808static int
1809__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
1810{
1811 unsigned i;
1812 const unsigned *aa = *((const unsigned **)a);
1813 const unsigned *bb = *((const unsigned **)b);
1814 for (i = maxIndex; ; i--) {
1815 if (aa[i] < bb[i]) return -1;
1816 if (aa[i] > bb[i]) return 1;
1817 if (i == osIdIndex) break;
1818 }
1819 return 0;
1820}
1821
1822
1823//
1824// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
1825// affinity map.
1826//
1827static int
1828__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
1829 kmp_i18n_id_t *const msg_id, FILE *f)
1830{
1831 *address2os = NULL;
1832 *msg_id = kmp_i18n_null;
1833
1834 //
1835    // Scan the file, and count the number of "processor" (osId) fields,
Alp Toker8f2d3f02014-02-24 10:40:15 +00001836 // and find the highest value of <n> for a node_<n> field.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001837 //
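    // A typical record in /proc/cpuinfo contains, among many other fields,
    // lines such as the following (illustrative values); only the fields
    // recognized below ("processor", "physical id", "core id", "thread id",
    // and "node_<n> id" in the alternate file format) are used:
    //
    //     processor       : 0
    //     physical id     : 0
    //     core id         : 0
    //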
1838 char buf[256];
1839 unsigned num_records = 0;
1840 while (! feof(f)) {
1841 buf[sizeof(buf) - 1] = 1;
1842 if (! fgets(buf, sizeof(buf), f)) {
1843 //
1844            // Read errors are presumably due to EOF
1845 //
1846 break;
1847 }
1848
1849 char s1[] = "processor";
1850 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1851 num_records++;
1852 continue;
1853 }
1854
1855 //
1856 // FIXME - this will match "node_<n> <garbage>"
1857 //
1858 unsigned level;
1859 if (sscanf(buf, "node_%d id", &level) == 1) {
1860 if (nodeIdIndex + level >= maxIndex) {
1861 maxIndex = nodeIdIndex + level;
1862 }
1863 continue;
1864 }
1865 }
1866
1867 //
1868 // Check for empty file / no valid processor records, or too many.
1869 // The number of records can't exceed the number of valid bits in the
1870 // affinity mask.
1871 //
1872 if (num_records == 0) {
1873 *line = 0;
1874 *msg_id = kmp_i18n_str_NoProcRecords;
1875 return -1;
1876 }
1877 if (num_records > (unsigned)__kmp_xproc) {
1878 *line = 0;
1879 *msg_id = kmp_i18n_str_TooManyProcRecords;
1880 return -1;
1881 }
1882
1883 //
1884    // Set the file pointer back to the beginning, so that we can scan the
1885 // file again, this time performing a full parse of the data.
1886    // Allocate a vector of ProcCpuInfo objects, where we will place the data.
1887 // Adding an extra element at the end allows us to remove a lot of extra
1888 // checks for termination conditions.
1889 //
1890 if (fseek(f, 0, SEEK_SET) != 0) {
1891 *line = 0;
1892 *msg_id = kmp_i18n_str_CantRewindCpuinfo;
1893 return -1;
1894 }
1895
1896 //
1897 // Allocate the array of records to store the proc info in. The dummy
1898 // element at the end makes the logic in filling them out easier to code.
1899 //
1900 unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
1901 * sizeof(unsigned *));
1902 unsigned i;
1903 for (i = 0; i <= num_records; i++) {
1904 threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
1905 * sizeof(unsigned));
1906 }
1907
1908#define CLEANUP_THREAD_INFO \
1909 for (i = 0; i <= num_records; i++) { \
1910 __kmp_free(threadInfo[i]); \
1911 } \
1912 __kmp_free(threadInfo);
1913
1914 //
1915 // A value of UINT_MAX means that we didn't find the field
1916 //
1917 unsigned __index;
1918
1919#define INIT_PROC_INFO(p) \
1920 for (__index = 0; __index <= maxIndex; __index++) { \
1921 (p)[__index] = UINT_MAX; \
1922 }
1923
1924 for (i = 0; i <= num_records; i++) {
1925 INIT_PROC_INFO(threadInfo[i]);
1926 }
1927
1928 unsigned num_avail = 0;
1929 *line = 0;
1930 while (! feof(f)) {
1931 //
1932 // Create an inner scoping level, so that all the goto targets at the
1933 // end of the loop appear in an outer scoping level. This avoids
1934 // warnings about jumping past an initialization to a target in the
1935 // same block.
1936 //
1937 {
1938 buf[sizeof(buf) - 1] = 1;
1939 bool long_line = false;
1940 if (! fgets(buf, sizeof(buf), f)) {
1941 //
1942                // Read errors are presumably due to EOF
1943 //
1944 // If there is valid data in threadInfo[num_avail], then fake
1945                // a blank line to ensure that the last address gets parsed.
1946 //
1947 bool valid = false;
1948 for (i = 0; i <= maxIndex; i++) {
1949 if (threadInfo[num_avail][i] != UINT_MAX) {
1950 valid = true;
1951 }
1952 }
1953 if (! valid) {
1954 break;
1955 }
1956 buf[0] = 0;
1957 } else if (!buf[sizeof(buf) - 1]) {
1958 //
1959 // The line is longer than the buffer. Set a flag and don't
1960                // emit an error if we were going to ignore the line anyway.
1961 //
1962 long_line = true;
1963
1964#define CHECK_LINE \
1965 if (long_line) { \
1966 CLEANUP_THREAD_INFO; \
1967 *msg_id = kmp_i18n_str_LongLineCpuinfo; \
1968 return -1; \
1969 }
1970 }
1971 (*line)++;
1972
1973 char s1[] = "processor";
1974 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1975 CHECK_LINE;
1976 char *p = strchr(buf + sizeof(s1) - 1, ':');
1977 unsigned val;
1978 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
1979 if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
1980 threadInfo[num_avail][osIdIndex] = val;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001981#if KMP_OS_LINUX && USE_SYSFS_INFO
1982 char path[256];
1983 snprintf(path, sizeof(path),
1984 "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
1985 threadInfo[num_avail][osIdIndex]);
1986 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
1987
1988 snprintf(path, sizeof(path),
1989 "/sys/devices/system/cpu/cpu%u/topology/core_id",
1990 threadInfo[num_avail][osIdIndex]);
1991 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001992 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001993#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001994 }
1995 char s2[] = "physical id";
1996 if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
1997 CHECK_LINE;
1998 char *p = strchr(buf + sizeof(s2) - 1, ':');
1999 unsigned val;
2000 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2001 if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
2002 threadInfo[num_avail][pkgIdIndex] = val;
2003 continue;
2004 }
2005 char s3[] = "core id";
2006 if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
2007 CHECK_LINE;
2008 char *p = strchr(buf + sizeof(s3) - 1, ':');
2009 unsigned val;
2010 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2011 if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
2012 threadInfo[num_avail][coreIdIndex] = val;
2013 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002014#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jim Cownie5e8470a2013-09-27 10:38:44 +00002015 }
2016 char s4[] = "thread id";
2017 if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
2018 CHECK_LINE;
2019 char *p = strchr(buf + sizeof(s4) - 1, ':');
2020 unsigned val;
2021 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2022 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
2023 threadInfo[num_avail][threadIdIndex] = val;
2024 continue;
2025 }
2026 unsigned level;
2027 if (sscanf(buf, "node_%d id", &level) == 1) {
2028 CHECK_LINE;
2029 char *p = strchr(buf + sizeof(s4) - 1, ':');
2030 unsigned val;
2031 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2032 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
2033 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
2034 threadInfo[num_avail][nodeIdIndex + level] = val;
2035 continue;
2036 }
2037
2038 //
2039 // We didn't recognize the leading token on the line.
2040 // There are lots of leading tokens that we don't recognize -
2041 // if the line isn't empty, go on to the next line.
2042 //
2043 if ((*buf != 0) && (*buf != '\n')) {
2044 //
2045 // If the line is longer than the buffer, read characters
2046 // until we find a newline.
2047 //
2048 if (long_line) {
2049 int ch;
2050 while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
2051 }
2052 continue;
2053 }
2054
2055 //
2056 // A newline has signalled the end of the processor record.
2057 // Check that there aren't too many procs specified.
2058 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002059 if ((int)num_avail == __kmp_xproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002060 CLEANUP_THREAD_INFO;
2061 *msg_id = kmp_i18n_str_TooManyEntries;
2062 return -1;
2063 }
2064
2065 //
2066 // Check for missing fields. The osId field must be there, and we
2067 // currently require that the physical id field is specified, also.
2068 //
2069 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
2070 CLEANUP_THREAD_INFO;
2071 *msg_id = kmp_i18n_str_MissingProcField;
2072 return -1;
2073 }
2074 if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
2075 CLEANUP_THREAD_INFO;
2076 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
2077 return -1;
2078 }
2079
2080 //
2081 // Skip this proc if it is not included in the machine model.
2082 //
2083 if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
2084 INIT_PROC_INFO(threadInfo[num_avail]);
2085 continue;
2086 }
2087
2088 //
2089 // We have a successful parse of this proc's info.
2090 // Increment the counter, and prepare for the next proc.
2091 //
2092 num_avail++;
2093 KMP_ASSERT(num_avail <= num_records);
2094 INIT_PROC_INFO(threadInfo[num_avail]);
2095 }
2096 continue;
2097
2098 no_val:
2099 CLEANUP_THREAD_INFO;
2100 *msg_id = kmp_i18n_str_MissingValCpuinfo;
2101 return -1;
2102
2103 dup_field:
2104 CLEANUP_THREAD_INFO;
2105 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
2106 return -1;
2107 }
2108 *line = 0;
2109
2110# if KMP_MIC && REDUCE_TEAM_SIZE
2111 unsigned teamSize = 0;
2112# endif // KMP_MIC && REDUCE_TEAM_SIZE
2113
2114 // check for num_records == __kmp_xproc ???
2115
2116 //
2117 // If there's only one thread context to bind to, form an Address object
2118 // with depth 1 and return immediately (or, if affinity is off, set
2119 // address2os to NULL and return).
2120 //
2121 // If it is configured to omit the package level when there is only a
2122 // single package, the logic at the end of this routine won't work if
2123 // there is only a single thread - it would try to form an Address
2124 // object with depth 0.
2125 //
2126 KMP_ASSERT(num_avail > 0);
2127 KMP_ASSERT(num_avail <= num_records);
2128 if (num_avail == 1) {
2129 __kmp_ncores = 1;
2130 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002131 if (__kmp_affinity_verbose) {
2132 if (! KMP_AFFINITY_CAPABLE()) {
2133 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2134 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2135 KMP_INFORM(Uniform, "KMP_AFFINITY");
2136 }
2137 else {
2138 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2139 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
2140 fullMask);
2141 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2142 if (__kmp_affinity_respect_mask) {
2143 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2144 } else {
2145 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2146 }
2147 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2148 KMP_INFORM(Uniform, "KMP_AFFINITY");
2149 }
2150 int index;
2151 kmp_str_buf_t buf;
2152 __kmp_str_buf_init(&buf);
2153 __kmp_str_buf_print(&buf, "1");
2154 for (index = maxIndex - 1; index > pkgIdIndex; index--) {
2155 __kmp_str_buf_print(&buf, " x 1");
2156 }
2157 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
2158 __kmp_str_buf_free(&buf);
2159 }
2160
2161 if (__kmp_affinity_type == affinity_none) {
2162 CLEANUP_THREAD_INFO;
2163 return 0;
2164 }
2165
2166 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
2167 Address addr(1);
2168 addr.labels[0] = threadInfo[0][pkgIdIndex];
2169 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
2170
2171 if (__kmp_affinity_gran_levels < 0) {
2172 __kmp_affinity_gran_levels = 0;
2173 }
2174
2175 if (__kmp_affinity_verbose) {
2176 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
2177 }
2178
2179 CLEANUP_THREAD_INFO;
2180 return 1;
2181 }
2182
2183 //
2184 // Sort the threadInfo table by physical Id.
2185 //
2186 qsort(threadInfo, num_avail, sizeof(*threadInfo),
2187 __kmp_affinity_cmp_ProcCpuInfo_phys_id);
2188
2189 //
2190 // The table is now sorted by pkgId / coreId / threadId, but we really
2191 // don't know the radix of any of the fields. pkgId's may be sparsely
2192 // assigned among the chips on a system. Although coreId's are usually
2193 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
2194 // [0..threadsPerCore-1], we don't want to make any such assumptions.
2195 //
2196 // For that matter, we don't know what coresPerPkg and threadsPerCore
2197 // (or the total # packages) are at this point - we want to determine
2198 // that now. We only have an upper bound on the first two figures.
2199 //
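    // For instance (illustrative): a two-socket system may report physical
    // ids 0 and 3 rather than 0 and 1, which is why the code below counts
    // distinct values as they change instead of assuming the ids are dense.
    //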
2200 unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
2201 * sizeof(unsigned));
2202 unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
2203 * sizeof(unsigned));
2204 unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
2205 * sizeof(unsigned));
2206 unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
2207 * sizeof(unsigned));
2208
2209 bool assign_thread_ids = false;
2210 unsigned threadIdCt;
2211 unsigned index;
2212
2213 restart_radix_check:
2214 threadIdCt = 0;
2215
2216 //
2217 // Initialize the counter arrays with data from threadInfo[0].
2218 //
2219 if (assign_thread_ids) {
2220 if (threadInfo[0][threadIdIndex] == UINT_MAX) {
2221 threadInfo[0][threadIdIndex] = threadIdCt++;
2222 }
2223 else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
2224 threadIdCt = threadInfo[0][threadIdIndex] + 1;
2225 }
2226 }
2227 for (index = 0; index <= maxIndex; index++) {
2228 counts[index] = 1;
2229 maxCt[index] = 1;
2230 totals[index] = 1;
2231        lastId[index] = threadInfo[0][index];
2232 }
2233
2234 //
2235 // Run through the rest of the OS procs.
2236 //
2237 for (i = 1; i < num_avail; i++) {
2238 //
2239 // Find the most significant index whose id differs
2240 // from the id for the previous OS proc.
2241 //
2242 for (index = maxIndex; index >= threadIdIndex; index--) {
2243 if (assign_thread_ids && (index == threadIdIndex)) {
2244 //
2245 // Auto-assign the thread id field if it wasn't specified.
2246 //
2247 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2248 threadInfo[i][threadIdIndex] = threadIdCt++;
2249 }
2250
2251 //
2252                // Apparently the thread id field was specified for some
2253 // entries and not others. Start the thread id counter
2254 // off at the next higher thread id.
2255 //
2256 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2257 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2258 }
2259 }
2260 if (threadInfo[i][index] != lastId[index]) {
2261 //
2262 // Run through all indices which are less significant,
2263 // and reset the counts to 1.
2264 //
2265 // At all levels up to and including index, we need to
2266 // increment the totals and record the last id.
2267 //
2268 unsigned index2;
2269 for (index2 = threadIdIndex; index2 < index; index2++) {
2270 totals[index2]++;
2271 if (counts[index2] > maxCt[index2]) {
2272 maxCt[index2] = counts[index2];
2273 }
2274 counts[index2] = 1;
2275 lastId[index2] = threadInfo[i][index2];
2276 }
2277 counts[index]++;
2278 totals[index]++;
2279 lastId[index] = threadInfo[i][index];
2280
2281 if (assign_thread_ids && (index > threadIdIndex)) {
2282
2283# if KMP_MIC && REDUCE_TEAM_SIZE
2284 //
2285 // The default team size is the total #threads in the machine
2286 // minus 1 thread for every core that has 3 or more threads.
2287 //
2288 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2289# endif // KMP_MIC && REDUCE_TEAM_SIZE
2290
2291 //
2292 // Restart the thread counter, as we are on a new core.
2293 //
2294 threadIdCt = 0;
2295
2296 //
2297 // Auto-assign the thread id field if it wasn't specified.
2298 //
2299 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2300 threadInfo[i][threadIdIndex] = threadIdCt++;
2301 }
2302
2303 //
2304                    // Apparently the thread id field was specified for some
2305 // entries and not others. Start the thread id counter
2306 // off at the next higher thread id.
2307 //
2308 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2309 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2310 }
2311 }
2312 break;
2313 }
2314 }
2315 if (index < threadIdIndex) {
2316 //
2317 // If thread ids were specified, it is an error if they are not
2318            // unique. Also, check that we haven't already restarted the
2319 // loop (to be safe - shouldn't need to).
2320 //
2321 if ((threadInfo[i][threadIdIndex] != UINT_MAX)
2322 || assign_thread_ids) {
2323 __kmp_free(lastId);
2324 __kmp_free(totals);
2325 __kmp_free(maxCt);
2326 __kmp_free(counts);
2327 CLEANUP_THREAD_INFO;
2328 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2329 return -1;
2330 }
2331
2332 //
2333            // If the thread ids were not specified and we see entries
2334            // that are duplicates, start the loop over and
2335 // assign the thread ids manually.
2336 //
2337 assign_thread_ids = true;
2338 goto restart_radix_check;
2339 }
2340 }
2341
2342# if KMP_MIC && REDUCE_TEAM_SIZE
2343 //
2344 // The default team size is the total #threads in the machine
2345 // minus 1 thread for every core that has 3 or more threads.
2346 //
2347 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2348# endif // KMP_MIC && REDUCE_TEAM_SIZE
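    //
    // Worked example of the rule above (illustrative only): on a coprocessor
    // with 60 cores and 4 hardware threads per core, each core contributes
    // threadIdCt - 1 = 3 threads to teamSize, giving a default team size of
    // 60 * 3 = 180 instead of the full 240 hardware threads.
    //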
2349
2350 for (index = threadIdIndex; index <= maxIndex; index++) {
2351 if (counts[index] > maxCt[index]) {
2352 maxCt[index] = counts[index];
2353 }
2354 }
2355
2356 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2357 nCoresPerPkg = maxCt[coreIdIndex];
2358 nPackages = totals[pkgIdIndex];
2359
2360 //
2361 // Check to see if the machine topology is uniform
2362 //
2363 unsigned prod = totals[maxIndex];
2364 for (index = threadIdIndex; index < maxIndex; index++) {
2365 prod *= maxCt[index];
2366 }
2367 bool uniform = (prod == totals[threadIdIndex]);
2368
2369 //
2370 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00002371 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002372 // nCoresPerPkg, & nPackages. Make sure all these vars are set
2373 // correctly, and return now if affinity is not enabled.
2374 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00002375 __kmp_ncores = totals[coreIdIndex];
2376
2377 if (__kmp_affinity_verbose) {
2378 if (! KMP_AFFINITY_CAPABLE()) {
2379 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2380 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2381 if (uniform) {
2382 KMP_INFORM(Uniform, "KMP_AFFINITY");
2383 } else {
2384 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2385 }
2386 }
2387 else {
2388 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2389 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
2390 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2391 if (__kmp_affinity_respect_mask) {
2392 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2393 } else {
2394 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2395 }
2396 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2397 if (uniform) {
2398 KMP_INFORM(Uniform, "KMP_AFFINITY");
2399 } else {
2400 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2401 }
2402 }
2403 kmp_str_buf_t buf;
2404 __kmp_str_buf_init(&buf);
2405
2406 __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
2407 for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2408 __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
2409 }
2410 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
2411 maxCt[threadIdIndex], __kmp_ncores);
2412
2413 __kmp_str_buf_free(&buf);
2414 }
2415
2416# if KMP_MIC && REDUCE_TEAM_SIZE
2417 //
2418 // Set the default team size.
2419 //
2420 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2421 __kmp_dflt_team_nth = teamSize;
2422 KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
2423 __kmp_dflt_team_nth));
2424 }
2425# endif // KMP_MIC && REDUCE_TEAM_SIZE
2426
2427 if (__kmp_affinity_type == affinity_none) {
2428 __kmp_free(lastId);
2429 __kmp_free(totals);
2430 __kmp_free(maxCt);
2431 __kmp_free(counts);
2432 CLEANUP_THREAD_INFO;
2433 return 0;
2434 }
2435
2436 //
2437 // Count the number of levels which have more nodes at that level than
2438 // at the parent's level (with there being an implicit root node of
2439 // the top level). This is equivalent to saying that there is at least
2440 // one node at this level which has a sibling. These levels are in the
2441 // map, and the package level is always in the map.
2442 //
2443 bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
2444 int level = 0;
2445 for (index = threadIdIndex; index < maxIndex; index++) {
2446 KMP_ASSERT(totals[index] >= totals[index + 1]);
2447 inMap[index] = (totals[index] > totals[index + 1]);
2448 }
2449 inMap[maxIndex] = (totals[maxIndex] > 1);
2450 inMap[pkgIdIndex] = true;
2451
2452 int depth = 0;
2453 for (index = threadIdIndex; index <= maxIndex; index++) {
2454 if (inMap[index]) {
2455 depth++;
2456 }
2457 }
2458 KMP_ASSERT(depth > 0);
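    //
    // Example (illustrative): for 2 packages x 8 cores x 1 thread per core,
    // the thread level has no siblings (totals are equal at the thread and
    // core levels), so it drops out of the map and depth becomes 2
    // (package and core).
    //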
2459
2460 //
2461 // Construct the data structure that is to be returned.
2462 //
2463 *address2os = (AddrUnsPair*)
2464 __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
2465 int pkgLevel = -1;
2466 int coreLevel = -1;
2467 int threadLevel = -1;
2468
2469 for (i = 0; i < num_avail; ++i) {
2470 Address addr(depth);
2471 unsigned os = threadInfo[i][osIdIndex];
2472 int src_index;
2473 int dst_index = 0;
2474
2475 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
2476 if (! inMap[src_index]) {
2477 continue;
2478 }
2479 addr.labels[dst_index] = threadInfo[i][src_index];
2480 if (src_index == pkgIdIndex) {
2481 pkgLevel = dst_index;
2482 }
2483 else if (src_index == coreIdIndex) {
2484 coreLevel = dst_index;
2485 }
2486 else if (src_index == threadIdIndex) {
2487 threadLevel = dst_index;
2488 }
2489 dst_index++;
2490 }
2491 (*address2os)[i] = AddrUnsPair(addr, os);
2492 }
2493
2494 if (__kmp_affinity_gran_levels < 0) {
2495 //
2496 // Set the granularity level based on what levels are modeled
2497 // in the machine topology map.
2498 //
2499 unsigned src_index;
2500 __kmp_affinity_gran_levels = 0;
2501 for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2502 if (! inMap[src_index]) {
2503 continue;
2504 }
2505 switch (src_index) {
2506 case threadIdIndex:
2507 if (__kmp_affinity_gran > affinity_gran_thread) {
2508 __kmp_affinity_gran_levels++;
2509 }
2510
2511 break;
2512 case coreIdIndex:
2513 if (__kmp_affinity_gran > affinity_gran_core) {
2514 __kmp_affinity_gran_levels++;
2515 }
2516 break;
2517
2518 case pkgIdIndex:
2519 if (__kmp_affinity_gran > affinity_gran_package) {
2520 __kmp_affinity_gran_levels++;
2521 }
2522 break;
2523 }
2524 }
2525 }
2526
2527 if (__kmp_affinity_verbose) {
2528 __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2529 coreLevel, threadLevel);
2530 }
2531
2532 __kmp_free(inMap);
2533 __kmp_free(lastId);
2534 __kmp_free(totals);
2535 __kmp_free(maxCt);
2536 __kmp_free(counts);
2537 CLEANUP_THREAD_INFO;
2538 return depth;
2539}
2540
2541
2542//
2543// Create and return a table of affinity masks, indexed by OS thread ID.
2544// This routine handles OR'ing together all the affinity masks of threads
2545// that are sufficiently close, if granularity > fine.
2546//
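// For example (illustrative): with granularity=core on a machine that has
// 2 hardware threads per core, the two OS procs sharing a core both receive
// the same mask, with both of their bits set, in the returned table.
//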
2547static kmp_affin_mask_t *
2548__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
2549 AddrUnsPair *address2os, unsigned numAddrs)
2550{
2551 //
2552 // First form a table of affinity masks in order of OS thread id.
2553 //
2554 unsigned depth;
2555 unsigned maxOsId;
2556 unsigned i;
2557
2558 KMP_ASSERT(numAddrs > 0);
2559 depth = address2os[0].first.depth;
2560
2561 maxOsId = 0;
2562 for (i = 0; i < numAddrs; i++) {
2563 unsigned osId = address2os[i].second;
2564 if (osId > maxOsId) {
2565 maxOsId = osId;
2566 }
2567 }
2568 kmp_affin_mask_t *osId2Mask = (kmp_affin_mask_t *)__kmp_allocate(
2569 (maxOsId + 1) * __kmp_affin_mask_size);
2570
2571 //
2572 // Sort the address2os table according to physical order. Doing so
2573 // will put all threads on the same core/package/node in consecutive
2574 // locations.
2575 //
2576 qsort(address2os, numAddrs, sizeof(*address2os),
2577 __kmp_affinity_cmp_Address_labels);
2578
2579 KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2580 if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
2581 KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
2582 }
2583 if (__kmp_affinity_gran_levels >= (int)depth) {
2584 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2585 && (__kmp_affinity_type != affinity_none))) {
2586 KMP_WARNING(AffThreadsMayMigrate);
2587 }
2588 }
2589
2590 //
2591 // Run through the table, forming the masks for all threads on each
2592 // core. Threads on the same core will have identical "Address"
2593 // objects, not considering the last level, which must be the thread
2594 // id. All threads on a core will appear consecutively.
2595 //
2596 unsigned unique = 0;
2597 unsigned j = 0; // index of 1st thread on core
2598 unsigned leader = 0;
2599 Address *leaderAddr = &(address2os[0].first);
2600 kmp_affin_mask_t *sum
2601 = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
2602 KMP_CPU_ZERO(sum);
2603 KMP_CPU_SET(address2os[0].second, sum);
2604 for (i = 1; i < numAddrs; i++) {
2605 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002606 // If this thread is sufficiently close to the leader (within the
Jim Cownie5e8470a2013-09-27 10:38:44 +00002607 // granularity setting), then set the bit for this os thread in the
2608 // affinity mask for this group, and go on to the next thread.
2609 //
2610 if (leaderAddr->isClose(address2os[i].first,
2611 __kmp_affinity_gran_levels)) {
2612 KMP_CPU_SET(address2os[i].second, sum);
2613 continue;
2614 }
2615
2616 //
2617 // For every thread in this group, copy the mask to the thread's
2618 // entry in the osId2Mask table. Mark the first address as a
2619 // leader.
2620 //
2621 for (; j < i; j++) {
2622 unsigned osId = address2os[j].second;
2623 KMP_DEBUG_ASSERT(osId <= maxOsId);
2624 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2625 KMP_CPU_COPY(mask, sum);
2626 address2os[j].first.leader = (j == leader);
2627 }
2628 unique++;
2629
2630 //
2631 // Start a new mask.
2632 //
2633 leader = i;
2634 leaderAddr = &(address2os[i].first);
2635 KMP_CPU_ZERO(sum);
2636 KMP_CPU_SET(address2os[i].second, sum);
2637 }
2638
2639 //
2640 // For every thread in last group, copy the mask to the thread's
2641 // entry in the osId2Mask table.
2642 //
2643 for (; j < i; j++) {
2644 unsigned osId = address2os[j].second;
2645 KMP_DEBUG_ASSERT(osId <= maxOsId);
2646 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2647 KMP_CPU_COPY(mask, sum);
2648 address2os[j].first.leader = (j == leader);
2649 }
2650 unique++;
2651
2652 *maxIndex = maxOsId;
2653 *numUnique = unique;
2654 return osId2Mask;
2655}
2656
2657
2658//
2659// Stuff for the affinity proclist parsers. It's easier to declare these vars
2660// as file-static than to try and pass them through the calling sequence of
2661// the recursive-descent OMP_PLACES parser.
2662//
2663static kmp_affin_mask_t *newMasks;
2664static int numNewMasks;
2665static int nextNewMask;
2666
2667#define ADD_MASK(_mask) \
2668 { \
2669 if (nextNewMask >= numNewMasks) { \
2670 numNewMasks *= 2; \
2671 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
2672 numNewMasks * __kmp_affin_mask_size); \
2673 } \
2674 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
2675 nextNewMask++; \
2676 }
2677
2678#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
2679 { \
2680 if (((_osId) > _maxOsId) || \
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002681 (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002682 if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
2683 && (__kmp_affinity_type != affinity_none))) { \
2684 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
2685 } \
2686 } \
2687 else { \
2688 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
2689 } \
2690 }
2691
2692
2693//
2694// Re-parse the proclist (for the explicit affinity type), and form the list
2695// of affinity newMasks indexed by gtid.
2696//
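// The parser accepts single OS proc ids, ranges with an optional stride, and
// braced sets that are OR'ed into a single mask.  An illustrative setting
// (example values, not from the original source) that it would accept:
//
//     KMP_AFFINITY="explicit,proclist=[0,2-6,{8,10},12-20:4]"
//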
2697static void
2698__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
2699 unsigned int *out_numMasks, const char *proclist,
2700 kmp_affin_mask_t *osId2Mask, int maxOsId)
2701{
2702 const char *scan = proclist;
2703 const char *next = proclist;
2704
2705 //
2706 // We use malloc() for the temporary mask vector,
2707 // so that we can use realloc() to extend it.
2708 //
2709 numNewMasks = 2;
2710 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
2711 * __kmp_affin_mask_size);
2712 nextNewMask = 0;
2713 kmp_affin_mask_t *sumMask = (kmp_affin_mask_t *)__kmp_allocate(
2714 __kmp_affin_mask_size);
2715 int setSize = 0;
2716
2717 for (;;) {
2718 int start, end, stride;
2719
2720 SKIP_WS(scan);
2721 next = scan;
2722 if (*next == '\0') {
2723 break;
2724 }
2725
2726 if (*next == '{') {
2727 int num;
2728 setSize = 0;
2729 next++; // skip '{'
2730 SKIP_WS(next);
2731 scan = next;
2732
2733 //
2734 // Read the first integer in the set.
2735 //
2736 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2737 "bad proclist");
2738 SKIP_DIGITS(next);
2739 num = __kmp_str_to_int(scan, *next);
2740 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2741
2742 //
2743 // Copy the mask for that osId to the sum (union) mask.
2744 //
2745 if ((num > maxOsId) ||
2746 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2747 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2748 && (__kmp_affinity_type != affinity_none))) {
2749 KMP_WARNING(AffIgnoreInvalidProcID, num);
2750 }
2751 KMP_CPU_ZERO(sumMask);
2752 }
2753 else {
2754 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2755 setSize = 1;
2756 }
2757
2758 for (;;) {
2759 //
2760 // Check for end of set.
2761 //
2762 SKIP_WS(next);
2763 if (*next == '}') {
2764 next++; // skip '}'
2765 break;
2766 }
2767
2768 //
2769 // Skip optional comma.
2770 //
2771 if (*next == ',') {
2772 next++;
2773 }
2774 SKIP_WS(next);
2775
2776 //
2777 // Read the next integer in the set.
2778 //
2779 scan = next;
2780 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2781 "bad explicit proc list");
2782
2783 SKIP_DIGITS(next);
2784 num = __kmp_str_to_int(scan, *next);
2785 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2786
2787 //
2788 // Add the mask for that osId to the sum mask.
2789 //
2790 if ((num > maxOsId) ||
2791 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2792 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2793 && (__kmp_affinity_type != affinity_none))) {
2794 KMP_WARNING(AffIgnoreInvalidProcID, num);
2795 }
2796 }
2797 else {
2798 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2799 setSize++;
2800 }
2801 }
2802 if (setSize > 0) {
2803 ADD_MASK(sumMask);
2804 }
2805
2806 SKIP_WS(next);
2807 if (*next == ',') {
2808 next++;
2809 }
2810 scan = next;
2811 continue;
2812 }
2813
2814 //
2815 // Read the first integer.
2816 //
2817 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2818 SKIP_DIGITS(next);
2819 start = __kmp_str_to_int(scan, *next);
2820 KMP_ASSERT2(start >= 0, "bad explicit proc list");
2821 SKIP_WS(next);
2822
2823 //
2824 // If this isn't a range, then add a mask to the list and go on.
2825 //
2826 if (*next != '-') {
2827 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2828
2829 //
2830 // Skip optional comma.
2831 //
2832 if (*next == ',') {
2833 next++;
2834 }
2835 scan = next;
2836 continue;
2837 }
2838
2839 //
2840 // This is a range. Skip over the '-' and read in the 2nd int.
2841 //
2842 next++; // skip '-'
2843 SKIP_WS(next);
2844 scan = next;
2845 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2846 SKIP_DIGITS(next);
2847 end = __kmp_str_to_int(scan, *next);
2848 KMP_ASSERT2(end >= 0, "bad explicit proc list");
2849
2850 //
2851 // Check for a stride parameter
2852 //
2853 stride = 1;
2854 SKIP_WS(next);
2855 if (*next == ':') {
2856 //
2857            // A stride is specified. Skip over the ':' and read the 3rd int.
2858 //
2859 int sign = +1;
2860 next++; // skip ':'
2861 SKIP_WS(next);
2862 scan = next;
2863 if (*next == '-') {
2864 sign = -1;
2865 next++;
2866 SKIP_WS(next);
2867 scan = next;
2868 }
2869 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2870 "bad explicit proc list");
2871 SKIP_DIGITS(next);
2872 stride = __kmp_str_to_int(scan, *next);
2873 KMP_ASSERT2(stride >= 0, "bad explicit proc list");
2874 stride *= sign;
2875 }
2876
2877 //
2878 // Do some range checks.
2879 //
2880 KMP_ASSERT2(stride != 0, "bad explicit proc list");
2881 if (stride > 0) {
2882 KMP_ASSERT2(start <= end, "bad explicit proc list");
2883 }
2884 else {
2885 KMP_ASSERT2(start >= end, "bad explicit proc list");
2886 }
2887 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
2888
2889 //
2890 // Add the mask for each OS proc # to the list.
2891 //
2892 if (stride > 0) {
2893 do {
2894 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2895 start += stride;
2896 } while (start <= end);
2897 }
2898 else {
2899 do {
2900 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2901 start += stride;
2902 } while (start >= end);
2903 }
2904
2905 //
2906 // Skip optional comma.
2907 //
2908 SKIP_WS(next);
2909 if (*next == ',') {
2910 next++;
2911 }
2912 scan = next;
2913 }
2914
2915 *out_numMasks = nextNewMask;
2916 if (nextNewMask == 0) {
2917 *out_masks = NULL;
2918 KMP_INTERNAL_FREE(newMasks);
2919 return;
2920 }
2921 *out_masks
2922 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
2923 memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
2924 __kmp_free(sumMask);
2925 KMP_INTERNAL_FREE(newMasks);
2926}
2927
2928
2929# if OMP_40_ENABLED
2930
2931/*-----------------------------------------------------------------------------
2932
2933Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
2934places. Again, here is the grammar:
2935
2936place_list := place
2937place_list := place , place_list
2938place := num
2939place := place : num
2940place := place : num : signed
2941place := { subplace_list }
2942place := ! place // (lowest priority)
2943subplace_list := subplace
2944subplace_list := subplace , subplace_list
2945subplace := num
2946subplace := num : num
2947subplace := num : num : signed
2948signed := num
2949signed := + signed
2950signed := - signed
2951
2952-----------------------------------------------------------------------------*/
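//
// Illustrative examples (values assumed, not from the original source) of
// place lists accepted by the grammar above:
//
//     OMP_PLACES="{0,1,2,3},{4,5,6,7}"   // two places listed explicitly
//     OMP_PLACES="{0:4},{4:4},{8:4}"     // three places using <num>:<len> subplaces
//     OMP_PLACES="{0:4}:4:4"             // one place replicated 4 times with stride 4
//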
2953
2954static void
2955__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
2956 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
2957{
2958 const char *next;
2959
2960 for (;;) {
2961 int start, count, stride, i;
2962
2963 //
2964 // Read in the starting proc id
2965 //
2966 SKIP_WS(*scan);
2967 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2968 "bad explicit places list");
2969 next = *scan;
2970 SKIP_DIGITS(next);
2971 start = __kmp_str_to_int(*scan, *next);
2972 KMP_ASSERT(start >= 0);
2973 *scan = next;
2974
2975 //
2976 // valid follow sets are ',' ':' and '}'
2977 //
2978 SKIP_WS(*scan);
2979 if (**scan == '}' || **scan == ',') {
2980 if ((start > maxOsId) ||
2981 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
2982 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2983 && (__kmp_affinity_type != affinity_none))) {
2984 KMP_WARNING(AffIgnoreInvalidProcID, start);
2985 }
2986 }
2987 else {
2988 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
2989 (*setSize)++;
2990 }
2991 if (**scan == '}') {
2992 break;
2993 }
2994 (*scan)++; // skip ','
2995 continue;
2996 }
2997 KMP_ASSERT2(**scan == ':', "bad explicit places list");
2998 (*scan)++; // skip ':'
2999
3000 //
3001 // Read count parameter
3002 //
3003 SKIP_WS(*scan);
3004 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3005 "bad explicit places list");
3006 next = *scan;
3007 SKIP_DIGITS(next);
3008 count = __kmp_str_to_int(*scan, *next);
3009 KMP_ASSERT(count >= 0);
3010 *scan = next;
3011
3012 //
3013 // valid follow sets are ',' ':' and '}'
3014 //
3015 SKIP_WS(*scan);
3016 if (**scan == '}' || **scan == ',') {
3017 for (i = 0; i < count; i++) {
3018 if ((start > maxOsId) ||
3019 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3020 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3021 && (__kmp_affinity_type != affinity_none))) {
3022 KMP_WARNING(AffIgnoreInvalidProcID, start);
3023 }
3024 break; // don't proliferate warnings for large count
3025 }
3026 else {
3027 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3028 start++;
3029 (*setSize)++;
3030 }
3031 }
3032 if (**scan == '}') {
3033 break;
3034 }
3035 (*scan)++; // skip ','
3036 continue;
3037 }
3038 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3039 (*scan)++; // skip ':'
3040
3041 //
3042 // Read stride parameter
3043 //
3044 int sign = +1;
3045 for (;;) {
3046 SKIP_WS(*scan);
3047 if (**scan == '+') {
3048 (*scan)++; // skip '+'
3049 continue;
3050 }
3051 if (**scan == '-') {
3052 sign *= -1;
3053 (*scan)++; // skip '-'
3054 continue;
3055 }
3056 break;
3057 }
3058 SKIP_WS(*scan);
3059 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3060 "bad explicit places list");
3061 next = *scan;
3062 SKIP_DIGITS(next);
3063 stride = __kmp_str_to_int(*scan, *next);
3064 KMP_ASSERT(stride >= 0);
3065 *scan = next;
3066 stride *= sign;
3067
3068 //
3069 // valid follow sets are ',' and '}'
3070 //
3071 SKIP_WS(*scan);
3072 if (**scan == '}' || **scan == ',') {
3073 for (i = 0; i < count; i++) {
3074 if ((start > maxOsId) ||
3075 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3076 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3077 && (__kmp_affinity_type != affinity_none))) {
3078 KMP_WARNING(AffIgnoreInvalidProcID, start);
3079 }
3080 break; // don't proliferate warnings for large count
3081 }
3082 else {
3083 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3084 start += stride;
3085 (*setSize)++;
3086 }
3087 }
3088 if (**scan == '}') {
3089 break;
3090 }
3091 (*scan)++; // skip ','
3092 continue;
3093 }
3094
3095 KMP_ASSERT2(0, "bad explicit places list");
3096 }
3097}
3098
3099
3100static void
3101__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
3102 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3103{
3104 const char *next;
3105
3106 //
3107 // valid follow sets are '{' '!' and num
3108 //
3109 SKIP_WS(*scan);
3110 if (**scan == '{') {
3111 (*scan)++; // skip '{'
3112 __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
3113 setSize);
3114 KMP_ASSERT2(**scan == '}', "bad explicit places list");
3115 (*scan)++; // skip '}'
3116 }
3117    else if (**scan == '!') {
3118        (*scan)++; // skip '!'
3119        __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
3120        KMP_CPU_COMPLEMENT(tempMask);
3121    }
3122 else if ((**scan >= '0') && (**scan <= '9')) {
3123 next = *scan;
3124 SKIP_DIGITS(next);
3125 int num = __kmp_str_to_int(*scan, *next);
3126 KMP_ASSERT(num >= 0);
3127 if ((num > maxOsId) ||
3128 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3129 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3130 && (__kmp_affinity_type != affinity_none))) {
3131 KMP_WARNING(AffIgnoreInvalidProcID, num);
3132 }
3133 }
3134 else {
3135 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
3136 (*setSize)++;
3137 }
3138 *scan = next; // skip num
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003139 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003140 else {
3141 KMP_ASSERT2(0, "bad explicit places list");
3142 }
3143}
3144
3145
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003146//static void
3147void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003148__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
3149 unsigned int *out_numMasks, const char *placelist,
3150 kmp_affin_mask_t *osId2Mask, int maxOsId)
3151{
3152 const char *scan = placelist;
3153 const char *next = placelist;
3154
3155 numNewMasks = 2;
3156 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
3157 * __kmp_affin_mask_size);
3158 nextNewMask = 0;
3159
3160 kmp_affin_mask_t *tempMask = (kmp_affin_mask_t *)__kmp_allocate(
3161 __kmp_affin_mask_size);
3162 KMP_CPU_ZERO(tempMask);
3163 int setSize = 0;
3164
3165 for (;;) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003166 __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
3167
3168 //
3169 // valid follow sets are ',' ':' and EOL
3170 //
3171 SKIP_WS(scan);
3172 if (*scan == '\0' || *scan == ',') {
3173 if (setSize > 0) {
3174 ADD_MASK(tempMask);
3175 }
3176 KMP_CPU_ZERO(tempMask);
3177 setSize = 0;
3178 if (*scan == '\0') {
3179 break;
3180 }
3181 scan++; // skip ','
3182 continue;
3183 }
3184
3185 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3186 scan++; // skip ':'
3187
3188 //
3189 // Read count parameter
3190 //
3191 SKIP_WS(scan);
3192 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3193 "bad explicit places list");
3194 next = scan;
3195 SKIP_DIGITS(next);
Jim Cownie181b4bb2013-12-23 17:28:57 +00003196 int count = __kmp_str_to_int(scan, *next);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003197 KMP_ASSERT(count >= 0);
3198 scan = next;
3199
3200 //
3201 // valid follow sets are ',' ':' and EOL
3202 //
3203 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003204 int stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003205 if (*scan == '\0' || *scan == ',') {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003206 stride = +1;
3207 }
3208 else {
3209 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3210 scan++; // skip ':'
Jim Cownie5e8470a2013-09-27 10:38:44 +00003211
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003212 //
3213 // Read stride parameter
3214 //
3215 int sign = +1;
3216 for (;;) {
3217 SKIP_WS(scan);
3218 if (*scan == '+') {
3219 scan++; // skip '+'
3220 continue;
3221 }
3222 if (*scan == '-') {
3223 sign *= -1;
3224 scan++; // skip '-'
3225 continue;
3226 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003227 break;
3228 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003229 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003230 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3231 "bad explicit places list");
3232 next = scan;
3233 SKIP_DIGITS(next);
3234 stride = __kmp_str_to_int(scan, *next);
3235 KMP_DEBUG_ASSERT(stride >= 0);
3236 scan = next;
3237 stride *= sign;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003238 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003239
3240 if (stride > 0) {
3241 int i;
3242 for (i = 0; i < count; i++) {
3243 int j;
3244 if (setSize == 0) {
3245 break;
3246 }
3247 ADD_MASK(tempMask);
3248 setSize = 0;
3249 for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003250 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3251 KMP_CPU_CLR(j, tempMask);
3252 }
3253 else if ((j > maxOsId) ||
3254 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
3255 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3256 && (__kmp_affinity_type != affinity_none))) {
3257 KMP_WARNING(AffIgnoreInvalidProcID, j);
3258 }
3259 KMP_CPU_CLR(j, tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003260 }
3261 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003262 KMP_CPU_SET(j, tempMask);
3263 setSize++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003264 }
3265 }
3266 for (; j >= 0; j--) {
3267 KMP_CPU_CLR(j, tempMask);
3268 }
3269 }
3270 }
3271 else {
3272 int i;
3273 for (i = 0; i < count; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003274 int j;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003275 if (setSize == 0) {
3276 break;
3277 }
3278 ADD_MASK(tempMask);
3279 setSize = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003280 for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003281 j++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003282 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3283 KMP_CPU_CLR(j, tempMask);
3284 }
3285 else if ((j > maxOsId) ||
3286 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
3287 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3288 && (__kmp_affinity_type != affinity_none))) {
3289 KMP_WARNING(AffIgnoreInvalidProcID, j);
3290 }
3291 KMP_CPU_CLR(j, tempMask);
3292 }
3293 else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003294 KMP_CPU_SET(j, tempMask);
3295 setSize++;
3296 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003297 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003298 for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003299 KMP_CPU_CLR(j, tempMask);
3300 }
3301 }
3302 }
3303 KMP_CPU_ZERO(tempMask);
3304 setSize = 0;
3305
3306 //
3307 // valid follow sets are ',' and EOL
3308 //
3309 SKIP_WS(scan);
3310 if (*scan == '\0') {
3311 break;
3312 }
3313 if (*scan == ',') {
3314 scan++; // skip ','
3315 continue;
3316 }
3317
3318 KMP_ASSERT2(0, "bad explicit places list");
3319 }
3320
3321 *out_numMasks = nextNewMask;
3322 if (nextNewMask == 0) {
3323 *out_masks = NULL;
3324 KMP_INTERNAL_FREE(newMasks);
3325 return;
3326 }
3327 *out_masks
3328 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
3329 memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
3330 __kmp_free(tempMask);
3331 KMP_INTERNAL_FREE(newMasks);
3332}
3333
3334# endif /* OMP_40_ENABLED */
3335
3336#undef ADD_MASK
3337#undef ADD_MASK_OSID
3338
3339
3340# if KMP_MIC
3341
3342static void
3343__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
3344{
3345 if ( __kmp_place_num_cores == 0 ) {
3346 if ( __kmp_place_num_threads_per_core == 0 ) {
3347 return; // no cores limiting actions requested, exit
3348 }
3349 __kmp_place_num_cores = nCoresPerPkg; // use all available cores
3350 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003351 if ( !__kmp_affinity_uniform_topology() ) {
3352 KMP_WARNING( AffThrPlaceNonUniform );
3353 return; // don't support non-uniform topology
3354 }
3355 if ( depth != 3 ) {
3356 KMP_WARNING( AffThrPlaceNonThreeLevel );
3357        return; // don't support non-3-level topology
Jim Cownie5e8470a2013-09-27 10:38:44 +00003358 }
3359 if ( __kmp_place_num_threads_per_core == 0 ) {
3360 __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
3361 }
3362 if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
3363 KMP_WARNING( AffThrPlaceManyCores );
3364 return;
3365 }
3366
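    //
    // Illustrative example (values assumed): with nPackages = 1,
    // nCoresPerPkg = 61, __kmp_nThreadsPerCore = 4, __kmp_place_core_offset = 1,
    // __kmp_place_num_cores = 60 and __kmp_place_num_threads_per_core = 2,
    // the copy loop below keeps the first 2 hardware threads of cores 1..60,
    // leaving 60 * 2 = 120 entries in the filtered topology.
    //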
3367 AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
3368 nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
3369 int i, j, k, n_old = 0, n_new = 0;
3370 for ( i = 0; i < nPackages; ++i ) {
3371 for ( j = 0; j < nCoresPerPkg; ++j ) {
3372 if ( j < __kmp_place_core_offset || j >= __kmp_place_core_offset + __kmp_place_num_cores ) {
3373 n_old += __kmp_nThreadsPerCore; // skip not-requested core
3374 } else {
3375 for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) {
3376 if ( k < __kmp_place_num_threads_per_core ) {
3377                        newAddr[n_new] = (*pAddr)[n_old]; // copy requested core's data to new location
3378 n_new++;
3379 }
3380 n_old++;
3381 }
3382 }
3383 }
3384 }
3385 nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
3386 __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
3387 __kmp_avail_proc = n_new; // correct avail_proc
3388 __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
3389
3390 __kmp_free( *pAddr );
3391 *pAddr = newAddr; // replace old topology with new one
3392}
3393
3394# endif /* KMP_MIC */
3395
3396
3397static AddrUnsPair *address2os = NULL;
3398static int * procarr = NULL;
3399static int __kmp_aff_depth = 0;
3400
3401static void
3402__kmp_aux_affinity_initialize(void)
3403{
3404 if (__kmp_affinity_masks != NULL) {
3405 KMP_ASSERT(fullMask != NULL);
3406 return;
3407 }
3408
3409 //
3410 // Create the "full" mask - this defines all of the processors that we
3411 // consider to be in the machine model. If respect is set, then it is
3412 // the initialization thread's affinity mask. Otherwise, it is all
3413 // processors that we know about on the machine.
3414 //
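    // For example (illustrative): if the process was launched under
    // "taskset -c 0-7" and the respect setting is in effect, only OS procs
    // 0-7 enter the machine model; with "norespect", every processor the
    // runtime detects on the machine is included.
    //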
3415 if (fullMask == NULL) {
3416 fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
3417 }
3418 if (KMP_AFFINITY_CAPABLE()) {
3419 if (__kmp_affinity_respect_mask) {
3420 __kmp_get_system_affinity(fullMask, TRUE);
3421
3422 //
3423 // Count the number of available processors.
3424 //
3425 unsigned i;
3426 __kmp_avail_proc = 0;
3427 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
3428 if (! KMP_CPU_ISSET(i, fullMask)) {
3429 continue;
3430 }
3431 __kmp_avail_proc++;
3432 }
3433 if (__kmp_avail_proc > __kmp_xproc) {
3434 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3435 && (__kmp_affinity_type != affinity_none))) {
3436 KMP_WARNING(ErrorInitializeAffinity);
3437 }
3438 __kmp_affinity_type = affinity_none;
3439 __kmp_affin_mask_size = 0;
3440 return;
3441 }
3442 }
3443 else {
3444 __kmp_affinity_entire_machine_mask(fullMask);
3445 __kmp_avail_proc = __kmp_xproc;
3446 }
3447 }
3448
3449 int depth = -1;
3450 kmp_i18n_id_t msg_id = kmp_i18n_null;
3451
3452 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00003453 // For backward compatibility, setting KMP_CPUINFO_FILE =>
Jim Cownie5e8470a2013-09-27 10:38:44 +00003454 // KMP_TOPOLOGY_METHOD=cpuinfo
3455 //
3456 if ((__kmp_cpuinfo_file != NULL) &&
3457 (__kmp_affinity_top_method == affinity_top_method_all)) {
3458 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
3459 }
3460
3461 if (__kmp_affinity_top_method == affinity_top_method_all) {
3462 //
3463 // In the default code path, errors are not fatal - we just try using
3464 // another method. We only emit a warning message if affinity is on,
3465        // or the verbose flag is set, and the nowarnings flag was not set.
3466 //
3467 const char *file_name = NULL;
3468 int line = 0;
3469
3470# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3471
3472 if (__kmp_affinity_verbose) {
3473 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
3474 }
3475
3476 file_name = NULL;
3477 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3478 if (depth == 0) {
3479 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3480 KMP_ASSERT(address2os == NULL);
3481 return;
3482 }
3483
3484 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003485 if (__kmp_affinity_verbose) {
3486 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003487 KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
3488 KMP_I18N_STR(DecodingLegacyAPIC));
3489 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003490 else {
3491 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
3492 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003493 }
3494
3495 file_name = NULL;
3496 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3497 if (depth == 0) {
3498 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3499 KMP_ASSERT(address2os == NULL);
3500 return;
3501 }
3502 }
3503
3504# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3505
3506# if KMP_OS_LINUX
3507
3508 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003509 if (__kmp_affinity_verbose) {
3510 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003511 KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
3512 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003513 else {
3514 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
3515 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003516 }
3517
3518 FILE *f = fopen("/proc/cpuinfo", "r");
3519 if (f == NULL) {
3520 msg_id = kmp_i18n_str_CantOpenCpuinfo;
3521 }
3522 else {
3523 file_name = "/proc/cpuinfo";
3524 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3525 fclose(f);
3526 if (depth == 0) {
3527 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3528 KMP_ASSERT(address2os == NULL);
3529 return;
3530 }
3531 }
3532 }
3533
3534# endif /* KMP_OS_LINUX */
3535
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003536# if KMP_GROUP_AFFINITY
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003537
3538 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
3539 if (__kmp_affinity_verbose) {
3540 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3541 }
3542
3543 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3544 KMP_ASSERT(depth != 0);
3545 }
3546
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003547# endif /* KMP_GROUP_AFFINITY */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003548
Jim Cownie5e8470a2013-09-27 10:38:44 +00003549 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003550 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003551 if (file_name == NULL) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003552 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003553 }
3554 else if (line == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003555 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003556 }
3557 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003558 KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003559 }
3560 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003561 // FIXME - print msg if msg_id = kmp_i18n_null ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00003562
3563 file_name = "";
3564 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3565 if (depth == 0) {
3566 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3567 KMP_ASSERT(address2os == NULL);
3568 return;
3569 }
3570 KMP_ASSERT(depth > 0);
3571 KMP_ASSERT(address2os != NULL);
3572 }
3573 }
3574
3575 //
3576    // If the user has specified that a particular topology discovery method
3577 // is to be used, then we abort if that method fails. The exception is
3578 // group affinity, which might have been implicitly set.
3579 //
3580
3581# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3582
3583 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
3584 if (__kmp_affinity_verbose) {
3585 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3586 KMP_I18N_STR(Decodingx2APIC));
3587 }
3588
3589 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3590 if (depth == 0) {
3591 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3592 KMP_ASSERT(address2os == NULL);
3593 return;
3594 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003595 if (depth < 0) {
3596 KMP_ASSERT(msg_id != kmp_i18n_null);
3597 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3598 }
3599 }
3600 else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
3601 if (__kmp_affinity_verbose) {
3602 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3603 KMP_I18N_STR(DecodingLegacyAPIC));
3604 }
3605
3606 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3607 if (depth == 0) {
3608 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3609 KMP_ASSERT(address2os == NULL);
3610 return;
3611 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003612 if (depth < 0) {
3613 KMP_ASSERT(msg_id != kmp_i18n_null);
3614 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3615 }
3616 }
3617
3618# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3619
3620 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
3621 const char *filename;
3622 if (__kmp_cpuinfo_file != NULL) {
3623 filename = __kmp_cpuinfo_file;
3624 }
3625 else {
3626 filename = "/proc/cpuinfo";
3627 }
3628
3629 if (__kmp_affinity_verbose) {
3630 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
3631 }
3632
3633 FILE *f = fopen(filename, "r");
3634 if (f == NULL) {
3635 int code = errno;
3636 if (__kmp_cpuinfo_file != NULL) {
3637 __kmp_msg(
3638 kmp_ms_fatal,
3639 KMP_MSG(CantOpenFileForReading, filename),
3640 KMP_ERR(code),
3641 KMP_HNT(NameComesFrom_CPUINFO_FILE),
3642 __kmp_msg_null
3643 );
3644 }
3645 else {
3646 __kmp_msg(
3647 kmp_ms_fatal,
3648 KMP_MSG(CantOpenFileForReading, filename),
3649 KMP_ERR(code),
3650 __kmp_msg_null
3651 );
3652 }
3653 }
3654 int line = 0;
3655 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3656 fclose(f);
3657 if (depth < 0) {
3658 KMP_ASSERT(msg_id != kmp_i18n_null);
3659 if (line > 0) {
3660 KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
3661 }
3662 else {
3663 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
3664 }
3665 }
3666 if (__kmp_affinity_type == affinity_none) {
3667 KMP_ASSERT(depth == 0);
3668 KMP_ASSERT(address2os == NULL);
3669 return;
3670 }
3671 }
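    //
    // Illustrative note (not part of the original logic): the cpuinfo parser above
    // is typically exercised by pointing the runtime at a saved topology dump, e.g.
    //
    //   $ cat /proc/cpuinfo > /tmp/cpuinfo.dump        # captured on the target machine
    //   $ KMP_CPUINFO_FILE=/tmp/cpuinfo.dump KMP_AFFINITY=verbose ./a.out
    //
    // assuming __kmp_cpuinfo_file is populated from the KMP_CPUINFO_FILE environment
    // variable (as the NameComesFrom_CPUINFO_FILE hint suggests); the path and
    // program name are examples only.
    //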
3672
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003673# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00003674
3675 else if (__kmp_affinity_top_method == affinity_top_method_group) {
3676 if (__kmp_affinity_verbose) {
3677 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3678 }
3679
3680 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3681 KMP_ASSERT(depth != 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003682 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003683 KMP_ASSERT(msg_id != kmp_i18n_null);
3684 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003685 }
3686 }
3687
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003688# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003689
3690 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
3691 if (__kmp_affinity_verbose) {
3692 KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
3693 }
3694
3695 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3696 if (depth == 0) {
3697 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3698 KMP_ASSERT(address2os == NULL);
3699 return;
3700 }
3701 // should not fail
3702 KMP_ASSERT(depth > 0);
3703 KMP_ASSERT(address2os != NULL);
3704 }
3705
3706 if (address2os == NULL) {
3707 if (KMP_AFFINITY_CAPABLE()
3708 && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3709 && (__kmp_affinity_type != affinity_none)))) {
3710 KMP_WARNING(ErrorInitializeAffinity);
3711 }
3712 __kmp_affinity_type = affinity_none;
3713 __kmp_affin_mask_size = 0;
3714 return;
3715 }
3716
3717# if KMP_MIC
3718 __kmp_apply_thread_places(&address2os, depth);
3719# endif
3720
3721 //
3722 // Create the table of masks, indexed by thread Id.
3723 //
3724 unsigned maxIndex;
3725 unsigned numUnique;
3726 kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
3727 address2os, __kmp_avail_proc);
3728 if (__kmp_affinity_gran_levels == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003729 KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003730 }
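    //
    // Sketch of what __kmp_create_masks produces (illustrative, assuming
    // granularity=core on a 2-way SMT machine): OS procs 0 and 1 share a core,
    // so KMP_CPU_INDEX(osId2Mask, 0) and KMP_CPU_INDEX(osId2Mask, 1) both hold
    // the mask {0,1}; maxIndex is the largest OS proc id seen, and numUnique
    // counts the distinct masks (one per core in this example).
    //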
3731
3732 //
3733 // Set the childNums vector in all Address objects. This must be done
3734 // before we can sort using __kmp_affinity_cmp_Address_child_num(),
3735 // which takes into account the setting of __kmp_affinity_compact.
3736 //
3737 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
3738
3739 switch (__kmp_affinity_type) {
3740
3741 case affinity_explicit:
3742 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
3743# if OMP_40_ENABLED
3744 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3745# endif
3746 {
3747 __kmp_affinity_process_proclist(&__kmp_affinity_masks,
3748 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3749 maxIndex);
3750 }
3751# if OMP_40_ENABLED
3752 else {
3753 __kmp_affinity_process_placelist(&__kmp_affinity_masks,
3754 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3755 maxIndex);
3756 }
3757# endif
3758 if (__kmp_affinity_num_masks == 0) {
3759 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3760 && (__kmp_affinity_type != affinity_none))) {
3761 KMP_WARNING(AffNoValidProcID);
3762 }
3763 __kmp_affinity_type = affinity_none;
3764 return;
3765 }
3766 break;
3767
3768 //
3769 // The other affinity types rely on sorting the Addresses according
3770 // to some permutation of the machine topology tree. Set
3771 // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
3772 // then jump to a common code fragment to do the sort and create
3773 // the array of affinity masks.
3774 //
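    //
    // Worked example (illustrative): on a machine with __kmp_nThreadsPerCore == 2
    // and __kmp_avail_proc == 8, affinity_logical with an offset of 3 leaves
    // __kmp_affinity_compact == 0 and rescales the offset to (2 * 3) % 8 == 6,
    // i.e. the first thread starts six OS procs into the sorted list.
    // affinity_scatter with depth == 3 and a compact/permute value of 0 flips
    // __kmp_affinity_compact to 3 - 1 - 0 == 2 before the common sort.
    //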
3775
3776 case affinity_logical:
3777 __kmp_affinity_compact = 0;
3778 if (__kmp_affinity_offset) {
3779 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3780 % __kmp_avail_proc;
3781 }
3782 goto sortAddresses;
3783
3784 case affinity_physical:
3785 if (__kmp_nThreadsPerCore > 1) {
3786 __kmp_affinity_compact = 1;
3787 if (__kmp_affinity_compact >= depth) {
3788 __kmp_affinity_compact = 0;
3789 }
3790 } else {
3791 __kmp_affinity_compact = 0;
3792 }
3793 if (__kmp_affinity_offset) {
3794 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3795 % __kmp_avail_proc;
3796 }
3797 goto sortAddresses;
3798
3799 case affinity_scatter:
3800 if (__kmp_affinity_compact >= depth) {
3801 __kmp_affinity_compact = 0;
3802 }
3803 else {
3804 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
3805 }
3806 goto sortAddresses;
3807
3808 case affinity_compact:
3809 if (__kmp_affinity_compact >= depth) {
3810 __kmp_affinity_compact = depth - 1;
3811 }
3812 goto sortAddresses;
3813
Jim Cownie5e8470a2013-09-27 10:38:44 +00003814 case affinity_balanced:
Andrey Churbanovf28f6132015-01-13 14:54:00 +00003815 // Balanced works only for the case of a single package
Jim Cownie5e8470a2013-09-27 10:38:44 +00003816 if( nPackages > 1 ) {
3817 if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
3818 KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
3819 }
3820 __kmp_affinity_type = affinity_none;
3821 return;
3822 } else if( __kmp_affinity_uniform_topology() ) {
3823 break;
3824 } else { // Non-uniform topology
3825
3826 // Save the depth for further usage
3827 __kmp_aff_depth = depth;
3828
3829 // Number of hyper threads per core in HT machine
3830 int nth_per_core = __kmp_nThreadsPerCore;
3831
3832 int core_level;
3833 if( nth_per_core > 1 ) {
3834 core_level = depth - 2;
3835 } else {
3836 core_level = depth - 1;
3837 }
3838 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
3839 int nproc = nth_per_core * ncores;
3840
3841 procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
3842 for( int i = 0; i < nproc; i++ ) {
3843 procarr[ i ] = -1;
3844 }
3845
3846 for( int i = 0; i < __kmp_avail_proc; i++ ) {
3847 int proc = address2os[ i ].second;
3848 // If depth == 3 then level=0 - package, level=1 - core, level=2 - thread.
3849 // If there is only one thread per core then depth == 2: level 0 - package,
3850 // level 1 - core.
3851 int level = depth - 1;
3852
3853 // Defaults for the case of one thread context per core (__kmp_nth_per_core == 1)
3854 int thread = 0;
3855 int core = address2os[ i ].first.labels[ level ];
3856 // If the thread level exists, that is we have more than one thread context per core
3857 if( nth_per_core > 1 ) {
3858 thread = address2os[ i ].first.labels[ level ] % nth_per_core;
3859 core = address2os[ i ].first.labels[ level - 1 ];
3860 }
3861 procarr[ core * nth_per_core + thread ] = proc;
3862 }
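        //
        // Illustrative layout (not from the original sources): with
        // nth_per_core == 2 and three cores where core 1 exposes only one
        // usable thread context, procarr has 3 * 2 == 6 slots, e.g.
        //     procarr = { 0, 1,   2, -1,   3, 4 }
        // i.e. slot core * nth_per_core + thread holds the OS proc id, and
        // unusable contexts stay at -1.
        //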
3863
3864 break;
3865 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003866
3867 sortAddresses:
3868 //
3869 // Allocate the gtid->affinity mask table.
3870 //
3871 if (__kmp_affinity_dups) {
3872 __kmp_affinity_num_masks = __kmp_avail_proc;
3873 }
3874 else {
3875 __kmp_affinity_num_masks = numUnique;
3876 }
3877
3878# if OMP_40_ENABLED
3879 if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
3880 && ( __kmp_affinity_num_places > 0 )
3881 && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
3882 __kmp_affinity_num_masks = __kmp_affinity_num_places;
3883 }
3884# endif
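        //
        // Example of the clamp above (illustrative): if the place list yielded
        // __kmp_affinity_num_places == 2 but the topology produced 4 unique
        // masks, only the first 2 masks (in the sorted order established below)
        // become places.
        //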
3885
3886 __kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
3887 __kmp_affinity_num_masks * __kmp_affin_mask_size);
3888
3889 //
3890 // Sort the address2os table according to the current setting of
3891 // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
3892 //
3893 qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
3894 __kmp_affinity_cmp_Address_child_num);
3895 {
3896 int i;
3897 unsigned j;
3898 for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
3899 if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
3900 continue;
3901 }
3902 unsigned osId = address2os[i].second;
3903 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
3904 kmp_affin_mask_t *dest
3905 = KMP_CPU_INDEX(__kmp_affinity_masks, j);
3906 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
3907 KMP_CPU_COPY(dest, src);
3908 if (++j >= __kmp_affinity_num_masks) {
3909 break;
3910 }
3911 }
3912 KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
3913 }
3914 break;
3915
3916 default:
3917 KMP_ASSERT2(0, "Unexpected affinity setting");
3918 }
3919
3920 __kmp_free(osId2Mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003921 machine_hierarchy.init(address2os, __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003922}
3923
3924
3925void
3926__kmp_affinity_initialize(void)
3927{
3928 //
3929 // Much of the code above was written assuming that if a machine was not
3930 // affinity capable, then __kmp_affinity_type == affinity_none. We now
3931 // explicitly represent this as __kmp_affinity_type == affinity_disabled.
3932 //
3933 // There are too many checks for __kmp_affinity_type == affinity_none
3934 // in this code. Instead of trying to change them all, check if
3935 // __kmp_affinity_type == affinity_disabled, and if so, slam it with
3936 // affinity_none, call the real initialization routine, then restore
3937 // __kmp_affinity_type to affinity_disabled.
3938 //
3939 int disabled = (__kmp_affinity_type == affinity_disabled);
3940 if (! KMP_AFFINITY_CAPABLE()) {
3941 KMP_ASSERT(disabled);
3942 }
3943 if (disabled) {
3944 __kmp_affinity_type = affinity_none;
3945 }
3946 __kmp_aux_affinity_initialize();
3947 if (disabled) {
3948 __kmp_affinity_type = affinity_disabled;
3949 }
3950}
3951
3952
3953void
3954__kmp_affinity_uninitialize(void)
3955{
3956 if (__kmp_affinity_masks != NULL) {
3957 __kmp_free(__kmp_affinity_masks);
3958 __kmp_affinity_masks = NULL;
3959 }
3960 if (fullMask != NULL) {
3961 KMP_CPU_FREE(fullMask);
3962 fullMask = NULL;
3963 }
3964 __kmp_affinity_num_masks = 0;
3965# if OMP_40_ENABLED
3966 __kmp_affinity_num_places = 0;
3967# endif
3968 if (__kmp_affinity_proclist != NULL) {
3969 __kmp_free(__kmp_affinity_proclist);
3970 __kmp_affinity_proclist = NULL;
3971 }
3972 if( address2os != NULL ) {
3973 __kmp_free( address2os );
3974 address2os = NULL;
3975 }
3976 if( procarr != NULL ) {
3977 __kmp_free( procarr );
3978 procarr = NULL;
3979 }
3980}
3981
3982
3983void
3984__kmp_affinity_set_init_mask(int gtid, int isa_root)
3985{
3986 if (! KMP_AFFINITY_CAPABLE()) {
3987 return;
3988 }
3989
3990 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
3991 if (th->th.th_affin_mask == NULL) {
3992 KMP_CPU_ALLOC(th->th.th_affin_mask);
3993 }
3994 else {
3995 KMP_CPU_ZERO(th->th.th_affin_mask);
3996 }
3997
3998 //
3999 // Copy the thread mask to the kmp_info_t structure.
4000 // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
4001 // that has all of the OS proc ids set, or if __kmp_affinity_respect_mask
4002 // is set, then the full mask is the same as the mask of the initialization
4003 // thread.
4004 //
4005 kmp_affin_mask_t *mask;
4006 int i;
4007
4008# if OMP_40_ENABLED
4009 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
4010# endif
4011 {
Andrey Churbanovf28f6132015-01-13 14:54:00 +00004012 if ((__kmp_affinity_type == affinity_none) || (__kmp_affinity_type == affinity_balanced)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004013 ) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004014# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004015 if (__kmp_num_proc_groups > 1) {
4016 return;
4017 }
4018# endif
4019 KMP_ASSERT(fullMask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004020 i = KMP_PLACE_ALL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004021 mask = fullMask;
4022 }
4023 else {
4024 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4025 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4026 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4027 }
4028 }
4029# if OMP_40_ENABLED
4030 else {
4031 if ((! isa_root)
4032 || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004033# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004034 if (__kmp_num_proc_groups > 1) {
4035 return;
4036 }
4037# endif
4038 KMP_ASSERT(fullMask != NULL);
4039 i = KMP_PLACE_ALL;
4040 mask = fullMask;
4041 }
4042 else {
4043 //
4044 // int i = some hash function or just a counter that doesn't
4045 // always start at 0. Use gtid for now.
4046 //
4047 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4048 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4049 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4050 }
4051 }
4052# endif
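    //
    // Worked example (illustrative): with __kmp_affinity_num_masks == 4 and
    // __kmp_affinity_offset == 1, threads are assigned places
    // (gtid + 1) % 4, so gtid 0 -> place 1, gtid 1 -> place 2, gtid 3 -> place 0.
    //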
4053
4054# if OMP_40_ENABLED
4055 th->th.th_current_place = i;
4056 if (isa_root) {
4057 th->th.th_new_place = i;
4058 th->th.th_first_place = 0;
4059 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4060 }
4061
4062 if (i == KMP_PLACE_ALL) {
4063 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
4064 gtid));
4065 }
4066 else {
4067 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
4068 gtid, i));
4069 }
4070# else
4071 if (i == -1) {
4072 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
4073 gtid));
4074 }
4075 else {
4076 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
4077 gtid, i));
4078 }
4079# endif /* OMP_40_ENABLED */
4080
4081 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4082
4083 if (__kmp_affinity_verbose) {
4084 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4085 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4086 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004087 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
4088 buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004089 }
4090
4091# if KMP_OS_WINDOWS
4092 //
4093 // On Windows* OS, the process affinity mask might have changed.
4094 // If the user didn't request affinity and this call fails,
4095 // just continue silently. See CQ171393.
4096 //
4097 if ( __kmp_affinity_type == affinity_none ) {
4098 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
4099 }
4100 else
4101# endif
4102 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4103}
4104
4105
4106# if OMP_40_ENABLED
4107
4108void
4109__kmp_affinity_set_place(int gtid)
4110{
4111 int retval;
4112
4113 if (! KMP_AFFINITY_CAPABLE()) {
4114 return;
4115 }
4116
4117 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4118
4119 KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
4120 gtid, th->th.th_new_place, th->th.th_current_place));
4121
4122 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00004123 // Check that the new place is within this thread's partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004124 //
4125 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004126 KMP_ASSERT(th->th.th_new_place >= 0);
4127 KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004128 if (th->th.th_first_place <= th->th.th_last_place) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004129 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004130 && (th->th.th_new_place <= th->th.th_last_place));
4131 }
4132 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004133 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004134 || (th->th.th_new_place >= th->th.th_last_place));
4135 }
4136
4137 //
4138 // Copy the thread mask to the kmp_info_t structure,
4139 // and set this thread's affinity.
4140 //
4141 kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
4142 th->th.th_new_place);
4143 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4144 th->th.th_current_place = th->th.th_new_place;
4145
4146 if (__kmp_affinity_verbose) {
4147 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4148 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4149 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004150 KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
4151 gtid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004152 }
4153 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4154}
4155
4156# endif /* OMP_40_ENABLED */
4157
4158
4159int
4160__kmp_aux_set_affinity(void **mask)
4161{
4162 int gtid;
4163 kmp_info_t *th;
4164 int retval;
4165
4166 if (! KMP_AFFINITY_CAPABLE()) {
4167 return -1;
4168 }
4169
4170 gtid = __kmp_entry_gtid();
4171 KA_TRACE(1000, ;{
4172 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4173 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4174 (kmp_affin_mask_t *)(*mask));
4175 __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
4176 gtid, buf);
4177 });
4178
4179 if (__kmp_env_consistency_check) {
4180 if ((mask == NULL) || (*mask == NULL)) {
4181 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4182 }
4183 else {
4184 unsigned proc;
4185 int num_procs = 0;
4186
4187 for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
4188 if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
4189 continue;
4190 }
4191 num_procs++;
4192 if (! KMP_CPU_ISSET(proc, fullMask)) {
4193 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4194 break;
4195 }
4196 }
4197 if (num_procs == 0) {
4198 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4199 }
4200
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004201# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004202 if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
4203 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4204 }
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004205# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004206
4207 }
4208 }
4209
4210 th = __kmp_threads[gtid];
4211 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4212 retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4213 if (retval == 0) {
4214 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
4215 }
4216
4217# if OMP_40_ENABLED
4218 th->th.th_current_place = KMP_PLACE_UNDEFINED;
4219 th->th.th_new_place = KMP_PLACE_UNDEFINED;
4220 th->th.th_first_place = 0;
4221 th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004222
4223 //
4224 // Turn off 4.0 affinity for the current thread at this parallel level.
4225 //
4226 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004227# endif
4228
4229 return retval;
4230}
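
//
// Usage sketch (illustrative only): __kmp_aux_set_affinity() backs the
// kmp_set_affinity() entry point, so application code along these lines,
// assuming the documented KMP affinity API, ends up in the routine above:
//
//   kmp_affinity_mask_t m;
//   kmp_create_affinity_mask(&m);
//   kmp_set_affinity_mask_proc(2, &m);     // request OS proc 2
//   if (kmp_set_affinity(&m) != 0) {
//       /* mask rejected, e.g. empty or outside fullMask */
//   }
//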
4231
4232
4233int
4234__kmp_aux_get_affinity(void **mask)
4235{
4236 int gtid;
4237 int retval;
4238 kmp_info_t *th;
4239
4240 if (! KMP_AFFINITY_CAPABLE()) {
4241 return -1;
4242 }
4243
4244 gtid = __kmp_entry_gtid();
4245 th = __kmp_threads[gtid];
4246 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4247
4248 KA_TRACE(1000, ;{
4249 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4250 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4251 th->th.th_affin_mask);
4252 __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
4253 });
4254
4255 if (__kmp_env_consistency_check) {
4256 if ((mask == NULL) || (*mask == NULL)) {
4257 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
4258 }
4259 }
4260
4261# if !KMP_OS_WINDOWS
4262
4263 retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4264 KA_TRACE(1000, ;{
4265 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4266 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4267 (kmp_affin_mask_t *)(*mask));
4268 __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
4269 });
4270 return retval;
4271
4272# else
4273
4274 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
4275 return 0;
4276
4277# endif /* KMP_OS_WINDOWS */
4278
4279}
4280
Jim Cownie5e8470a2013-09-27 10:38:44 +00004281int
4282__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
4283{
4284 int retval;
4285
4286 if (! KMP_AFFINITY_CAPABLE()) {
4287 return -1;
4288 }
4289
4290 KA_TRACE(1000, ;{
4291 int gtid = __kmp_entry_gtid();
4292 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4293 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4294 (kmp_affin_mask_t *)(*mask));
4295 __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
4296 proc, gtid, buf);
4297 });
4298
4299 if (__kmp_env_consistency_check) {
4300 if ((mask == NULL) || (*mask == NULL)) {
4301 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
4302 }
4303 }
4304
4305 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4306 return -1;
4307 }
4308 if (! KMP_CPU_ISSET(proc, fullMask)) {
4309 return -2;
4310 }
4311
4312 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
4313 return 0;
4314}
4315
4316
4317int
4318__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
4319{
4320 int retval;
4321
4322 if (! KMP_AFFINITY_CAPABLE()) {
4323 return -1;
4324 }
4325
4326 KA_TRACE(1000, ;{
4327 int gtid = __kmp_entry_gtid();
4328 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4329 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4330 (kmp_affin_mask_t *)(*mask));
4331 __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
4332 proc, gtid, buf);
4333 });
4334
4335 if (__kmp_env_consistency_check) {
4336 if ((mask == NULL) || (*mask == NULL)) {
4337 KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
4338 }
4339 }
4340
4341 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4342 return -1;
4343 }
4344 if (! KMP_CPU_ISSET(proc, fullMask)) {
4345 return -2;
4346 }
4347
4348 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
4349 return 0;
4350}
4351
4352
4353int
4354__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
4355{
4356 int retval;
4357
4358 if (! KMP_AFFINITY_CAPABLE()) {
4359 return -1;
4360 }
4361
4362 KA_TRACE(1000, ;{
4363 int gtid = __kmp_entry_gtid();
4364 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4365 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4366 (kmp_affin_mask_t *)(*mask));
4367 __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
4368 proc, gtid, buf);
4369 });
4370
4371 if (__kmp_env_consistency_check) {
4372 if ((mask == NULL) || (*mask == NULL)) {
4373 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
4374 }
4375 }
4376
4377 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4378 return 0;
4379 }
4380 if (! KMP_CPU_ISSET(proc, fullMask)) {
4381 return 0;
4382 }
4383
4384 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
4385}
4386
Jim Cownie5e8470a2013-09-27 10:38:44 +00004387
4388// Dynamic affinity settings - Affinity balanced
4389void __kmp_balanced_affinity( int tid, int nthreads )
4390{
4391 if( __kmp_affinity_uniform_topology() ) {
4392 int coreID;
4393 int threadID;
4394 // Number of hyper threads per core in HT machine
4395 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
4396 // Number of cores
4397 int ncores = __kmp_ncores;
4398 // How many threads will be bound to each core
4399 int chunk = nthreads / ncores;
4400 // How many cores will have an additional thread bound to them - the "big" cores
4401 int big_cores = nthreads % ncores;
4402 // Number of threads on the big cores
4403 int big_nth = ( chunk + 1 ) * big_cores;
4404 if( tid < big_nth ) {
4405 coreID = tid / (chunk + 1 );
4406 threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
4407 } else { //tid >= big_nth
4408 coreID = ( tid - big_cores ) / chunk;
4409 threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
4410 }
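        //
        // Worked example (illustrative): nthreads == 6, ncores == 4 and
        // __kmp_nth_per_core == 2 give chunk == 1, big_cores == 2, big_nth == 4.
        // tids 0..3 land on cores 0,0,1,1 (the "big" cores take two threads each,
        // thread contexts 0 and 1), while tids 4 and 5 land on cores 2 and 3.
        //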
4411
4412 KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
4413 "Illegal set affinity operation when not capable");
4414
4415 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
4416 KMP_CPU_ZERO(mask);
4417
4418 // Granularity == thread
4419 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4420 int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
4421 KMP_CPU_SET( osID, mask);
4422 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4423 for( int i = 0; i < __kmp_nth_per_core; i++ ) {
4424 int osID;
4425 osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
4426 KMP_CPU_SET( osID, mask);
4427 }
4428 }
4429 if (__kmp_affinity_verbose) {
4430 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4431 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004432 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4433 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004434 }
4435 __kmp_set_system_affinity( mask, TRUE );
4436 } else { // Non-uniform topology
4437
4438 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
4439 KMP_CPU_ZERO(mask);
4440
4441 // Number of hyper threads per core in HT machine
4442 int nth_per_core = __kmp_nThreadsPerCore;
4443 int core_level;
4444 if( nth_per_core > 1 ) {
4445 core_level = __kmp_aff_depth - 2;
4446 } else {
4447 core_level = __kmp_aff_depth - 1;
4448 }
4449
4450 // Number of cores - maximum value; it does not count trailing cores with 0 processors
4451 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
4452
4453 // For performance gain consider the special case nthreads == __kmp_avail_proc
4454 if( nthreads == __kmp_avail_proc ) {
4455 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4456 int osID = address2os[ tid ].second;
4457 KMP_CPU_SET( osID, mask);
4458 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4459 int coreID = address2os[ tid ].first.labels[ core_level ];
4460 // Count the osIDs found for the current core; there can be no more than nth_per_core of them.
4461 // Since address2os is sorted, we can stop once cnt == nth_per_core.
4462 int cnt = 0;
4463 for( int i = 0; i < __kmp_avail_proc; i++ ) {
4464 int osID = address2os[ i ].second;
4465 int core = address2os[ i ].first.labels[ core_level ];
4466 if( core == coreID ) {
4467 KMP_CPU_SET( osID, mask);
4468 cnt++;
4469 if( cnt == nth_per_core ) {
4470 break;
4471 }
4472 }
4473 }
4474 }
4475 } else if( nthreads <= __kmp_ncores ) {
4476
4477 int core = 0;
4478 for( int i = 0; i < ncores; i++ ) {
4479 // Check if this core from procarr[] is in the mask
4480 int in_mask = 0;
4481 for( int j = 0; j < nth_per_core; j++ ) {
4482 if( procarr[ i * nth_per_core + j ] != - 1 ) {
4483 in_mask = 1;
4484 break;
4485 }
4486 }
4487 if( in_mask ) {
4488 if( tid == core ) {
4489 for( int j = 0; j < nth_per_core; j++ ) {
4490 int osID = procarr[ i * nth_per_core + j ];
4491 if( osID != -1 ) {
4492 KMP_CPU_SET( osID, mask );
4493 // For granularity=thread it is enough to set the first available osID for this core
4494 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4495 break;
4496 }
4497 }
4498 }
4499 break;
4500 } else {
4501 core++;
4502 }
4503 }
4504 }
4505
4506 } else { // nthreads > __kmp_ncores
4507
4508 // Array to save the number of processors at each core
4509 int nproc_at_core[ ncores ];
4510 // Array to save the number of cores with "x" available processors;
4511 int ncores_with_x_procs[ nth_per_core + 1 ];
4512 // Array to save the number of cores with # procs from x to nth_per_core
4513 int ncores_with_x_to_max_procs[ nth_per_core + 1 ];
4514
4515 for( int i = 0; i <= nth_per_core; i++ ) {
4516 ncores_with_x_procs[ i ] = 0;
4517 ncores_with_x_to_max_procs[ i ] = 0;
4518 }
4519
4520 for( int i = 0; i < ncores; i++ ) {
4521 int cnt = 0;
4522 for( int j = 0; j < nth_per_core; j++ ) {
4523 if( procarr[ i * nth_per_core + j ] != -1 ) {
4524 cnt++;
4525 }
4526 }
4527 nproc_at_core[ i ] = cnt;
4528 ncores_with_x_procs[ cnt ]++;
4529 }
4530
4531 for( int i = 0; i <= nth_per_core; i++ ) {
4532 for( int j = i; j <= nth_per_core; j++ ) {
4533 ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
4534 }
4535 }
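            //
            // Worked example (illustrative): nth_per_core == 2 and three cores
            // exposing 2, 1 and 2 usable contexts give nproc_at_core == {2,1,2},
            // ncores_with_x_procs == {0,1,2} (indexed by context count) and
            // ncores_with_x_to_max_procs == {3,3,2}, i.e. all three cores have
            // at least one context and two of them have at least two.
            //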
4536
4537 // Max number of processors
4538 int nproc = nth_per_core * ncores;
4539 // An array to keep the number of threads assigned to each thread context
4540 int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
4541 for( int i = 0; i < nproc; i++ ) {
4542 newarr[ i ] = 0;
4543 }
4544
4545 int nth = nthreads;
4546 int flag = 0;
4547 while( nth > 0 ) {
4548 for( int j = 1; j <= nth_per_core; j++ ) {
4549 int cnt = ncores_with_x_to_max_procs[ j ];
4550 for( int i = 0; i < ncores; i++ ) {
4551 // Skip the core with 0 processors
4552 if( nproc_at_core[ i ] == 0 ) {
4553 continue;
4554 }
4555 for( int k = 0; k < nth_per_core; k++ ) {
4556 if( procarr[ i * nth_per_core + k ] != -1 ) {
4557 if( newarr[ i * nth_per_core + k ] == 0 ) {
4558 newarr[ i * nth_per_core + k ] = 1;
4559 cnt--;
4560 nth--;
4561 break;
4562 } else {
4563 if( flag != 0 ) {
4564 newarr[ i * nth_per_core + k ] ++;
4565 cnt--;
4566 nth--;
4567 break;
4568 }
4569 }
4570 }
4571 }
4572 if( cnt == 0 || nth == 0 ) {
4573 break;
4574 }
4575 }
4576 if( nth == 0 ) {
4577 break;
4578 }
4579 }
4580 flag = 1;
4581 }
4582 int sum = 0;
4583 for( int i = 0; i < nproc; i++ ) {
4584 sum += newarr[ i ];
4585 if( sum > tid ) {
4586 // Granularity == thread
4587 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4588 int osID = procarr[ i ];
4589 KMP_CPU_SET( osID, mask);
4590 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4591 int coreID = i / nth_per_core;
4592 for( int ii = 0; ii < nth_per_core; ii++ ) {
4593 int osID = procarr[ coreID * nth_per_core + ii ];
4594 if( osID != -1 ) {
4595 KMP_CPU_SET( osID, mask);
4596 }
4597 }
4598 }
4599 break;
4600 }
4601 }
4602 __kmp_free( newarr );
4603 }
4604
4605 if (__kmp_affinity_verbose) {
4606 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4607 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004608 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4609 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004610 }
4611 __kmp_set_system_affinity( mask, TRUE );
4612 }
4613}
4614
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004615#else
4616 // affinity not supported
4617
4618kmp_uint32 mac_skipPerLevel[7];
4619kmp_uint32 mac_depth;
4620kmp_uint8 mac_leaf_kids;
4621void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
4622 static int first = 1;
4623 if (first) {
4624 const kmp_uint32 maxLevels = 7;
4625 kmp_uint32 numPerLevel[maxLevels];
4626
4627 for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
4628 numPerLevel[i] = 1;
4629 mac_skipPerLevel[i] = 1;
4630 }
4631
4632 mac_depth = 2;
4633 numPerLevel[0] = nproc;
4634
4635 kmp_uint32 branch = 4;
4636 if (numPerLevel[0] == 1) branch = nproc/4;
4637 if (branch<4) branch=4;
4638 for (kmp_uint32 d=0; d<mac_depth-1; ++d) { // optimize hierarchy width
4639 while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
4640 if (numPerLevel[d] & 1) numPerLevel[d]++;
4641 numPerLevel[d] = numPerLevel[d] >> 1;
4642 if (numPerLevel[d+1] == 1) mac_depth++;
4643 numPerLevel[d+1] = numPerLevel[d+1] << 1;
4644 }
4645 if(numPerLevel[0] == 1) {
4646 branch = branch >> 1;
4647 if (branch<4) branch = 4;
4648 }
4649 }
4650
4651 for (kmp_uint32 i=1; i<mac_depth; ++i)
4652 mac_skipPerLevel[i] = numPerLevel[i-1] * mac_skipPerLevel[i-1];
4653 mac_leaf_kids = (kmp_uint8)numPerLevel[0]-1;
4654 first=0;
4655 }
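    //
    // Worked example (illustrative): nproc == 16 yields numPerLevel == {4, 4} for
    // the first two levels, mac_depth == 3, mac_leaf_kids == 3, and the first
    // three entries of mac_skipPerLevel are {1, 4, 16}, i.e. a synthetic 4-ary
    // tree two levels deep over the 16 "processors".
    //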
4656 thr_bar->depth = mac_depth;
4657 thr_bar->base_leaf_kids = mac_leaf_kids;
4658 thr_bar->skip_per_level = mac_skipPerLevel;
4659}
4660
Alp Toker763b9392014-02-28 09:42:41 +00004661#endif // KMP_AFFINITY_SUPPORTED