/*
 * kmp_affinity.cpp -- affinity management
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_str.h"
#include "kmp_wrapper_getpid.h"

#if KMP_AFFINITY_SUPPORTED

//
// Print the affinity mask to the character array in a pretty format.
//
char *
__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
{
    KMP_ASSERT(buf_len >= 40);
    char *scan = buf;
    char *end = buf + buf_len - 1;

    //
    // Find first element / check for empty set.
    //
    size_t i;
    for (i = 0; i < KMP_CPU_SETSIZE; i++) {
        if (KMP_CPU_ISSET(i, mask)) {
            break;
        }
    }
    if (i == KMP_CPU_SETSIZE) {
        KMP_SNPRINTF(scan, buf_len, "{<empty>}");
        while (*scan != '\0') scan++;
        KMP_ASSERT(scan <= end);
        return buf;
    }

    KMP_SNPRINTF(scan, buf_len, "{%ld", (long)i);
    while (*scan != '\0') scan++;
    i++;
    for (; i < KMP_CPU_SETSIZE; i++) {
        if (! KMP_CPU_ISSET(i, mask)) {
            continue;
        }

        //
        // Check for buffer overflow.  A string of the form ",<n>" will have
        // at most 10 characters, plus we want to leave room to print ",...}"
        // if the set is too large to print for a total of 15 characters.
        // We already left room for '\0' in setting end.
        //
        if (end - scan < 15) {
            break;
        }
        KMP_SNPRINTF(scan, buf_len, ",%-ld", (long)i);
        while (*scan != '\0') scan++;
    }
    if (i < KMP_CPU_SETSIZE) {
        KMP_SNPRINTF(scan, buf_len, ",...");
        while (*scan != '\0') scan++;
    }
    KMP_SNPRINTF(scan, buf_len, "}");
    while (*scan != '\0') scan++;
    KMP_ASSERT(scan <= end);
    return buf;
}
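
//
// Example (illustrative): the verbose reporting code in this file prints
// the startup affinity mask with the pattern
//
//     char buf[KMP_AFFIN_MASK_PRINT_LEN];
//     __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
//
// which yields strings like "{0,1,2,3}", "{<empty>}", or "{0,1,...}" when
// the set does not fit in the buffer.
//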


void
__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
{
    KMP_CPU_ZERO(mask);

# if KMP_GROUP_AFFINITY

    if (__kmp_num_proc_groups > 1) {
        int group;
        KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
        for (group = 0; group < __kmp_num_proc_groups; group++) {
            int i;
            int num = __kmp_GetActiveProcessorCount(group);
            for (i = 0; i < num; i++) {
                KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
            }
        }
    }
    else

# endif /* KMP_GROUP_AFFINITY */

    {
        int proc;
        for (proc = 0; proc < __kmp_xproc; proc++) {
            KMP_CPU_SET(proc, mask);
        }
    }
}


//
// In Linux* OS debug & cover (-O0) builds, we need to avoid inline member
// functions.
//
// The icc codegen emits sections with extremely long names, of the form
// ".gnu.linkonce.<mangled_name>".  There seems to have been a linker bug
// introduced between GNU ld version 2.14.90.0.4 and 2.15.92.0.2 involving
// some sort of memory corruption or table overflow that is triggered by
// these long strings.  I checked the latest version of the linker -
// GNU ld (Linux* OS/GNU Binutils) 2.18.50.0.7.20080422 - and the bug is not
// fixed.
//
// Unfortunately, my attempts to reproduce it in a smaller example have
// failed - I'm not sure what the prospects are of getting it fixed
// properly - but we need a reproducer smaller than all of libomp.
//
// Work around the problem by avoiding inline constructors in such builds.
// We do this for all platforms, not just Linux* OS - non-inline functions
// are more debuggable and provide better coverage than inline functions.
// Use inline functions in shipping libs, for performance.
//

# if !defined(KMP_DEBUG) && !defined(COVER)

class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth)
      : depth(_depth), leader(FALSE) {
    }
    Address &operator=(const Address &b) {
        depth = b.depth;
        for (unsigned i = 0; i < depth; i++) {
            labels[i] = b.labels[i];
            childNums[i] = b.childNums[i];
        }
        leader = FALSE;
        return *this;
    }
    bool operator==(const Address &b) const {
        if (depth != b.depth)
            return false;
        for (unsigned i = 0; i < depth; i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool isClose(const Address &b, int level) const {
        if (depth != b.depth)
            return false;
        if ((unsigned)level >= depth)
            return true;
        for (unsigned i = 0; i < (depth - level); i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool operator!=(const Address &b) const {
        return !operator==(b);
    }
};

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {
    }
    AddrUnsPair &operator=(const AddrUnsPair &b)
    {
        first = b.first;
        second = b.second;
        return *this;
    }
};

# else

class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth);
    Address &operator=(const Address &b);
    bool operator==(const Address &b) const;
    bool isClose(const Address &b, int level) const;
    bool operator!=(const Address &b) const;
};

Address::Address(unsigned _depth)
{
    depth = _depth;
    leader = FALSE;
}

Address &Address::operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
        labels[i] = b.labels[i];
        childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
}

bool Address::operator==(const Address &b) const {
    if (depth != b.depth)
        return false;
    for (unsigned i = 0; i < depth; i++)
        if (labels[i] != b.labels[i])
            return false;
    return true;
}

bool Address::isClose(const Address &b, int level) const {
    if (depth != b.depth)
        return false;
    if ((unsigned)level >= depth)
        return true;
    for (unsigned i = 0; i < (depth - level); i++)
        if (labels[i] != b.labels[i])
            return false;
    return true;
}

bool Address::operator!=(const Address &b) const {
    return !operator==(b);
}

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second);
    AddrUnsPair &operator=(const AddrUnsPair &b);
};

AddrUnsPair::AddrUnsPair(Address _first, unsigned _second)
  : first(_first), second(_second)
{
}

AddrUnsPair &AddrUnsPair::operator=(const AddrUnsPair &b)
{
    first = b.first;
    second = b.second;
    return *this;
}

# endif /* !defined(KMP_DEBUG) && !defined(COVER) */


static int
__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    for (i = 0; i < depth; i++) {
        if (aa->labels[i] < bb->labels[i]) return -1;
        if (aa->labels[i] > bb->labels[i]) return 1;
    }
    return 0;
}


static int
__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
    KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
    for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
        int j = depth - i - 1;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    for (; i < depth; i++) {
        int j = i - __kmp_affinity_compact;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    return 0;
}
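
//
// For example, with depth == 2 (package, thread) and
// __kmp_affinity_compact == 1, the first loop above compares the innermost
// (thread) ordinals first, so consecutive entries in the sorted table land
// on different packages ("scatter"); with __kmp_affinity_compact == 0 the
// sort is outermost-first, packing siblings together ("compact").
//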
314
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000315/** A structure for holding machine-specific hierarchy info to be computed once at init. */
316class hierarchy_info {
317public:
318 /** Typical levels are threads/core, cores/package or socket, packages/node, nodes/machine,
319 etc. We don't want to get specific with nomenclature */
320 static const kmp_uint32 maxLevels=7;
321
322 /** This is specifically the depth of the machine configuration hierarchy, in terms of the
323 number of levels along the longest path from root to any leaf. It corresponds to the
324 number of entries in numPerLevel if we exclude all but one trailing 1. */
325 kmp_uint32 depth;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000326 kmp_uint32 base_num_threads;
Andrey Churbanovaa1f2b62015-04-13 18:51:59 +0000327 volatile kmp_int8 uninitialized; // 0=initialized, 1=uninitialized, 2=initialization in progress
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000328
329 /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
330 node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
331 and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
332 kmp_uint32 numPerLevel[maxLevels];
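    /** skipPerLevel[i] is the distance, in leaf (thread) positions, between two adjacent
        nodes at level i: it is computed in init() as the running product of
        numPerLevel[0..i-1], with skipPerLevel[0] == 1. */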
    kmp_uint32 skipPerLevel[maxLevels];

    void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
        int hier_depth = adr2os[0].first.depth;
        int level = 0;
        for (int i=hier_depth-1; i>=0; --i) {
            int max = -1;
            for (int j=0; j<num_addrs; ++j) {
                int next = adr2os[j].first.childNums[i];
                if (next > max) max = next;
            }
            numPerLevel[level] = max+1;
            ++level;
        }
    }

    hierarchy_info() : depth(1), uninitialized(1) {}
    void init(AddrUnsPair *adr2os, int num_addrs)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, 1, 2);
        if (bool_result == 0) { // Wait for initialization
            while (TCR_1(uninitialized) != 0) KMP_CPU_PAUSE();
            return;
        }
        KMP_DEBUG_ASSERT(bool_result==1);

        /* Added explicit initialization of the depth here to prevent usage of dirty value
           observed when static library is re-initialized multiple times (e.g. when
           non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
        depth = 1;
        for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        // Sort table by physical ID
        if (adr2os) {
            qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
            deriveLevels(adr2os, num_addrs);
        }
        else {
            numPerLevel[0] = 4;
            numPerLevel[1] = num_addrs/4;
            if (num_addrs%4) numPerLevel[1]++;
        }

        base_num_threads = num_addrs;
        for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
            if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
                depth++;

        kmp_uint32 branch = 4;
        if (numPerLevel[0] == 1) branch = num_addrs/4;
        if (branch<4) branch=4;
        for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
            while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
                if (numPerLevel[d] & 1) numPerLevel[d]++;
                numPerLevel[d] = numPerLevel[d] >> 1;
                if (numPerLevel[d+1] == 1) depth++;
                numPerLevel[d+1] = numPerLevel[d+1] << 1;
            }
            if (numPerLevel[0] == 1) {
                branch = branch >> 1;
                if (branch<4) branch = 4;
            }
        }

        for (kmp_uint32 i=1; i<depth; ++i)
            skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
        // Fill in hierarchy in the case of oversubscription
        for (kmp_uint32 i=depth; i<maxLevels; ++i)
            skipPerLevel[i] = 2*skipPerLevel[i-1];

        uninitialized = 0; // One writer

    }
};

static hierarchy_info machine_hierarchy;

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
    kmp_uint32 depth;
    // The test below is true if affinity is available, but set to "none".  Need to init on first use of hierarchical barrier.
    if (TCR_1(machine_hierarchy.uninitialized))
        machine_hierarchy.init(NULL, nproc);

    depth = machine_hierarchy.depth;
    KMP_DEBUG_ASSERT(depth > 0);
    // The loop below adjusts the depth in the case of oversubscription
    while (nproc > machine_hierarchy.skipPerLevel[depth-1] && depth<machine_hierarchy.maxLevels-1)
        depth++;

    thr_bar->depth = depth;
    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}

//
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the child_num
// vector of the address object.  This is done in case the labels used for
// the children at one node of the hierarchy differ from those used for
// another node at the same level.  Example:  suppose the machine has 2 nodes
// with 2 packages each.  The first node contains packages 601 and 602, and
// the second node contains packages 603 and 604.  If we try to sort the table
// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
// because we are paying attention to the labels themselves, not the ordinal
// child numbers.  By using the child numbers in the sort, the result is
// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
//
static void
__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
  int numAddrs)
{
    KMP_DEBUG_ASSERT(numAddrs > 0);
    int depth = address2os->first.depth;
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *lastLabel = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    int labCt;
    for (labCt = 0; labCt < depth; labCt++) {
        address2os[0].first.childNums[labCt] = counts[labCt] = 0;
        lastLabel[labCt] = address2os[0].first.labels[labCt];
    }
458 for (i = 1; i < numAddrs; i++) {
459 for (labCt = 0; labCt < depth; labCt++) {
460 if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
461 int labCt2;
462 for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
463 counts[labCt2] = 0;
464 lastLabel[labCt2] = address2os[i].first.labels[labCt2];
465 }
466 counts[labCt]++;
467 lastLabel[labCt] = address2os[i].first.labels[labCt];
468 break;
469 }
470 }
471 for (labCt = 0; labCt < depth; labCt++) {
472 address2os[i].first.childNums[labCt] = counts[labCt];
473 }
474 for (; labCt < (int)Address::maxDepth; labCt++) {
475 address2os[i].first.childNums[labCt] = 0;
476 }
477 }
478}
479
480
481//
482// All of the __kmp_affinity_create_*_map() routines should set
483// __kmp_affinity_masks to a vector of affinity mask objects of length
484// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
485// return the number of levels in the machine topology tree (zero if
486// __kmp_affinity_type == affinity_none).
487//
488// All of the __kmp_affinity_create_*_map() routines should set *fullMask
489// to the affinity mask for the initialization thread. They need to save and
490// restore the mask, and it could be needed later, so saving it is just an
491// optimization to avoid calling kmp_get_system_affinity() again.
492//
493static kmp_affin_mask_t *fullMask = NULL;
494
495kmp_affin_mask_t *
496__kmp_affinity_get_fullMask() { return fullMask; }
497
498
499static int nCoresPerPkg, nPackages;
Andrey Churbanovf696c822015-01-27 16:55:43 +0000500static int __kmp_nThreadsPerCore;
501#ifndef KMP_DFLT_NTH_CORES
502static int __kmp_ncores;
503#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000504
505//
506// __kmp_affinity_uniform_topology() doesn't work when called from
507// places which support arbitrarily many levels in the machine topology
508// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
509// __kmp_affinity_create_x2apicid_map().
510//
511inline static bool
512__kmp_affinity_uniform_topology()
513{
514 return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
515}
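
//
// For example, a machine reported as 2 packages x 4 cores/pkg x 2
// threads/core counts as uniform only if all 2*4*2 = 16 logical processors
// are available to the process.
//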


//
// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc.
//
static void
__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
  int pkgLevel, int coreLevel, int threadLevel)
{
    int proc;

    KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
    for (proc = 0; proc < len; proc++) {
        int level;
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);
        for (level = 0; level < depth; level++) {
            if (level == threadLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
            }
            else if (level == coreLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
            }
            else if (level == pkgLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
            }
            else if (level > pkgLevel) {
                __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                  level - pkgLevel - 1);
            }
            else {
                __kmp_str_buf_print(&buf, "L%d ", level);
            }
            __kmp_str_buf_print(&buf, "%d ",
              address2os[proc].first.labels[level]);
        }
        KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
          buf.str);
        __kmp_str_buf_free(&buf);
    }
}


//
// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread id's <-> processor id's.
//
static int
__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Even if __kmp_affinity_type == affinity_none, this routine might still
    // be called to set __kmp_ncores, as well as
    // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(__kmp_affinity_type == affinity_none);
        __kmp_ncores = nPackages = __kmp_xproc;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nPackages = __kmp_avail_proc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);

        KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }
    if (__kmp_affinity_type == affinity_none) {
        return 0;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    unsigned int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(1);
        addr.labels[0] = i;
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
    }
    if (__kmp_affinity_verbose) {
        KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Only the package level is modeled in the machine topology map,
        // so the #levels of granularity is either 0 or 1.
        //
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels = 1;
        }
        else {
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 1;
}


# if KMP_GROUP_AFFINITY

//
// If multiple Windows* OS processor groups exist, we can create a 2-level
// topology map with the groups at level 0 and the individual procs at
// level 1.
//
// This facilitates letting the threads float among all procs in a group,
// if granularity=group (the default when there are multiple groups).
//
static int
__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // If we don't have multiple processor groups, return now.
    // The flat mapping will be used.
    //
    if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
        // FIXME set *msg_id
        return -1;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

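        //
        // For example, on a 64-bit Windows* OS build where
        // CHAR_BIT * sizeof(DWORD_PTR) == 64, OS proc 70 gets labels {1, 6}:
        // the 7th proc in group 1.
        //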
        Address addr(2);
        addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
        addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
              addr.labels[1]);
        }
    }

    if (__kmp_affinity_gran_levels < 0) {
        if (__kmp_affinity_gran == affinity_gran_group) {
            __kmp_affinity_gran_levels = 1;
        }
        else if ((__kmp_affinity_gran == affinity_gran_fine)
          || (__kmp_affinity_gran == affinity_gran_thread)) {
            __kmp_affinity_gran_levels = 0;
        }
        else {
            const char *gran_str = NULL;
            if (__kmp_affinity_gran == affinity_gran_core) {
                gran_str = "core";
            }
            else if (__kmp_affinity_gran == affinity_gran_package) {
                gran_str = "package";
            }
            else if (__kmp_affinity_gran == affinity_gran_node) {
                gran_str = "node";
            }
            else {
                KMP_ASSERT(0);
            }

            // Warning: can't use affinity granularity "gran" with group topology method, using "thread"
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 2;
}

# endif /* KMP_GROUP_AFFINITY */


# if KMP_ARCH_X86 || KMP_ARCH_X86_64

static int
__kmp_cpuid_mask_width(int count) {
    int r = 0;

    while ((1<<r) < count)
        ++r;
    return r;
}
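
// For example, __kmp_cpuid_mask_width(8) == 3 and __kmp_cpuid_mask_width(6) == 3:
// the result is the smallest r such that (1 << r) >= count, i.e. the number
// of Apic Id bits needed to encode 'count' distinct ids.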


class apicThreadInfo {
public:
    unsigned osId;              // param to __kmp_affinity_bind_thread
    unsigned apicId;            // from cpuid after binding
    unsigned maxCoresPerPkg;    // ""
    unsigned maxThreadsPerPkg;  // ""
    unsigned pkgId;             // inferred from above values
    unsigned coreId;            // ""
    unsigned threadId;          // ""
};


static int
__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->osId < bb->osId) return -1;
    if (aa->osId > bb->osId) return 1;
    return 0;
}


static int
__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->pkgId < bb->pkgId) return -1;
    if (aa->pkgId > bb->pkgId) return 1;
    if (aa->coreId < bb->coreId) return -1;
    if (aa->coreId > bb->coreId) return 1;
    if (aa->threadId < bb->threadId) return -1;
    if (aa->threadId > bb->threadId) return 1;
    return 0;
}


//
// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
// an algorithm which cycles through the available os threads, setting
// the current thread's affinity mask to that thread, and then retrieves
// the Apic Id for each thread context using the cpuid instruction.
//
static int
__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;
    int rc;
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check if cpuid leaf 4 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 4) {
        *msg_id = kmp_i18n_str_NoLeaf4Support;
        return -1;
    }

    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
    // the defaults that we calculated from issuing cpuid without binding
    // to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        //
        // Get an upper bound on the number of threads per package using
        // cpuid(1).
        //
        // On some OS/chip combinations where HT is supported by the chip
        // but is disabled, this value will be 2 on a single core chip.
        // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
        //
        __kmp_x86_cpuid(1, 0, &buf);
        int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (maxThreadsPerPkg == 0) {
            maxThreadsPerPkg = 1;
        }

        //
        // The num cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // The author of cpu_count.cpp treated this as only an upper bound
        // on the number of cores, but I haven't seen any cases where it
        // was greater than the actual number of cores, so we will treat
        // it as exact in this block of code.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            nCoresPerPkg = 1;
        }

        //
        // There is no way to reliably tell if HT is enabled without issuing
        // the cpuid instruction from every thread and correlating the cpuid
        // info, so if the machine is not affinity capable, we assume that HT
        // is off.  We have seen quite a few machines where maxThreadsPerPkg
        // is 2, yet the machine does not support HT.
        //
        // - Older OSes are usually found on machines with older chips, which
        //   do not support HT.
        //
        // - The performance penalty for mistakenly identifying a machine as
        //   HT when it isn't (which results in blocktime being incorrectly
        //   set to 0) is greater than the penalty for mistakenly identifying
        //   a machine as being 1 thread/core when it is really HT enabled
        //   (which results in blocktime being incorrectly set to a positive
        //   value).
        //
        __kmp_ncores = __kmp_xproc;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        __kmp_nThreadsPerCore = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    KMP_ASSERT(oldMask != NULL);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    // The relevant information is:
    //
    // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
    //    has a unique Apic Id, which is of the form pkg# : core# : thread#.
    //
    // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1).  The
    //    value of this field determines the width of the core# + thread#
    //    fields in the Apic Id.  It is also an upper bound on the number
    //    of threads per package, but it has been verified that situations
    //    happen where it is not exact.  In particular, on certain OS/chip
    //    combinations where Intel(R) Hyper-Threading Technology is supported
    //    by the chip but has been disabled, the value of this field will be
    //    2 (for a single core chip).  On other OS/chip combinations
    //    supporting Intel(R) Hyper-Threading Technology, the value of this
    //    field will be 1 when Intel(R) Hyper-Threading Technology is
    //    disabled and 2 when it is enabled.
    //
    // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4).  The
    //    value of this field (+1) determines the width of the core# field in
    //    the Apic Id.  The comments in "cpucount.cpp" say that this value is
    //    an upper bound, but the IA-32 architecture manual says that it is
    //    exactly the number of cores per package, and I haven't seen any
    //    case where it wasn't.
    //
    // From this information, deduce the package Id, core Id, and thread Id,
    // and set the corresponding fields in the apicThreadInfo struct.
    //
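    // For example (illustrative values): with maxThreadsPerPkg == 4 and
    // maxCoresPerPkg == 2, the field widths below come out as widthCT == 2,
    // widthC == 1, and widthT == 1, so an Apic Id of 0b101 decodes to
    // pkgId == 1, coreId == 0, threadId == 1.
    //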
    unsigned i;
    apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
    unsigned nApics = 0;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(i);
        threadInfo[nApics].osId = i;

        //
        // The apic id and max threads per pkg come from cpuid(1).
        //
        __kmp_x86_cpuid(1, 0, &buf);
        if (!((buf.edx >> 9) & 1)) {    // check the APIC flag (edx bit 9)
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_ApicNotPresent;
            return -1;
        }
        threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
        threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (threadInfo[nApics].maxThreadsPerPkg == 0) {
            threadInfo[nApics].maxThreadsPerPkg = 1;
        }

        //
        // Max cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            threadInfo[nApics].maxCoresPerPkg = 1;
        }

        //
        // Infer the pkgId / coreId / threadId using only the info
        // obtained locally.
        //
        int widthCT = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxThreadsPerPkg);
        threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

        int widthC = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxCoresPerPkg);
        int widthT = widthCT - widthC;
        if (widthT < 0) {
            //
            // I've never seen this one happen, but I suppose it could, if
            // the cpuid instruction on a chip was really screwed up.
            // Make sure to restore the affinity mask before the tail call.
            //
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }

        int maskC = (1 << widthC) - 1;
        threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
          & maskC;

        int maskT = (1 << widthT) - 1;
        threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

        nApics++;
    }

    //
    // We've collected all the info we need.
    // Restore the old affinity mask for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, form an Address object
    // with depth 1 and return immediately (or, if affinity is off, set
    // address2os to NULL and return).
    //
    // If it is configured to omit the package level when there is only a
    // single package, the logic at the end of this routine won't work if
    // there is only a single thread - it would try to form an Address
    // object with depth 0.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
        Address addr(1);
        addr.labels[0] = threadInfo[0].pkgId;
        (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
        }

        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the threadInfo table by physical Id.
    //
    qsort(threadInfo, nApics, sizeof(*threadInfo),
      __kmp_affinity_cmp_apicThreadInfo_phys_id);

    //
    // The table is now sorted by pkgId / coreId / threadId, but we really
    // don't know the radix of any of the fields.  pkgId's may be sparsely
    // assigned among the chips on a system.  Although coreId's are usually
    // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
    // [0..threadsPerCore-1], we don't want to make any such assumptions.
    //
    // For that matter, we don't know what coresPerPkg and threadsPerCore
    // (or the total # packages) are at this point - we want to determine
    // that now.  We only have an upper bound on the first two figures.
    //
    // We also perform a consistency check at this point: the values returned
    // by the cpuid instruction for any thread bound to a given package had
    // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
    //
    nPackages = 1;
    nCoresPerPkg = 1;
    __kmp_nThreadsPerCore = 1;
    unsigned nCores = 1;

    unsigned pkgCt = 1;                             // to determine radii
    unsigned lastPkgId = threadInfo[0].pkgId;
    unsigned coreCt = 1;
    unsigned lastCoreId = threadInfo[0].coreId;
    unsigned threadCt = 1;
    unsigned lastThreadId = threadInfo[0].threadId;

    // intra-pkg consistency checks
    unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
    unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

    for (i = 1; i < nApics; i++) {
        if (threadInfo[i].pkgId != lastPkgId) {
            nCores++;
            pkgCt++;
            lastPkgId = threadInfo[i].pkgId;
            if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
            coreCt = 1;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;

            //
            // This is a different package, so go on to the next iteration
            // without doing any consistency checks.  Reset the consistency
            // check vars, though.
            //
            prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
            prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
            continue;
        }

        if (threadInfo[i].coreId != lastCoreId) {
            nCores++;
            coreCt++;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;
        }
        else if (threadInfo[i].threadId != lastThreadId) {
            threadCt++;
            lastThreadId = threadInfo[i].threadId;
        }
        else {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
            return -1;
        }

        //
        // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
        // fields agree between all the threads bound to a given package.
        //
        if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
          || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }
    }
    nPackages = pkgCt;
    if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
    if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nCores;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (__kmp_affinity_uniform_topology()) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);

    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Now that we've determined the number of packages, the number of cores
    // per package, and the number of threads per core, we can construct the
    // data structure that is to be returned.
    //
    int pkgLevel = 0;
    int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
    int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
    unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

    KMP_ASSERT(depth > 0);
    *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

    for (i = 0; i < nApics; ++i) {
        Address addr(depth);
        unsigned os = threadInfo[i].osId;
        int d = 0;

        if (pkgLevel >= 0) {
            addr.labels[d++] = threadInfo[i].pkgId;
        }
        if (coreLevel >= 0) {
            addr.labels[d++] = threadInfo[i].coreId;
        }
        if (threadLevel >= 0) {
            addr.labels[d++] = threadInfo[i].threadId;
        }
        (*address2os)[i] = AddrUnsPair(addr, os);
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0)
          && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return depth;
}


//
// Intel(R) microarchitecture code name Nehalem, Dunnington and later
// architectures support a newer interface for specifying the x2APIC Ids,
// based on cpuid leaf 11.
//
static int
__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;

    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check to see if cpuid leaf 11 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 11) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }
    __kmp_x86_cpuid(11, 0, &buf);
    if (buf.ebx == 0) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }

    //
    // Find the number of levels in the machine topology.  While we're at it,
    // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg.  We
    // will try to get more accurate values later by explicitly counting
    // them, but get reasonable defaults now, in case we return early.
    //
    int level;
    int threadLevel = -1;
    int coreLevel = -1;
    int pkgLevel = -1;
    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

    for (level = 0;; level++) {
        if (level > 31) {
            //
            // FIXME: Hack for DPD200163180
            //
            // If level is big then something went wrong -> exiting
            //
            // There could actually be 32 valid levels in the machine topology,
            // but so far, the only machine we have seen which does not exit
            // this loop before iteration 32 has fubar x2APIC settings.
            //
            // For now, just reject this case based upon loop trip count.
            //
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }
        __kmp_x86_cpuid(11, level, &buf);
        if (buf.ebx == 0) {
            if (pkgLevel < 0) {
                //
                // Will infer nPackages from __kmp_xproc
                //
                pkgLevel = level;
                level++;
            }
            break;
        }
        int kind = (buf.ecx >> 8) & 0xff;
        if (kind == 1) {
            //
            // SMT level
            //
            threadLevel = level;
            coreLevel = -1;
            pkgLevel = -1;
            __kmp_nThreadsPerCore = buf.ebx & 0xff;
            if (__kmp_nThreadsPerCore == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else if (kind == 2) {
            //
            // core level
            //
            coreLevel = level;
            pkgLevel = -1;
            nCoresPerPkg = buf.ebx & 0xff;
            if (nCoresPerPkg == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else {
            if (level <= 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
            if (pkgLevel >= 0) {
                continue;
            }
            pkgLevel = level;
            nPackages = buf.ebx & 0xff;
            if (nPackages == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
    }
    int depth = level;

    //
    // In the above loop, "level" was counted from the finest level (usually
    // thread) to the coarsest.  The caller expects that we will place the
    // labels in (*address2os)[].first.labels[] in the inverse order, so
    // we need to invert the vars saying which level means what.
    //
    if (threadLevel >= 0) {
        threadLevel = depth - threadLevel - 1;
    }
    if (coreLevel >= 0) {
        coreLevel = depth - coreLevel - 1;
    }
    KMP_DEBUG_ASSERT(pkgLevel >= 0);
    pkgLevel = depth - pkgLevel - 1;

    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
    // the defaults that we calculated from issuing cpuid without binding
    // to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE())
    {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Allocate the data structure to be returned.
    //
    AddrUnsPair *retval = (AddrUnsPair *)
      __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    unsigned int proc;
    int nApics = 0;
    for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(proc, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(proc);

        //
        // Extract the labels for each level in the machine topology map
        // from the Apic ID.
        //
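        // For example (illustrative values): if leaf 11 reports shift == 1 at
        // the SMT level and shift == 5 at the core level, then x2APIC id 0x2d
        // decodes as thread = 0x2d & 0x1 = 1, core = (0x2d & 0x1f) >> 1 = 6,
        // and package = 0x2d >> 5 = 1, matching the loop below.
        //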
1478 Address addr(depth);
1479 int prev_shift = 0;
1480
1481 for (level = 0; level < depth; level++) {
1482 __kmp_x86_cpuid(11, level, &buf);
1483 unsigned apicId = buf.edx;
1484 if (buf.ebx == 0) {
1485 if (level != depth - 1) {
1486 KMP_CPU_FREE(oldMask);
1487 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1488 return -1;
1489 }
1490 addr.labels[depth - level - 1] = apicId >> prev_shift;
1491 level++;
1492 break;
1493 }
1494 int shift = buf.eax & 0x1f;
1495 int mask = (1 << shift) - 1;
1496 addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
1497 prev_shift = shift;
1498 }
1499 if (level != depth) {
1500 KMP_CPU_FREE(oldMask);
1501 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1502 return -1;
1503 }
1504
1505 retval[nApics] = AddrUnsPair(addr, proc);
1506 nApics++;
1507 }
1508
1509 //
1510 // We've collected all the info we need.
1511 // Restore the old affinity mask for this thread.
1512 //
1513 __kmp_set_system_affinity(oldMask, TRUE);
1514
1515 //
1516 // If there's only one thread context to bind to, return now.
1517 //
1518 KMP_ASSERT(nApics > 0);
1519 if (nApics == 1) {
1520 __kmp_ncores = nPackages = 1;
1521 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001522 if (__kmp_affinity_verbose) {
1523 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1524 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1525
1526 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1527 if (__kmp_affinity_respect_mask) {
1528 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1529 } else {
1530 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1531 }
1532 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1533 KMP_INFORM(Uniform, "KMP_AFFINITY");
1534 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1535 __kmp_nThreadsPerCore, __kmp_ncores);
1536 }
1537
1538 if (__kmp_affinity_type == affinity_none) {
1539 __kmp_free(retval);
1540 KMP_CPU_FREE(oldMask);
1541 return 0;
1542 }
1543
1544 //
1545 // Form an Address object which only includes the package level.
1546 //
1547 Address addr(1);
1548 addr.labels[0] = retval[0].first.labels[pkgLevel];
1549 retval[0].first = addr;
1550
1551 if (__kmp_affinity_gran_levels < 0) {
1552 __kmp_affinity_gran_levels = 0;
1553 }
1554
1555 if (__kmp_affinity_verbose) {
1556 __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
1557 }
1558
1559 *address2os = retval;
1560 KMP_CPU_FREE(oldMask);
1561 return 1;
1562 }
1563
1564 //
1565 // Sort the table by physical Id.
1566 //
1567 qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
1568
1569 //
1570 // Find the radix at each of the levels.
1571 //
1572 unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1573 unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1574 unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1575 unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1576 for (level = 0; level < depth; level++) {
1577 totals[level] = 1;
1578 maxCt[level] = 1;
1579 counts[level] = 1;
1580 last[level] = retval[0].first.labels[level];
1581 }
1582
1583 //
1584 // From here on, the iteration variable "level" runs from the finest
1585 // level to the coarsest, i.e. we iterate forward through
1586 // (*address2os)[].first.labels[] - in the previous loops, we iterated
1587 // backwards.
1588 //
1589 for (proc = 1; (int)proc < nApics; proc++) {
1590 int level;
1591 for (level = 0; level < depth; level++) {
1592 if (retval[proc].first.labels[level] != last[level]) {
1593 int j;
1594 for (j = level + 1; j < depth; j++) {
1595 totals[j]++;
1596 counts[j] = 1;
1597 // The line below causes printing incorrect topology information
1598 // in case the max value for some level (maxCt[level]) is encountered earlier than
1599 // some less value while going through the array.
1600 // For example, let pkg0 has 4 cores and pkg1 has 2 cores. Then maxCt[1] == 2
1601 // whereas it must be 4.
1602 // TODO!!! Check if it can be commented safely
1603 //maxCt[j] = 1;
1604 last[j] = retval[proc].first.labels[j];
1605 }
1606 totals[level]++;
1607 counts[level]++;
1608 if (counts[level] > maxCt[level]) {
1609 maxCt[level] = counts[level];
1610 }
1611 last[level] = retval[proc].first.labels[level];
1612 break;
1613 }
1614 else if (level == depth - 1) {
1615 __kmp_free(last);
1616 __kmp_free(maxCt);
1617 __kmp_free(counts);
1618 __kmp_free(totals);
1619 __kmp_free(retval);
1620 KMP_CPU_FREE(oldMask);
1621 *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
1622 return -1;
1623 }
1624 }
1625 }
1626
1627 //
1628 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00001629 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001630 // nCoresPerPkg, & nPackages. Make sure all these vars are set
1631 // correctly, and return if affinity is not enabled.
1632 //
1633 if (threadLevel >= 0) {
1634 __kmp_nThreadsPerCore = maxCt[threadLevel];
1635 }
1636 else {
1637 __kmp_nThreadsPerCore = 1;
1638 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001639 nPackages = totals[pkgLevel];
1640
1641 if (coreLevel >= 0) {
1642 __kmp_ncores = totals[coreLevel];
1643 nCoresPerPkg = maxCt[coreLevel];
1644 }
1645 else {
1646 __kmp_ncores = nPackages;
1647 nCoresPerPkg = 1;
1648 }
1649
1650 //
1651 // Check to see if the machine topology is uniform
1652 //
1653 unsigned prod = maxCt[0];
1654 for (level = 1; level < depth; level++) {
1655 prod *= maxCt[level];
1656 }
1657 bool uniform = (prod == totals[level - 1]);
1658
1659 //
1660 // Print the machine topology summary.
1661 //
1662 if (__kmp_affinity_verbose) {
1663 char mask[KMP_AFFIN_MASK_PRINT_LEN];
1664 __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1665
1666 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1667 if (__kmp_affinity_respect_mask) {
1668 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
1669 } else {
1670 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
1671 }
1672 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1673 if (uniform) {
1674 KMP_INFORM(Uniform, "KMP_AFFINITY");
1675 } else {
1676 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1677 }
1678
1679 kmp_str_buf_t buf;
1680 __kmp_str_buf_init(&buf);
1681
1682 __kmp_str_buf_print(&buf, "%d", totals[0]);
1683 for (level = 1; level <= pkgLevel; level++) {
1684 __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
1685 }
1686 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
1687 __kmp_nThreadsPerCore, __kmp_ncores);
1688
1689 __kmp_str_buf_free(&buf);
1690 }
1691
1692 if (__kmp_affinity_type == affinity_none) {
1693 __kmp_free(last);
1694 __kmp_free(maxCt);
1695 __kmp_free(counts);
1696 __kmp_free(totals);
1697 __kmp_free(retval);
1698 KMP_CPU_FREE(oldMask);
1699 return 0;
1700 }
1701
1702 //
 1703 // Find any levels with radix 1, and remove them from the map
1704 // (except for the package level).
1705 //
1706 int new_depth = 0;
1707 for (level = 0; level < depth; level++) {
1708 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1709 continue;
1710 }
1711 new_depth++;
1712 }
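 // For example (hypothetical values): with depth == 3 and
 // maxCt == {2, 4, 1} - i.e. every core has exactly one hardware
 // thread - only the thread level is dropped, giving new_depth == 2.
 // The package level is kept even when there is just one package, so
 // the map always retains a package coordinate.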
1713
1714 //
1715 // If we are removing any levels, allocate a new vector to return,
1716 // and copy the relevant information to it.
1717 //
1718 if (new_depth != depth) {
1719 AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
1720 sizeof(AddrUnsPair) * nApics);
1721 for (proc = 0; (int)proc < nApics; proc++) {
1722 Address addr(new_depth);
1723 new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
1724 }
1725 int new_level = 0;
1726 for (level = 0; level < depth; level++) {
1727 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1728 if (level == threadLevel) {
1729 threadLevel = -1;
1730 }
1731 else if ((threadLevel >= 0) && (level < threadLevel)) {
1732 threadLevel--;
1733 }
1734 if (level == coreLevel) {
1735 coreLevel = -1;
1736 }
1737 else if ((coreLevel >= 0) && (level < coreLevel)) {
1738 coreLevel--;
1739 }
1740 if (level < pkgLevel) {
1741 pkgLevel--;
1742 }
1743 continue;
1744 }
1745 for (proc = 0; (int)proc < nApics; proc++) {
1746 new_retval[proc].first.labels[new_level]
1747 = retval[proc].first.labels[level];
1748 }
1749 new_level++;
1750 }
1751
1752 __kmp_free(retval);
1753 retval = new_retval;
1754 depth = new_depth;
1755 }
1756
1757 if (__kmp_affinity_gran_levels < 0) {
1758 //
1759 // Set the granularity level based on what levels are modeled
1760 // in the machine topology map.
1761 //
1762 __kmp_affinity_gran_levels = 0;
1763 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
1764 __kmp_affinity_gran_levels++;
1765 }
1766 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1767 __kmp_affinity_gran_levels++;
1768 }
1769 if (__kmp_affinity_gran > affinity_gran_package) {
1770 __kmp_affinity_gran_levels++;
1771 }
1772 }
1773
1774 if (__kmp_affinity_verbose) {
1775 __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
1776 coreLevel, threadLevel);
1777 }
1778
1779 __kmp_free(last);
1780 __kmp_free(maxCt);
1781 __kmp_free(counts);
1782 __kmp_free(totals);
1783 KMP_CPU_FREE(oldMask);
1784 *address2os = retval;
1785 return depth;
1786}
1787
1788
1789# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1790
1791
1792#define osIdIndex 0
1793#define threadIdIndex 1
1794#define coreIdIndex 2
1795#define pkgIdIndex 3
1796#define nodeIdIndex 4
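// Each parsed record is stored as an array of unsigneds indexed by the
// constants above. For a hypothetical record "processor 5 / physical id
// 0 / core id 1" with no thread id or node fields, the row would hold:
//
//   row[osIdIndex]     == 5
//   row[threadIdIndex] == UINT_MAX   (missing; may be assigned later)
//   row[coreIdIndex]   == 1
//   row[pkgIdIndex]    == 0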
1797
1798typedef unsigned *ProcCpuInfo;
1799static unsigned maxIndex = pkgIdIndex;
1800
1801
1802static int
1803__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
1804{
1805 const unsigned *aa = (const unsigned *)a;
1806 const unsigned *bb = (const unsigned *)b;
1807 if (aa[osIdIndex] < bb[osIdIndex]) return -1;
1808 if (aa[osIdIndex] > bb[osIdIndex]) return 1;
1809 return 0;
1810};
1811
1812
1813static int
1814__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
1815{
1816 unsigned i;
1817 const unsigned *aa = *((const unsigned **)a);
1818 const unsigned *bb = *((const unsigned **)b);
1819 for (i = maxIndex; ; i--) {
1820 if (aa[i] < bb[i]) return -1;
1821 if (aa[i] > bb[i]) return 1;
1822 if (i == osIdIndex) break;
1823 }
1824 return 0;
1825}
1826
1827
1828//
1829// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
1830// affinity map.
1831//
1832static int
1833__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
1834 kmp_i18n_id_t *const msg_id, FILE *f)
1835{
1836 *address2os = NULL;
1837 *msg_id = kmp_i18n_null;
1838
1839 //
 1840 // Scan the file once, counting the number of "processor" (osId) fields,
Alp Toker8f2d3f02014-02-24 10:40:15 +00001841 // and find the highest value of <n> for a node_<n> field.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001842 //
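 // For reference, a typical /proc/cpuinfo record looks like the sketch
 // below (contents vary by kernel and architecture; unrecognized fields
 // are simply skipped):
 //
 //   processor       : 0
 //   physical id     : 0
 //   core id         : 0
 //
 // with a blank line terminating each record.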
1843 char buf[256];
1844 unsigned num_records = 0;
1845 while (! feof(f)) {
1846 buf[sizeof(buf) - 1] = 1;
1847 if (! fgets(buf, sizeof(buf), f)) {
1848 //
 1849 // Read error - presumably because of EOF.
1850 //
1851 break;
1852 }
1853
1854 char s1[] = "processor";
1855 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1856 num_records++;
1857 continue;
1858 }
1859
1860 //
1861 // FIXME - this will match "node_<n> <garbage>"
1862 //
1863 unsigned level;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001864 if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001865 if (nodeIdIndex + level >= maxIndex) {
1866 maxIndex = nodeIdIndex + level;
1867 }
1868 continue;
1869 }
1870 }
1871
1872 //
1873 // Check for empty file / no valid processor records, or too many.
1874 // The number of records can't exceed the number of valid bits in the
1875 // affinity mask.
1876 //
1877 if (num_records == 0) {
1878 *line = 0;
1879 *msg_id = kmp_i18n_str_NoProcRecords;
1880 return -1;
1881 }
1882 if (num_records > (unsigned)__kmp_xproc) {
1883 *line = 0;
1884 *msg_id = kmp_i18n_str_TooManyProcRecords;
1885 return -1;
1886 }
1887
1888 //
 1889 // Set the file pointer back to the beginning, so that we can scan the
 1890 // file again, this time performing a full parse of the data.
 1891 // Allocate a vector of ProcCpuInfo objects, where we will place the data.
1892 // Adding an extra element at the end allows us to remove a lot of extra
1893 // checks for termination conditions.
1894 //
1895 if (fseek(f, 0, SEEK_SET) != 0) {
1896 *line = 0;
1897 *msg_id = kmp_i18n_str_CantRewindCpuinfo;
1898 return -1;
1899 }
1900
1901 //
1902 // Allocate the array of records to store the proc info in. The dummy
1903 // element at the end makes the logic in filling them out easier to code.
1904 //
1905 unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
1906 * sizeof(unsigned *));
1907 unsigned i;
1908 for (i = 0; i <= num_records; i++) {
1909 threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
1910 * sizeof(unsigned));
1911 }
1912
1913#define CLEANUP_THREAD_INFO \
1914 for (i = 0; i <= num_records; i++) { \
1915 __kmp_free(threadInfo[i]); \
1916 } \
1917 __kmp_free(threadInfo);
1918
1919 //
1920 // A value of UINT_MAX means that we didn't find the field
1921 //
1922 unsigned __index;
1923
1924#define INIT_PROC_INFO(p) \
1925 for (__index = 0; __index <= maxIndex; __index++) { \
1926 (p)[__index] = UINT_MAX; \
1927 }
1928
1929 for (i = 0; i <= num_records; i++) {
1930 INIT_PROC_INFO(threadInfo[i]);
1931 }
1932
1933 unsigned num_avail = 0;
1934 *line = 0;
1935 while (! feof(f)) {
1936 //
1937 // Create an inner scoping level, so that all the goto targets at the
1938 // end of the loop appear in an outer scoping level. This avoids
1939 // warnings about jumping past an initialization to a target in the
1940 // same block.
1941 //
1942 {
1943 buf[sizeof(buf) - 1] = 1;
1944 bool long_line = false;
1945 if (! fgets(buf, sizeof(buf), f)) {
1946 //
1947 // Read errors presumably because of EOF
1948 //
1949 // If there is valid data in threadInfo[num_avail], then fake
 1950 // a blank line to ensure that the last address gets parsed.
1951 //
1952 bool valid = false;
1953 for (i = 0; i <= maxIndex; i++) {
1954 if (threadInfo[num_avail][i] != UINT_MAX) {
1955 valid = true;
1956 }
1957 }
1958 if (! valid) {
1959 break;
1960 }
1961 buf[0] = 0;
1962 } else if (!buf[sizeof(buf) - 1]) {
1963 //
1964 // The line is longer than the buffer. Set a flag and don't
1965 // emit an error if we were going to ignore the line, anyway.
1966 //
1967 long_line = true;
1968
1969#define CHECK_LINE \
1970 if (long_line) { \
1971 CLEANUP_THREAD_INFO; \
1972 *msg_id = kmp_i18n_str_LongLineCpuinfo; \
1973 return -1; \
1974 }
1975 }
1976 (*line)++;
1977
1978 char s1[] = "processor";
1979 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1980 CHECK_LINE;
1981 char *p = strchr(buf + sizeof(s1) - 1, ':');
1982 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001983 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001984 if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
1985 threadInfo[num_avail][osIdIndex] = val;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001986#if KMP_OS_LINUX && USE_SYSFS_INFO
1987 char path[256];
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001988 KMP_SNPRINTF(path, sizeof(path),
Jim Cownie181b4bb2013-12-23 17:28:57 +00001989 "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
1990 threadInfo[num_avail][osIdIndex]);
1991 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
1992
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001993 KMP_SNPRINTF(path, sizeof(path),
Jim Cownie181b4bb2013-12-23 17:28:57 +00001994 "/sys/devices/system/cpu/cpu%u/topology/core_id",
1995 threadInfo[num_avail][osIdIndex]);
1996 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001997 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001998#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001999 }
2000 char s2[] = "physical id";
2001 if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
2002 CHECK_LINE;
2003 char *p = strchr(buf + sizeof(s2) - 1, ':');
2004 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002005 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002006 if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
2007 threadInfo[num_avail][pkgIdIndex] = val;
2008 continue;
2009 }
2010 char s3[] = "core id";
2011 if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
2012 CHECK_LINE;
2013 char *p = strchr(buf + sizeof(s3) - 1, ':');
2014 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002015 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002016 if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
2017 threadInfo[num_avail][coreIdIndex] = val;
2018 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002019#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jim Cownie5e8470a2013-09-27 10:38:44 +00002020 }
2021 char s4[] = "thread id";
2022 if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
2023 CHECK_LINE;
2024 char *p = strchr(buf + sizeof(s4) - 1, ':');
2025 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002026 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002027 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
2028 threadInfo[num_avail][threadIdIndex] = val;
2029 continue;
2030 }
2031 unsigned level;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002032 if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002033 CHECK_LINE;
2034 char *p = strchr(buf + sizeof(s4) - 1, ':');
2035 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002036 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002037 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
2038 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
2039 threadInfo[num_avail][nodeIdIndex + level] = val;
2040 continue;
2041 }
2042
2043 //
2044 // We didn't recognize the leading token on the line.
2045 // There are lots of leading tokens that we don't recognize -
2046 // if the line isn't empty, go on to the next line.
2047 //
2048 if ((*buf != 0) && (*buf != '\n')) {
2049 //
2050 // If the line is longer than the buffer, read characters
2051 // until we find a newline.
2052 //
2053 if (long_line) {
2054 int ch;
2055 while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
2056 }
2057 continue;
2058 }
2059
2060 //
2061 // A newline has signalled the end of the processor record.
2062 // Check that there aren't too many procs specified.
2063 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002064 if ((int)num_avail == __kmp_xproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002065 CLEANUP_THREAD_INFO;
2066 *msg_id = kmp_i18n_str_TooManyEntries;
2067 return -1;
2068 }
2069
2070 //
2071 // Check for missing fields. The osId field must be there, and we
2072 // currently require that the physical id field is specified, also.
2073 //
2074 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
2075 CLEANUP_THREAD_INFO;
2076 *msg_id = kmp_i18n_str_MissingProcField;
2077 return -1;
2078 }
2079 if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
2080 CLEANUP_THREAD_INFO;
2081 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
2082 return -1;
2083 }
2084
2085 //
2086 // Skip this proc if it is not included in the machine model.
2087 //
2088 if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
2089 INIT_PROC_INFO(threadInfo[num_avail]);
2090 continue;
2091 }
2092
2093 //
2094 // We have a successful parse of this proc's info.
2095 // Increment the counter, and prepare for the next proc.
2096 //
2097 num_avail++;
2098 KMP_ASSERT(num_avail <= num_records);
2099 INIT_PROC_INFO(threadInfo[num_avail]);
2100 }
2101 continue;
2102
2103 no_val:
2104 CLEANUP_THREAD_INFO;
2105 *msg_id = kmp_i18n_str_MissingValCpuinfo;
2106 return -1;
2107
2108 dup_field:
2109 CLEANUP_THREAD_INFO;
2110 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
2111 return -1;
2112 }
2113 *line = 0;
2114
2115# if KMP_MIC && REDUCE_TEAM_SIZE
2116 unsigned teamSize = 0;
2117# endif // KMP_MIC && REDUCE_TEAM_SIZE
2118
2119 // check for num_records == __kmp_xproc ???
2120
2121 //
2122 // If there's only one thread context to bind to, form an Address object
2123 // with depth 1 and return immediately (or, if affinity is off, set
2124 // address2os to NULL and return).
2125 //
2126 // If it is configured to omit the package level when there is only a
2127 // single package, the logic at the end of this routine won't work if
2128 // there is only a single thread - it would try to form an Address
2129 // object with depth 0.
2130 //
2131 KMP_ASSERT(num_avail > 0);
2132 KMP_ASSERT(num_avail <= num_records);
2133 if (num_avail == 1) {
2134 __kmp_ncores = 1;
2135 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002136 if (__kmp_affinity_verbose) {
2137 if (! KMP_AFFINITY_CAPABLE()) {
2138 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2139 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2140 KMP_INFORM(Uniform, "KMP_AFFINITY");
2141 }
2142 else {
2143 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2144 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
2145 fullMask);
2146 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2147 if (__kmp_affinity_respect_mask) {
2148 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2149 } else {
2150 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2151 }
2152 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2153 KMP_INFORM(Uniform, "KMP_AFFINITY");
2154 }
2155 int index;
2156 kmp_str_buf_t buf;
2157 __kmp_str_buf_init(&buf);
2158 __kmp_str_buf_print(&buf, "1");
2159 for (index = maxIndex - 1; index > pkgIdIndex; index--) {
2160 __kmp_str_buf_print(&buf, " x 1");
2161 }
2162 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
2163 __kmp_str_buf_free(&buf);
2164 }
2165
2166 if (__kmp_affinity_type == affinity_none) {
2167 CLEANUP_THREAD_INFO;
2168 return 0;
2169 }
2170
2171 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
2172 Address addr(1);
2173 addr.labels[0] = threadInfo[0][pkgIdIndex];
2174 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
2175
2176 if (__kmp_affinity_gran_levels < 0) {
2177 __kmp_affinity_gran_levels = 0;
2178 }
2179
2180 if (__kmp_affinity_verbose) {
2181 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
2182 }
2183
2184 CLEANUP_THREAD_INFO;
2185 return 1;
2186 }
2187
2188 //
2189 // Sort the threadInfo table by physical Id.
2190 //
2191 qsort(threadInfo, num_avail, sizeof(*threadInfo),
2192 __kmp_affinity_cmp_ProcCpuInfo_phys_id);
2193
2194 //
2195 // The table is now sorted by pkgId / coreId / threadId, but we really
2196 // don't know the radix of any of the fields. pkgId's may be sparsely
2197 // assigned among the chips on a system. Although coreId's are usually
2198 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
2199 // [0..threadsPerCore-1], we don't want to make any such assumptions.
2200 //
2201 // For that matter, we don't know what coresPerPkg and threadsPerCore
2202 // (or the total # packages) are at this point - we want to determine
2203 // that now. We only have an upper bound on the first two figures.
2204 //
2205 unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
2206 * sizeof(unsigned));
2207 unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
2208 * sizeof(unsigned));
2209 unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
2210 * sizeof(unsigned));
2211 unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
2212 * sizeof(unsigned));
2213
2214 bool assign_thread_ids = false;
2215 unsigned threadIdCt;
2216 unsigned index;
2217
2218 restart_radix_check:
2219 threadIdCt = 0;
2220
2221 //
2222 // Initialize the counter arrays with data from threadInfo[0].
2223 //
2224 if (assign_thread_ids) {
2225 if (threadInfo[0][threadIdIndex] == UINT_MAX) {
2226 threadInfo[0][threadIdIndex] = threadIdCt++;
2227 }
2228 else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
2229 threadIdCt = threadInfo[0][threadIdIndex] + 1;
2230 }
2231 }
2232 for (index = 0; index <= maxIndex; index++) {
2233 counts[index] = 1;
2234 maxCt[index] = 1;
2235 totals[index] = 1;
 2236 lastId[index] = threadInfo[0][index];
2237 }
2238
2239 //
2240 // Run through the rest of the OS procs.
2241 //
2242 for (i = 1; i < num_avail; i++) {
2243 //
2244 // Find the most significant index whose id differs
2245 // from the id for the previous OS proc.
2246 //
2247 for (index = maxIndex; index >= threadIdIndex; index--) {
2248 if (assign_thread_ids && (index == threadIdIndex)) {
2249 //
2250 // Auto-assign the thread id field if it wasn't specified.
2251 //
2252 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2253 threadInfo[i][threadIdIndex] = threadIdCt++;
2254 }
2255
2256 //
 2257 // Apparently the thread id field was specified for some
2258 // entries and not others. Start the thread id counter
2259 // off at the next higher thread id.
2260 //
2261 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2262 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2263 }
2264 }
2265 if (threadInfo[i][index] != lastId[index]) {
2266 //
2267 // Run through all indices which are less significant,
2268 // and reset the counts to 1.
2269 //
2270 // At all levels up to and including index, we need to
2271 // increment the totals and record the last id.
2272 //
2273 unsigned index2;
2274 for (index2 = threadIdIndex; index2 < index; index2++) {
2275 totals[index2]++;
2276 if (counts[index2] > maxCt[index2]) {
2277 maxCt[index2] = counts[index2];
2278 }
2279 counts[index2] = 1;
2280 lastId[index2] = threadInfo[i][index2];
2281 }
2282 counts[index]++;
2283 totals[index]++;
2284 lastId[index] = threadInfo[i][index];
2285
2286 if (assign_thread_ids && (index > threadIdIndex)) {
2287
2288# if KMP_MIC && REDUCE_TEAM_SIZE
2289 //
2290 // The default team size is the total #threads in the machine
2291 // minus 1 thread for every core that has 3 or more threads.
2292 //
2293 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2294# endif // KMP_MIC && REDUCE_TEAM_SIZE
2295
2296 //
2297 // Restart the thread counter, as we are on a new core.
2298 //
2299 threadIdCt = 0;
2300
2301 //
2302 // Auto-assign the thread id field if it wasn't specified.
2303 //
2304 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2305 threadInfo[i][threadIdIndex] = threadIdCt++;
2306 }
2307
2308 //
 2309 // Apparently the thread id field was specified for some
2310 // entries and not others. Start the thread id counter
2311 // off at the next higher thread id.
2312 //
2313 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2314 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2315 }
2316 }
2317 break;
2318 }
2319 }
2320 if (index < threadIdIndex) {
2321 //
2322 // If thread ids were specified, it is an error if they are not
 2323 // unique. Also, check that we haven't already restarted the
2324 // loop (to be safe - shouldn't need to).
2325 //
2326 if ((threadInfo[i][threadIdIndex] != UINT_MAX)
2327 || assign_thread_ids) {
2328 __kmp_free(lastId);
2329 __kmp_free(totals);
2330 __kmp_free(maxCt);
2331 __kmp_free(counts);
2332 CLEANUP_THREAD_INFO;
2333 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2334 return -1;
2335 }
2336
2337 //
 2338 // If the thread ids were not specified and we see entries
 2339 // that are duplicates, start the loop over and
2340 // assign the thread ids manually.
2341 //
2342 assign_thread_ids = true;
2343 goto restart_radix_check;
2344 }
2345 }
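 // A small worked example of the restart logic above, using hypothetical
 // records: if two entries share pkgId 0 / coreId 0 and neither has a
 // "thread id" field, the first pass falls through with
 // index < threadIdIndex, sets assign_thread_ids = true, and the second
 // pass assigns them threadIdIndex values 0 and 1 from threadIdCt. If
 // instead both entries carried an explicit "thread id : 0", the
 // duplicate would be a hard error (PhysicalIDsNotUnique).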
2346
2347# if KMP_MIC && REDUCE_TEAM_SIZE
2348 //
2349 // The default team size is the total #threads in the machine
2350 // minus 1 thread for every core that has 3 or more threads.
2351 //
2352 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2353# endif // KMP_MIC && REDUCE_TEAM_SIZE
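 // Hypothetical example of the team-size formula above: on a coprocessor
 // with 61 cores and 4 HW threads per core, each core contributes
 // threadIdCt - 1 == 3, so teamSize == 61 * 3 == 183.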
2354
2355 for (index = threadIdIndex; index <= maxIndex; index++) {
2356 if (counts[index] > maxCt[index]) {
2357 maxCt[index] = counts[index];
2358 }
2359 }
2360
2361 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2362 nCoresPerPkg = maxCt[coreIdIndex];
2363 nPackages = totals[pkgIdIndex];
2364
2365 //
2366 // Check to see if the machine topology is uniform
2367 //
2368 unsigned prod = totals[maxIndex];
2369 for (index = threadIdIndex; index < maxIndex; index++) {
2370 prod *= maxCt[index];
2371 }
2372 bool uniform = (prod == totals[threadIdIndex]);
2373
2374 //
2375 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00002376 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002377 // nCoresPerPkg, & nPackages. Make sure all these vars are set
2378 // correctly, and return now if affinity is not enabled.
2379 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00002380 __kmp_ncores = totals[coreIdIndex];
2381
2382 if (__kmp_affinity_verbose) {
2383 if (! KMP_AFFINITY_CAPABLE()) {
2384 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2385 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2386 if (uniform) {
2387 KMP_INFORM(Uniform, "KMP_AFFINITY");
2388 } else {
2389 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2390 }
2391 }
2392 else {
2393 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2394 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
2395 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2396 if (__kmp_affinity_respect_mask) {
2397 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2398 } else {
2399 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2400 }
2401 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2402 if (uniform) {
2403 KMP_INFORM(Uniform, "KMP_AFFINITY");
2404 } else {
2405 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2406 }
2407 }
2408 kmp_str_buf_t buf;
2409 __kmp_str_buf_init(&buf);
2410
2411 __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
2412 for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2413 __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
2414 }
2415 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
2416 maxCt[threadIdIndex], __kmp_ncores);
2417
2418 __kmp_str_buf_free(&buf);
2419 }
2420
2421# if KMP_MIC && REDUCE_TEAM_SIZE
2422 //
2423 // Set the default team size.
2424 //
2425 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2426 __kmp_dflt_team_nth = teamSize;
2427 KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
2428 __kmp_dflt_team_nth));
2429 }
2430# endif // KMP_MIC && REDUCE_TEAM_SIZE
2431
2432 if (__kmp_affinity_type == affinity_none) {
2433 __kmp_free(lastId);
2434 __kmp_free(totals);
2435 __kmp_free(maxCt);
2436 __kmp_free(counts);
2437 CLEANUP_THREAD_INFO;
2438 return 0;
2439 }
2440
2441 //
2442 // Count the number of levels which have more nodes at that level than
 2443 // at the parent's level (treating the top level as having an implicit
 2444 // root node above it). This is equivalent to saying that there is at least
2445 // one node at this level which has a sibling. These levels are in the
2446 // map, and the package level is always in the map.
2447 //
2448 bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
2449 int level = 0;
2450 for (index = threadIdIndex; index < maxIndex; index++) {
2451 KMP_ASSERT(totals[index] >= totals[index + 1]);
2452 inMap[index] = (totals[index] > totals[index + 1]);
2453 }
2454 inMap[maxIndex] = (totals[maxIndex] > 1);
2455 inMap[pkgIdIndex] = true;
2456
2457 int depth = 0;
2458 for (index = threadIdIndex; index <= maxIndex; index++) {
2459 if (inMap[index]) {
2460 depth++;
2461 }
2462 }
2463 KMP_ASSERT(depth > 0);
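 // For instance (hypothetical totals): with totals of 16 threads, 8
 // cores, 2 packages, and 1 node, inMap is true for the thread, core,
 // and package indices (the package entry is forced true in any case)
 // and false for the node index, so depth == 3.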
2464
2465 //
2466 // Construct the data structure that is to be returned.
2467 //
2468 *address2os = (AddrUnsPair*)
2469 __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
2470 int pkgLevel = -1;
2471 int coreLevel = -1;
2472 int threadLevel = -1;
2473
2474 for (i = 0; i < num_avail; ++i) {
2475 Address addr(depth);
2476 unsigned os = threadInfo[i][osIdIndex];
2477 int src_index;
2478 int dst_index = 0;
2479
2480 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
2481 if (! inMap[src_index]) {
2482 continue;
2483 }
2484 addr.labels[dst_index] = threadInfo[i][src_index];
2485 if (src_index == pkgIdIndex) {
2486 pkgLevel = dst_index;
2487 }
2488 else if (src_index == coreIdIndex) {
2489 coreLevel = dst_index;
2490 }
2491 else if (src_index == threadIdIndex) {
2492 threadLevel = dst_index;
2493 }
2494 dst_index++;
2495 }
2496 (*address2os)[i] = AddrUnsPair(addr, os);
2497 }
2498
2499 if (__kmp_affinity_gran_levels < 0) {
2500 //
2501 // Set the granularity level based on what levels are modeled
2502 // in the machine topology map.
2503 //
2504 unsigned src_index;
2505 __kmp_affinity_gran_levels = 0;
2506 for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2507 if (! inMap[src_index]) {
2508 continue;
2509 }
2510 switch (src_index) {
2511 case threadIdIndex:
2512 if (__kmp_affinity_gran > affinity_gran_thread) {
2513 __kmp_affinity_gran_levels++;
2514 }
2515
2516 break;
2517 case coreIdIndex:
2518 if (__kmp_affinity_gran > affinity_gran_core) {
2519 __kmp_affinity_gran_levels++;
2520 }
2521 break;
2522
2523 case pkgIdIndex:
2524 if (__kmp_affinity_gran > affinity_gran_package) {
2525 __kmp_affinity_gran_levels++;
2526 }
2527 break;
2528 }
2529 }
2530 }
2531
2532 if (__kmp_affinity_verbose) {
2533 __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2534 coreLevel, threadLevel);
2535 }
2536
2537 __kmp_free(inMap);
2538 __kmp_free(lastId);
2539 __kmp_free(totals);
2540 __kmp_free(maxCt);
2541 __kmp_free(counts);
2542 CLEANUP_THREAD_INFO;
2543 return depth;
2544}
2545
2546
2547//
2548// Create and return a table of affinity masks, indexed by OS thread ID.
2549// This routine handles OR'ing together all the affinity masks of threads
2550// that are sufficiently close, if granularity > fine.
2551//
2552static kmp_affin_mask_t *
2553__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
2554 AddrUnsPair *address2os, unsigned numAddrs)
2555{
2556 //
2557 // First form a table of affinity masks in order of OS thread id.
2558 //
2559 unsigned depth;
2560 unsigned maxOsId;
2561 unsigned i;
2562
2563 KMP_ASSERT(numAddrs > 0);
2564 depth = address2os[0].first.depth;
2565
2566 maxOsId = 0;
2567 for (i = 0; i < numAddrs; i++) {
2568 unsigned osId = address2os[i].second;
2569 if (osId > maxOsId) {
2570 maxOsId = osId;
2571 }
2572 }
2573 kmp_affin_mask_t *osId2Mask = (kmp_affin_mask_t *)__kmp_allocate(
2574 (maxOsId + 1) * __kmp_affin_mask_size);
2575
2576 //
2577 // Sort the address2os table according to physical order. Doing so
2578 // will put all threads on the same core/package/node in consecutive
2579 // locations.
2580 //
2581 qsort(address2os, numAddrs, sizeof(*address2os),
2582 __kmp_affinity_cmp_Address_labels);
2583
2584 KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2585 if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
2586 KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
2587 }
2588 if (__kmp_affinity_gran_levels >= (int)depth) {
2589 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2590 && (__kmp_affinity_type != affinity_none))) {
2591 KMP_WARNING(AffThreadsMayMigrate);
2592 }
2593 }
2594
2595 //
2596 // Run through the table, forming the masks for all threads on each
2597 // core. Threads on the same core will have identical "Address"
2598 // objects, not considering the last level, which must be the thread
2599 // id. All threads on a core will appear consecutively.
2600 //
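 // Sketch of the effect, with hypothetical numbering: under
 // granularity=core (__kmp_affinity_gran_levels == 1) on a machine with
 // 2 threads per core, OS procs 0 and 1 on core 0 both receive the mask
 // {0,1} in osId2Mask, so a thread bound to either entry may float
 // between the two hardware threads of its core, but not off the core.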
2601 unsigned unique = 0;
2602 unsigned j = 0; // index of 1st thread on core
2603 unsigned leader = 0;
2604 Address *leaderAddr = &(address2os[0].first);
2605 kmp_affin_mask_t *sum
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002606 = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002607 KMP_CPU_ZERO(sum);
2608 KMP_CPU_SET(address2os[0].second, sum);
2609 for (i = 1; i < numAddrs; i++) {
2610 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002611 // If this thread is sufficiently close to the leader (within the
Jim Cownie5e8470a2013-09-27 10:38:44 +00002612 // granularity setting), then set the bit for this os thread in the
2613 // affinity mask for this group, and go on to the next thread.
2614 //
2615 if (leaderAddr->isClose(address2os[i].first,
2616 __kmp_affinity_gran_levels)) {
2617 KMP_CPU_SET(address2os[i].second, sum);
2618 continue;
2619 }
2620
2621 //
2622 // For every thread in this group, copy the mask to the thread's
2623 // entry in the osId2Mask table. Mark the first address as a
2624 // leader.
2625 //
2626 for (; j < i; j++) {
2627 unsigned osId = address2os[j].second;
2628 KMP_DEBUG_ASSERT(osId <= maxOsId);
2629 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2630 KMP_CPU_COPY(mask, sum);
2631 address2os[j].first.leader = (j == leader);
2632 }
2633 unique++;
2634
2635 //
2636 // Start a new mask.
2637 //
2638 leader = i;
2639 leaderAddr = &(address2os[i].first);
2640 KMP_CPU_ZERO(sum);
2641 KMP_CPU_SET(address2os[i].second, sum);
2642 }
2643
2644 //
2645 // For every thread in last group, copy the mask to the thread's
2646 // entry in the osId2Mask table.
2647 //
2648 for (; j < i; j++) {
2649 unsigned osId = address2os[j].second;
2650 KMP_DEBUG_ASSERT(osId <= maxOsId);
2651 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2652 KMP_CPU_COPY(mask, sum);
2653 address2os[j].first.leader = (j == leader);
2654 }
2655 unique++;
2656
2657 *maxIndex = maxOsId;
2658 *numUnique = unique;
2659 return osId2Mask;
2660}
2661
2662
2663//
2664// Stuff for the affinity proclist parsers. It's easier to declare these vars
 2665 // as file-static than to try to pass them through the calling sequence of
2666// the recursive-descent OMP_PLACES parser.
2667//
2668static kmp_affin_mask_t *newMasks;
2669static int numNewMasks;
2670static int nextNewMask;
2671
2672#define ADD_MASK(_mask) \
2673 { \
2674 if (nextNewMask >= numNewMasks) { \
2675 numNewMasks *= 2; \
2676 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
2677 numNewMasks * __kmp_affin_mask_size); \
2678 } \
2679 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
2680 nextNewMask++; \
2681 }
2682
2683#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
2684 { \
2685 if (((_osId) > _maxOsId) || \
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002686 (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002687 if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
2688 && (__kmp_affinity_type != affinity_none))) { \
2689 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
2690 } \
2691 } \
2692 else { \
2693 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
2694 } \
2695 }
2696
2697
2698//
2699// Re-parse the proclist (for the explicit affinity type), and form the list
2700// of affinity newMasks indexed by gtid.
2701//
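// For example, a hypothetical proclist "0,2-6:2,{8,9}" yields one mask
// per comma-separated term: {0}, {2}, {4}, {6}, and {8,9}. A braced set
// is OR'ed into a single mask, and <start>-<end>[:<stride>] expands into
// one mask per OS proc id.
//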
2702static void
2703__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
2704 unsigned int *out_numMasks, const char *proclist,
2705 kmp_affin_mask_t *osId2Mask, int maxOsId)
2706{
2707 const char *scan = proclist;
2708 const char *next = proclist;
2709
2710 //
2711 // We use malloc() for the temporary mask vector,
2712 // so that we can use realloc() to extend it.
2713 //
2714 numNewMasks = 2;
2715 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
2716 * __kmp_affin_mask_size);
2717 nextNewMask = 0;
2718 kmp_affin_mask_t *sumMask = (kmp_affin_mask_t *)__kmp_allocate(
2719 __kmp_affin_mask_size);
2720 int setSize = 0;
2721
2722 for (;;) {
2723 int start, end, stride;
2724
2725 SKIP_WS(scan);
2726 next = scan;
2727 if (*next == '\0') {
2728 break;
2729 }
2730
2731 if (*next == '{') {
2732 int num;
2733 setSize = 0;
2734 next++; // skip '{'
2735 SKIP_WS(next);
2736 scan = next;
2737
2738 //
2739 // Read the first integer in the set.
2740 //
2741 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2742 "bad proclist");
2743 SKIP_DIGITS(next);
2744 num = __kmp_str_to_int(scan, *next);
2745 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2746
2747 //
2748 // Copy the mask for that osId to the sum (union) mask.
2749 //
2750 if ((num > maxOsId) ||
2751 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2752 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2753 && (__kmp_affinity_type != affinity_none))) {
2754 KMP_WARNING(AffIgnoreInvalidProcID, num);
2755 }
2756 KMP_CPU_ZERO(sumMask);
2757 }
2758 else {
2759 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2760 setSize = 1;
2761 }
2762
2763 for (;;) {
2764 //
2765 // Check for end of set.
2766 //
2767 SKIP_WS(next);
2768 if (*next == '}') {
2769 next++; // skip '}'
2770 break;
2771 }
2772
2773 //
2774 // Skip optional comma.
2775 //
2776 if (*next == ',') {
2777 next++;
2778 }
2779 SKIP_WS(next);
2780
2781 //
2782 // Read the next integer in the set.
2783 //
2784 scan = next;
2785 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2786 "bad explicit proc list");
2787
2788 SKIP_DIGITS(next);
2789 num = __kmp_str_to_int(scan, *next);
2790 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2791
2792 //
2793 // Add the mask for that osId to the sum mask.
2794 //
2795 if ((num > maxOsId) ||
2796 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2797 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2798 && (__kmp_affinity_type != affinity_none))) {
2799 KMP_WARNING(AffIgnoreInvalidProcID, num);
2800 }
2801 }
2802 else {
2803 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2804 setSize++;
2805 }
2806 }
2807 if (setSize > 0) {
2808 ADD_MASK(sumMask);
2809 }
2810
2811 SKIP_WS(next);
2812 if (*next == ',') {
2813 next++;
2814 }
2815 scan = next;
2816 continue;
2817 }
2818
2819 //
2820 // Read the first integer.
2821 //
2822 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2823 SKIP_DIGITS(next);
2824 start = __kmp_str_to_int(scan, *next);
2825 KMP_ASSERT2(start >= 0, "bad explicit proc list");
2826 SKIP_WS(next);
2827
2828 //
2829 // If this isn't a range, then add a mask to the list and go on.
2830 //
2831 if (*next != '-') {
2832 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2833
2834 //
2835 // Skip optional comma.
2836 //
2837 if (*next == ',') {
2838 next++;
2839 }
2840 scan = next;
2841 continue;
2842 }
2843
2844 //
2845 // This is a range. Skip over the '-' and read in the 2nd int.
2846 //
2847 next++; // skip '-'
2848 SKIP_WS(next);
2849 scan = next;
2850 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2851 SKIP_DIGITS(next);
2852 end = __kmp_str_to_int(scan, *next);
2853 KMP_ASSERT2(end >= 0, "bad explicit proc list");
2854
2855 //
2856 // Check for a stride parameter
2857 //
2858 stride = 1;
2859 SKIP_WS(next);
2860 if (*next == ':') {
2861 //
 2862 // A stride is specified. Skip over the ':' and read the 3rd int.
2863 //
2864 int sign = +1;
2865 next++; // skip ':'
2866 SKIP_WS(next);
2867 scan = next;
2868 if (*next == '-') {
2869 sign = -1;
2870 next++;
2871 SKIP_WS(next);
2872 scan = next;
2873 }
2874 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2875 "bad explicit proc list");
2876 SKIP_DIGITS(next);
2877 stride = __kmp_str_to_int(scan, *next);
2878 KMP_ASSERT2(stride >= 0, "bad explicit proc list");
2879 stride *= sign;
2880 }
2881
2882 //
2883 // Do some range checks.
2884 //
2885 KMP_ASSERT2(stride != 0, "bad explicit proc list");
2886 if (stride > 0) {
2887 KMP_ASSERT2(start <= end, "bad explicit proc list");
2888 }
2889 else {
2890 KMP_ASSERT2(start >= end, "bad explicit proc list");
2891 }
2892 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
2893
2894 //
2895 // Add the mask for each OS proc # to the list.
2896 //
2897 if (stride > 0) {
2898 do {
2899 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2900 start += stride;
2901 } while (start <= end);
2902 }
2903 else {
2904 do {
2905 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2906 start += stride;
2907 } while (start >= end);
2908 }
2909
2910 //
2911 // Skip optional comma.
2912 //
2913 SKIP_WS(next);
2914 if (*next == ',') {
2915 next++;
2916 }
2917 scan = next;
2918 }
2919
2920 *out_numMasks = nextNewMask;
2921 if (nextNewMask == 0) {
2922 *out_masks = NULL;
2923 KMP_INTERNAL_FREE(newMasks);
2924 return;
2925 }
2926 *out_masks
2927 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002928 KMP_MEMCPY(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002929 __kmp_free(sumMask);
2930 KMP_INTERNAL_FREE(newMasks);
2931}
2932
2933
2934# if OMP_40_ENABLED
2935
2936/*-----------------------------------------------------------------------------
2937
2938Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
 2939places. Again, here is the grammar:
2940
2941place_list := place
2942place_list := place , place_list
2943place := num
2944place := place : num
2945place := place : num : signed
2946place := { subplacelist }
2947place := ! place // (lowest priority)
2948subplace_list := subplace
2949subplace_list := subplace , subplace_list
2950subplace := num
2951subplace := num : num
2952subplace := num : num : signed
2953signed := num
2954signed := + signed
2955signed := - signed
2956
2957-----------------------------------------------------------------------------*/
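// For example (hypothetical proc ids): "{0,1},{2,3}" describes two
// places of two procs each; "{0:4}:2:4" takes the subplace {0,1,2,3}
// and replicates it twice with a stride of 4, giving places {0,1,2,3}
// and {4,5,6,7}; "!{2}" is the complement, i.e. every proc except 2.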
2958
2959static void
2960__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
2961 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
2962{
2963 const char *next;
2964
2965 for (;;) {
2966 int start, count, stride, i;
2967
2968 //
2969 // Read in the starting proc id
2970 //
2971 SKIP_WS(*scan);
2972 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2973 "bad explicit places list");
2974 next = *scan;
2975 SKIP_DIGITS(next);
2976 start = __kmp_str_to_int(*scan, *next);
2977 KMP_ASSERT(start >= 0);
2978 *scan = next;
2979
2980 //
2981 // valid follow sets are ',' ':' and '}'
2982 //
2983 SKIP_WS(*scan);
2984 if (**scan == '}' || **scan == ',') {
2985 if ((start > maxOsId) ||
2986 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
2987 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2988 && (__kmp_affinity_type != affinity_none))) {
2989 KMP_WARNING(AffIgnoreInvalidProcID, start);
2990 }
2991 }
2992 else {
2993 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
2994 (*setSize)++;
2995 }
2996 if (**scan == '}') {
2997 break;
2998 }
2999 (*scan)++; // skip ','
3000 continue;
3001 }
3002 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3003 (*scan)++; // skip ':'
3004
3005 //
3006 // Read count parameter
3007 //
3008 SKIP_WS(*scan);
3009 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3010 "bad explicit places list");
3011 next = *scan;
3012 SKIP_DIGITS(next);
3013 count = __kmp_str_to_int(*scan, *next);
3014 KMP_ASSERT(count >= 0);
3015 *scan = next;
3016
3017 //
3018 // valid follow sets are ',' ':' and '}'
3019 //
3020 SKIP_WS(*scan);
3021 if (**scan == '}' || **scan == ',') {
3022 for (i = 0; i < count; i++) {
3023 if ((start > maxOsId) ||
3024 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3025 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3026 && (__kmp_affinity_type != affinity_none))) {
3027 KMP_WARNING(AffIgnoreInvalidProcID, start);
3028 }
3029 break; // don't proliferate warnings for large count
3030 }
3031 else {
3032 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3033 start++;
3034 (*setSize)++;
3035 }
3036 }
3037 if (**scan == '}') {
3038 break;
3039 }
3040 (*scan)++; // skip ','
3041 continue;
3042 }
3043 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3044 (*scan)++; // skip ':'
3045
3046 //
3047 // Read stride parameter
3048 //
3049 int sign = +1;
3050 for (;;) {
3051 SKIP_WS(*scan);
3052 if (**scan == '+') {
3053 (*scan)++; // skip '+'
3054 continue;
3055 }
3056 if (**scan == '-') {
3057 sign *= -1;
3058 (*scan)++; // skip '-'
3059 continue;
3060 }
3061 break;
3062 }
3063 SKIP_WS(*scan);
3064 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3065 "bad explicit places list");
3066 next = *scan;
3067 SKIP_DIGITS(next);
3068 stride = __kmp_str_to_int(*scan, *next);
3069 KMP_ASSERT(stride >= 0);
3070 *scan = next;
3071 stride *= sign;
3072
3073 //
3074 // valid follow sets are ',' and '}'
3075 //
3076 SKIP_WS(*scan);
3077 if (**scan == '}' || **scan == ',') {
3078 for (i = 0; i < count; i++) {
3079 if ((start > maxOsId) ||
3080 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3081 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3082 && (__kmp_affinity_type != affinity_none))) {
3083 KMP_WARNING(AffIgnoreInvalidProcID, start);
3084 }
3085 break; // don't proliferate warnings for large count
3086 }
3087 else {
3088 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3089 start += stride;
3090 (*setSize)++;
3091 }
3092 }
3093 if (**scan == '}') {
3094 break;
3095 }
3096 (*scan)++; // skip ','
3097 continue;
3098 }
3099
3100 KMP_ASSERT2(0, "bad explicit places list");
3101 }
3102}
3103
3104
3105static void
3106__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
3107 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3108{
3109 const char *next;
3110
3111 //
3112 // valid follow sets are '{' '!' and num
3113 //
3114 SKIP_WS(*scan);
3115 if (**scan == '{') {
3116 (*scan)++; // skip '{'
3117 __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
3118 setSize);
3119 KMP_ASSERT2(**scan == '}', "bad explicit places list");
3120 (*scan)++; // skip '}'
3121 }
3122 else if (**scan == '!') {
 3123 (*scan)++; // skip '!' before recursing, else we would loop forever
 3124 __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
 3125 KMP_CPU_COMPLEMENT(tempMask);
3126 }
3127 else if ((**scan >= '0') && (**scan <= '9')) {
3128 next = *scan;
3129 SKIP_DIGITS(next);
3130 int num = __kmp_str_to_int(*scan, *next);
3131 KMP_ASSERT(num >= 0);
3132 if ((num > maxOsId) ||
3133 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3134 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3135 && (__kmp_affinity_type != affinity_none))) {
3136 KMP_WARNING(AffIgnoreInvalidProcID, num);
3137 }
3138 }
3139 else {
3140 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
3141 (*setSize)++;
3142 }
3143 *scan = next; // skip num
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003144 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003145 else {
3146 KMP_ASSERT2(0, "bad explicit places list");
3147 }
3148}
3149
3150
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003151//static void
3152void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003153__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
3154 unsigned int *out_numMasks, const char *placelist,
3155 kmp_affin_mask_t *osId2Mask, int maxOsId)
3156{
3157 const char *scan = placelist;
3158 const char *next = placelist;
3159
3160 numNewMasks = 2;
3161 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
3162 * __kmp_affin_mask_size);
3163 nextNewMask = 0;
3164
3165 kmp_affin_mask_t *tempMask = (kmp_affin_mask_t *)__kmp_allocate(
3166 __kmp_affin_mask_size);
3167 KMP_CPU_ZERO(tempMask);
3168 int setSize = 0;
3169
3170 for (;;) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003171 __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
3172
3173 //
3174 // valid follow sets are ',' ':' and EOL
3175 //
3176 SKIP_WS(scan);
3177 if (*scan == '\0' || *scan == ',') {
3178 if (setSize > 0) {
3179 ADD_MASK(tempMask);
3180 }
3181 KMP_CPU_ZERO(tempMask);
3182 setSize = 0;
3183 if (*scan == '\0') {
3184 break;
3185 }
3186 scan++; // skip ','
3187 continue;
3188 }
3189
3190 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3191 scan++; // skip ':'
3192
3193 //
3194 // Read count parameter
3195 //
3196 SKIP_WS(scan);
3197 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3198 "bad explicit places list");
3199 next = scan;
3200 SKIP_DIGITS(next);
Jim Cownie181b4bb2013-12-23 17:28:57 +00003201 int count = __kmp_str_to_int(scan, *next);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003202 KMP_ASSERT(count >= 0);
3203 scan = next;
3204
3205 //
3206 // valid follow sets are ',' ':' and EOL
3207 //
3208 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003209 int stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003210 if (*scan == '\0' || *scan == ',') {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003211 stride = +1;
3212 }
3213 else {
3214 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3215 scan++; // skip ':'
Jim Cownie5e8470a2013-09-27 10:38:44 +00003216
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003217 //
3218 // Read stride parameter
3219 //
3220 int sign = +1;
3221 for (;;) {
3222 SKIP_WS(scan);
3223 if (*scan == '+') {
3224 scan++; // skip '+'
3225 continue;
3226 }
3227 if (*scan == '-') {
3228 sign *= -1;
3229 scan++; // skip '-'
3230 continue;
3231 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003232 break;
3233 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003234 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003235 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3236 "bad explicit places list");
3237 next = scan;
3238 SKIP_DIGITS(next);
3239 stride = __kmp_str_to_int(scan, *next);
3240 KMP_DEBUG_ASSERT(stride >= 0);
3241 scan = next;
3242 stride *= sign;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003243 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003244
3245 if (stride > 0) {
3246 int i;
3247 for (i = 0; i < count; i++) {
3248 int j;
3249 if (setSize == 0) {
3250 break;
3251 }
3252 ADD_MASK(tempMask);
3253 setSize = 0;
3254 for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003255 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3256 KMP_CPU_CLR(j, tempMask);
3257 }
3258 else if ((j > maxOsId) ||
3259 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
Andrey Churbanov16a14322015-03-10 09:34:38 +00003260 if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
3261 && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003262 KMP_WARNING(AffIgnoreInvalidProcID, j);
3263 }
3264 KMP_CPU_CLR(j, tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003265 }
3266 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003267 KMP_CPU_SET(j, tempMask);
3268 setSize++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003269 }
3270 }
3271 for (; j >= 0; j--) {
3272 KMP_CPU_CLR(j, tempMask);
3273 }
3274 }
3275 }
3276 else {
3277 int i;
3278 for (i = 0; i < count; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003279 int j;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003280 if (setSize == 0) {
3281 break;
3282 }
3283 ADD_MASK(tempMask);
3284 setSize = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003285 for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003286 j++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003287 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3288 KMP_CPU_CLR(j, tempMask);
3289 }
3290 else if ((j > maxOsId) ||
3291 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
Andrey Churbanov16a14322015-03-10 09:34:38 +00003292 if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
3293 && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003294 KMP_WARNING(AffIgnoreInvalidProcID, j);
3295 }
3296 KMP_CPU_CLR(j, tempMask);
3297 }
3298 else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003299 KMP_CPU_SET(j, tempMask);
3300 setSize++;
3301 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003302 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003303 for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003304 KMP_CPU_CLR(j, tempMask);
3305 }
3306 }
3307 }
3308 KMP_CPU_ZERO(tempMask);
3309 setSize = 0;
3310
3311 //
3312 // valid follow sets are ',' and EOL
3313 //
3314 SKIP_WS(scan);
3315 if (*scan == '\0') {
3316 break;
3317 }
3318 if (*scan == ',') {
3319 scan++; // skip ','
3320 continue;
3321 }
3322
3323 KMP_ASSERT2(0, "bad explicit places list");
3324 }
3325
3326 *out_numMasks = nextNewMask;
3327 if (nextNewMask == 0) {
3328 *out_masks = NULL;
3329 KMP_INTERNAL_FREE(newMasks);
3330 return;
3331 }
3332 *out_masks
3333 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003334 KMP_MEMCPY(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003335 __kmp_free(tempMask);
3336 KMP_INTERNAL_FREE(newMasks);
3337}
3338
3339# endif /* OMP_40_ENABLED */
3340
3341#undef ADD_MASK
3342#undef ADD_MASK_OSID
3343
Jim Cownie5e8470a2013-09-27 10:38:44 +00003344static void
3345__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
3346{
3347 if ( __kmp_place_num_cores == 0 ) {
3348 if ( __kmp_place_num_threads_per_core == 0 ) {
3349 return; // no cores limiting actions requested, exit
3350 }
3351 __kmp_place_num_cores = nCoresPerPkg; // use all available cores
3352 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003353 if ( !__kmp_affinity_uniform_topology() ) {
3354 KMP_WARNING( AffThrPlaceNonUniform );
3355 return; // don't support non-uniform topology
3356 }
3357 if ( depth != 3 ) {
3358 KMP_WARNING( AffThrPlaceNonThreeLevel );
3359 return; // don't support not-3-level topology
Jim Cownie5e8470a2013-09-27 10:38:44 +00003360 }
3361 if ( __kmp_place_num_threads_per_core == 0 ) {
3362 __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
3363 }
Andrey Churbanov12875572015-03-10 09:00:36 +00003364 if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003365 KMP_WARNING( AffThrPlaceManyCores );
3366 return;
3367 }
3368
3369 AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
3370 nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
3371 int i, j, k, n_old = 0, n_new = 0;
3372 for ( i = 0; i < nPackages; ++i ) {
3373 for ( j = 0; j < nCoresPerPkg; ++j ) {
Andrey Churbanov12875572015-03-10 09:00:36 +00003374 if ( j < __kmp_place_core_offset || j >= __kmp_place_core_offset + __kmp_place_num_cores ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003375 n_old += __kmp_nThreadsPerCore; // skip not-requested core
3376 } else {
3377 for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) {
Andrey Churbanov12875572015-03-10 09:00:36 +00003378 if ( k < __kmp_place_num_threads_per_core ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003379 newAddr[n_new] = (*pAddr)[n_old]; // copy requested core's data to new location
3380 n_new++;
3381 }
3382 n_old++;
3383 }
3384 }
3385 }
3386 }
3387 nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
3388 __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
3389 __kmp_avail_proc = n_new; // correct avail_proc
3390 __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
3391
3392 __kmp_free( *pAddr );
3393 *pAddr = newAddr; // replace old topology with new one
3394}
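// Illustrative sketch of the routine above (hypothetical settings): if
// the place-threads controls request 2 cores with 1 thread per core on
// a uniform 1-package x 4-core x 2-thread machine, the loops keep only
// the first HW context of cores 0 and 1 (plus any core offset), leaving
// __kmp_avail_proc == 2 and __kmp_ncores == 2.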
3395
Jim Cownie5e8470a2013-09-27 10:38:44 +00003396
3397static AddrUnsPair *address2os = NULL;
3398static int * procarr = NULL;
3399static int __kmp_aff_depth = 0;
3400
3401static void
3402__kmp_aux_affinity_initialize(void)
3403{
3404 if (__kmp_affinity_masks != NULL) {
3405 KMP_ASSERT(fullMask != NULL);
3406 return;
3407 }
3408
3409 //
3410 // Create the "full" mask - this defines all of the processors that we
3411 // consider to be in the machine model. If respect is set, then it is
3412 // the initialization thread's affinity mask. Otherwise, it is all
3413 // processors that we know about on the machine.
3414 //
3415 if (fullMask == NULL) {
3416 fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
3417 }
3418 if (KMP_AFFINITY_CAPABLE()) {
3419 if (__kmp_affinity_respect_mask) {
3420 __kmp_get_system_affinity(fullMask, TRUE);
3421
3422 //
3423 // Count the number of available processors.
3424 //
3425 unsigned i;
3426 __kmp_avail_proc = 0;
3427 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
3428 if (! KMP_CPU_ISSET(i, fullMask)) {
3429 continue;
3430 }
3431 __kmp_avail_proc++;
3432 }
3433 if (__kmp_avail_proc > __kmp_xproc) {
3434 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3435 && (__kmp_affinity_type != affinity_none))) {
3436 KMP_WARNING(ErrorInitializeAffinity);
3437 }
3438 __kmp_affinity_type = affinity_none;
Andrey Churbanov1f037e42015-03-10 09:15:26 +00003439 KMP_AFFINITY_DISABLE();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003440 return;
3441 }
3442 }
3443 else {
3444 __kmp_affinity_entire_machine_mask(fullMask);
3445 __kmp_avail_proc = __kmp_xproc;
3446 }
3447 }
3448
3449 int depth = -1;
3450 kmp_i18n_id_t msg_id = kmp_i18n_null;
3451
3452 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00003453 // For backward compatibility, setting KMP_CPUINFO_FILE =>
Jim Cownie5e8470a2013-09-27 10:38:44 +00003454 // KMP_TOPOLOGY_METHOD=cpuinfo
3455 //
3456 if ((__kmp_cpuinfo_file != NULL) &&
3457 (__kmp_affinity_top_method == affinity_top_method_all)) {
3458 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
3459 }
3460
3461 if (__kmp_affinity_top_method == affinity_top_method_all) {
3462 //
3463 // In the default code path, errors are not fatal - we just try using
3464 // another method. We only emit a warning message if affinity is on,
 3465 // or the verbose flag is set, and the nowarnings flag was not set.
3466 //
3467 const char *file_name = NULL;
3468 int line = 0;
3469
3470# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3471
3472 if (__kmp_affinity_verbose) {
3473 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
3474 }
3475
3476 file_name = NULL;
3477 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3478 if (depth == 0) {
3479 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3480 KMP_ASSERT(address2os == NULL);
3481 return;
3482 }
3483
3484 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003485 if (__kmp_affinity_verbose) {
3486 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003487 KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
3488 KMP_I18N_STR(DecodingLegacyAPIC));
3489 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003490 else {
3491 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
3492 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003493 }
3494
3495 file_name = NULL;
3496 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3497 if (depth == 0) {
3498 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3499 KMP_ASSERT(address2os == NULL);
3500 return;
3501 }
3502 }
3503
3504# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3505
3506# if KMP_OS_LINUX
3507
3508 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003509 if (__kmp_affinity_verbose) {
3510 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003511 KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
3512 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003513 else {
3514 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
3515 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003516 }
3517
3518 FILE *f = fopen("/proc/cpuinfo", "r");
3519 if (f == NULL) {
3520 msg_id = kmp_i18n_str_CantOpenCpuinfo;
3521 }
3522 else {
3523 file_name = "/proc/cpuinfo";
3524 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3525 fclose(f);
3526 if (depth == 0) {
3527 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3528 KMP_ASSERT(address2os == NULL);
3529 return;
3530 }
3531 }
3532 }
3533
3534# endif /* KMP_OS_LINUX */
3535
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003536# if KMP_GROUP_AFFINITY
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003537
3538 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
3539 if (__kmp_affinity_verbose) {
3540 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3541 }
3542
3543 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3544 KMP_ASSERT(depth != 0);
3545 }
3546
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003547# endif /* KMP_GROUP_AFFINITY */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003548
Jim Cownie5e8470a2013-09-27 10:38:44 +00003549 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003550 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003551 if (file_name == NULL) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003552 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003553 }
3554 else if (line == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003555 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003556 }
3557 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003558 KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003559 }
3560 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003561 // FIXME - print msg if msg_id == kmp_i18n_null ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00003562
3563 file_name = "";
3564 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3565 if (depth == 0) {
3566 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3567 KMP_ASSERT(address2os == NULL);
3568 return;
3569 }
3570 KMP_ASSERT(depth > 0);
3571 KMP_ASSERT(address2os != NULL);
3572 }
3573 }
3574
3575 //
3576 // If the user has specified that a particular topology discovery method
3577 // is to be used, then we abort if that method fails. The exception is
3578 // group affinity, which might have been implicitly set.
3579 //
3580
3581# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3582
3583 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
3584 if (__kmp_affinity_verbose) {
3585 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3586 KMP_I18N_STR(Decodingx2APIC));
3587 }
3588
3589 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3590 if (depth == 0) {
3591 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3592 KMP_ASSERT(address2os == NULL);
3593 return;
3594 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003595 if (depth < 0) {
3596 KMP_ASSERT(msg_id != kmp_i18n_null);
3597 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3598 }
3599 }
3600 else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
3601 if (__kmp_affinity_verbose) {
3602 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3603 KMP_I18N_STR(DecodingLegacyAPIC));
3604 }
3605
3606 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3607 if (depth == 0) {
3608 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3609 KMP_ASSERT(address2os == NULL);
3610 return;
3611 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003612 if (depth < 0) {
3613 KMP_ASSERT(msg_id != kmp_i18n_null);
3614 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3615 }
3616 }
3617
3618# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3619
3620 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
3621 const char *filename;
3622 if (__kmp_cpuinfo_file != NULL) {
3623 filename = __kmp_cpuinfo_file;
3624 }
3625 else {
3626 filename = "/proc/cpuinfo";
3627 }
3628
3629 if (__kmp_affinity_verbose) {
3630 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
3631 }
3632
3633 FILE *f = fopen(filename, "r");
3634 if (f == NULL) {
3635 int code = errno;
3636 if (__kmp_cpuinfo_file != NULL) {
3637 __kmp_msg(
3638 kmp_ms_fatal,
3639 KMP_MSG(CantOpenFileForReading, filename),
3640 KMP_ERR(code),
3641 KMP_HNT(NameComesFrom_CPUINFO_FILE),
3642 __kmp_msg_null
3643 );
3644 }
3645 else {
3646 __kmp_msg(
3647 kmp_ms_fatal,
3648 KMP_MSG(CantOpenFileForReading, filename),
3649 KMP_ERR(code),
3650 __kmp_msg_null
3651 );
3652 }
3653 }
3654 int line = 0;
3655 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3656 fclose(f);
3657 if (depth < 0) {
3658 KMP_ASSERT(msg_id != kmp_i18n_null);
3659 if (line > 0) {
3660 KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
3661 }
3662 else {
3663 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
3664 }
3665 }
3666 if (__kmp_affinity_type == affinity_none) {
3667 KMP_ASSERT(depth == 0);
3668 KMP_ASSERT(address2os == NULL);
3669 return;
3670 }
3671 }
3672
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003673# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00003674
3675 else if (__kmp_affinity_top_method == affinity_top_method_group) {
3676 if (__kmp_affinity_verbose) {
3677 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3678 }
3679
3680 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3681 KMP_ASSERT(depth != 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003682 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003683 KMP_ASSERT(msg_id != kmp_i18n_null);
3684 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003685 }
3686 }
3687
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003688# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003689
3690 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
3691 if (__kmp_affinity_verbose) {
3692 KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
3693 }
3694
3695 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3696 if (depth == 0) {
3697 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3698 KMP_ASSERT(address2os == NULL);
3699 return;
3700 }
3701 // should not fail
3702 KMP_ASSERT(depth > 0);
3703 KMP_ASSERT(address2os != NULL);
3704 }
3705
3706 if (address2os == NULL) {
3707 if (KMP_AFFINITY_CAPABLE()
3708 && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3709 && (__kmp_affinity_type != affinity_none)))) {
3710 KMP_WARNING(ErrorInitializeAffinity);
3711 }
3712 __kmp_affinity_type = affinity_none;
Andrey Churbanov1f037e42015-03-10 09:15:26 +00003713 KMP_AFFINITY_DISABLE();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003714 return;
3715 }
3716
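//
// Filter the topology map down to the subset of hardware threads selected
// via KMP_PLACE_THREADS, if any, before building the masks.
//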
Jim Cownie5e8470a2013-09-27 10:38:44 +00003717 __kmp_apply_thread_places(&address2os, depth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003718
3719 //
3720 // Create the table of masks, indexed by thread Id.
3721 //
3722 unsigned maxIndex;
3723 unsigned numUnique;
3724 kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
3725 address2os, __kmp_avail_proc);
3726 if (__kmp_affinity_gran_levels == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003727 KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003728 }
3729
3730 //
3731 // Set the childNums vector in all Address objects. This must be done
3732 // before we can sort using __kmp_affinity_cmp_Address_child_num(),
3733 // which takes into account the setting of __kmp_affinity_compact.
3734 //
3735 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
3736
3737 switch (__kmp_affinity_type) {
3738
3739 case affinity_explicit:
3740 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
3741# if OMP_40_ENABLED
3742 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3743# endif
3744 {
3745 __kmp_affinity_process_proclist(&__kmp_affinity_masks,
3746 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3747 maxIndex);
3748 }
3749# if OMP_40_ENABLED
3750 else {
3751 __kmp_affinity_process_placelist(&__kmp_affinity_masks,
3752 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3753 maxIndex);
3754 }
3755# endif
3756 if (__kmp_affinity_num_masks == 0) {
3757 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3758 && (__kmp_affinity_type != affinity_none))) {
3759 KMP_WARNING(AffNoValidProcID);
3760 }
3761 __kmp_affinity_type = affinity_none;
3762 return;
3763 }
3764 break;
3765
3766 //
3767 // The other affinity types rely on sorting the Addresses according
3768 // to some permutation of the machine topology tree. Set
3769 // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
3770 // then jump to a common code fragment to do the sort and create
3771 // the array of affinity masks.
3772 //
3773
3774 case affinity_logical:
3775 __kmp_affinity_compact = 0;
3776 if (__kmp_affinity_offset) {
3777 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3778 % __kmp_avail_proc;
3779 }
3780 goto sortAddresses;
3781
3782 case affinity_physical:
3783 if (__kmp_nThreadsPerCore > 1) {
3784 __kmp_affinity_compact = 1;
3785 if (__kmp_affinity_compact >= depth) {
3786 __kmp_affinity_compact = 0;
3787 }
3788 } else {
3789 __kmp_affinity_compact = 0;
3790 }
3791 if (__kmp_affinity_offset) {
3792 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3793 % __kmp_avail_proc;
3794 }
3795 goto sortAddresses;
3796
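//
// Scatter is implemented as compact at the complementary level: e.g. with
// a hypothetical depth of 3 (package/core/thread), scatter with a user
// compact value of 0 becomes an internal value of 2, so consecutive
// threads are spread across packages first.
//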
3797 case affinity_scatter:
3798 if (__kmp_affinity_compact >= depth) {
3799 __kmp_affinity_compact = 0;
3800 }
3801 else {
3802 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
3803 }
3804 goto sortAddresses;
3805
3806 case affinity_compact:
3807 if (__kmp_affinity_compact >= depth) {
3808 __kmp_affinity_compact = depth - 1;
3809 }
3810 goto sortAddresses;
3811
Jim Cownie5e8470a2013-09-27 10:38:44 +00003812 case affinity_balanced:
Jonathan Peytoncaf09fe2015-05-27 23:27:33 +00003813 // Balanced works only for the case of a single package
Jim Cownie5e8470a2013-09-27 10:38:44 +00003814 if( nPackages > 1 ) {
3815 if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
3816 KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
3817 }
3818 __kmp_affinity_type = affinity_none;
3819 return;
3820 } else if( __kmp_affinity_uniform_topology() ) {
3821 break;
3822 } else { // Non-uniform topology
3823
3824 // Save the depth for further usage
3825 __kmp_aff_depth = depth;
3826
3827 // Number of hyper threads per core in HT machine
3828 int nth_per_core = __kmp_nThreadsPerCore;
3829
3830 int core_level;
3831 if( nth_per_core > 1 ) {
3832 core_level = depth - 2;
3833 } else {
3834 core_level = depth - 1;
3835 }
3836 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
3837 int nproc = nth_per_core * ncores;
3838
3839 procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
3840 for( int i = 0; i < nproc; i++ ) {
3841 procarr[ i ] = -1;
3842 }
3843
3844 for( int i = 0; i < __kmp_avail_proc; i++ ) {
3845 int proc = address2os[ i ].second;
3846 // If depth == 3 then level=0 - package, level=1 - core, level=2 - thread.
3847 // If there is only one thread per core then depth == 2: level 0 - package,
3848 // level 1 - core.
3849 int level = depth - 1;
3850
3851 // Default for the case of one thread context per core (__kmp_nth_per_core == 1)
3852 int thread = 0;
3853 int core = address2os[ i ].first.labels[ level ];
3854 // If the thread level exists, that is, there is more than one thread context per core
3855 if( nth_per_core > 1 ) {
3856 thread = address2os[ i ].first.labels[ level ] % nth_per_core;
3857 core = address2os[ i ].first.labels[ level - 1 ];
3858 }
3859 procarr[ core * nth_per_core + thread ] = proc;
3860 }
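//
// procarr now holds one OS proc id per (core, thread context) slot, or -1
// for a missing context. E.g. on a hypothetical machine with ncores == 2
// and nth_per_core == 2 where core 1 has one context disabled, it could
// be { 0, 2, 1, -1 }.
//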
3861
3862 break;
3863 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003864
3865 sortAddresses:
3866 //
3867 // Allocate the gtid->affinity mask table.
3868 //
3869 if (__kmp_affinity_dups) {
3870 __kmp_affinity_num_masks = __kmp_avail_proc;
3871 }
3872 else {
3873 __kmp_affinity_num_masks = numUnique;
3874 }
3875
3876# if OMP_40_ENABLED
3877 if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
3878 && ( __kmp_affinity_num_places > 0 )
3879 && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
3880 __kmp_affinity_num_masks = __kmp_affinity_num_places;
3881 }
3882# endif
3883
3884 __kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
3885 __kmp_affinity_num_masks * __kmp_affin_mask_size);
3886
3887 //
3888 // Sort the address2os table according to the current setting of
3889 // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
3890 //
3891 qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
3892 __kmp_affinity_cmp_Address_child_num);
3893 {
3894 int i;
3895 unsigned j;
3896 for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
3897 if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
3898 continue;
3899 }
3900 unsigned osId = address2os[i].second;
3901 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
3902 kmp_affin_mask_t *dest
3903 = KMP_CPU_INDEX(__kmp_affinity_masks, j);
3904 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
3905 KMP_CPU_COPY(dest, src);
3906 if (++j >= __kmp_affinity_num_masks) {
3907 break;
3908 }
3909 }
3910 KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
3911 }
3912 break;
3913
3914 default:
3915 KMP_ASSERT2(0, "Unexpected affinity setting");
3916 }
3917
3918 __kmp_free(osId2Mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003919 machine_hierarchy.init(address2os, __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003920}
3921
3922
3923void
3924__kmp_affinity_initialize(void)
3925{
3926 //
3927 // Much of the code above was written assumming that if a machine was not
3928 // affinity capable, then __kmp_affinity_type == affinity_none. We now
3929 // explicitly represent this as __kmp_affinity_type == affinity_disabled.
3930 //
3931 // There are too many checks for __kmp_affinity_type == affinity_none
3932 // in this code. Instead of trying to change them all, check if
3933 // __kmp_affinity_type == affinity_disabled, and if so, slam it with
3934 // affinity_none, call the real initialization routine, then restore
3935 // __kmp_affinity_type to affinity_disabled.
3936 //
3937 int disabled = (__kmp_affinity_type == affinity_disabled);
3938 if (! KMP_AFFINITY_CAPABLE()) {
3939 KMP_ASSERT(disabled);
3940 }
3941 if (disabled) {
3942 __kmp_affinity_type = affinity_none;
3943 }
3944 __kmp_aux_affinity_initialize();
3945 if (disabled) {
3946 __kmp_affinity_type = affinity_disabled;
3947 }
3948}
3949
3950
3951void
3952__kmp_affinity_uninitialize(void)
3953{
3954 if (__kmp_affinity_masks != NULL) {
3955 __kmp_free(__kmp_affinity_masks);
3956 __kmp_affinity_masks = NULL;
3957 }
3958 if (fullMask != NULL) {
3959 KMP_CPU_FREE(fullMask);
3960 fullMask = NULL;
3961 }
3962 __kmp_affinity_num_masks = 0;
3963# if OMP_40_ENABLED
3964 __kmp_affinity_num_places = 0;
3965# endif
3966 if (__kmp_affinity_proclist != NULL) {
3967 __kmp_free(__kmp_affinity_proclist);
3968 __kmp_affinity_proclist = NULL;
3969 }
3970 if( address2os != NULL ) {
3971 __kmp_free( address2os );
3972 address2os = NULL;
3973 }
3974 if( procarr != NULL ) {
3975 __kmp_free( procarr );
3976 procarr = NULL;
3977 }
3978}
3979
3980
3981void
3982__kmp_affinity_set_init_mask(int gtid, int isa_root)
3983{
3984 if (! KMP_AFFINITY_CAPABLE()) {
3985 return;
3986 }
3987
3988 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
3989 if (th->th.th_affin_mask == NULL) {
3990 KMP_CPU_ALLOC(th->th.th_affin_mask);
3991 }
3992 else {
3993 KMP_CPU_ZERO(th->th.th_affin_mask);
3994 }
3995
3996 //
3997 // Copy the thread mask to the kmp_info_t structure. If
3998 // __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
3999 // that has all of the OS proc ids set; if __kmp_affinity_respect_mask is
4000 // set, the full mask is instead the same as the initialization thread's
4001 // mask.
4002 //
4003 kmp_affin_mask_t *mask;
4004 int i;
4005
4006# if OMP_40_ENABLED
4007 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
4008# endif
4009 {
Andrey Churbanovf28f6132015-01-13 14:54:00 +00004010 if ((__kmp_affinity_type == affinity_none) || (__kmp_affinity_type == affinity_balanced)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004011 ) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004012# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004013 if (__kmp_num_proc_groups > 1) {
4014 return;
4015 }
4016# endif
4017 KMP_ASSERT(fullMask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004018 i = KMP_PLACE_ALL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004019 mask = fullMask;
4020 }
4021 else {
4022 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4023 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
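// E.g. with a hypothetical __kmp_affinity_num_masks == 4 and a zero
// offset, gtids 0..7 are assigned places 0,1,2,3,0,1,2,3 round-robin.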
4024 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4025 }
4026 }
4027# if OMP_40_ENABLED
4028 else {
4029 if ((! isa_root)
4030 || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004031# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004032 if (__kmp_num_proc_groups > 1) {
4033 return;
4034 }
4035# endif
4036 KMP_ASSERT(fullMask != NULL);
4037 i = KMP_PLACE_ALL;
4038 mask = fullMask;
4039 }
4040 else {
4041 //
4042 // int i = some hash function or just a counter that doesn't
4043 // always start at 0. Use gtid for now.
4044 //
4045 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4046 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4047 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4048 }
4049 }
4050# endif
4051
4052# if OMP_40_ENABLED
4053 th->th.th_current_place = i;
4054 if (isa_root) {
4055 th->th.th_new_place = i;
4056 th->th.th_first_place = 0;
4057 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4058 }
4059
4060 if (i == KMP_PLACE_ALL) {
4061 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
4062 gtid));
4063 }
4064 else {
4065 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
4066 gtid, i));
4067 }
4068# else
4069 if (i == -1) {
4070 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
4071 gtid));
4072 }
4073 else {
4074 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
4075 gtid, i));
4076 }
4077# endif /* OMP_40_ENABLED */
4078
4079 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4080
4081 if (__kmp_affinity_verbose) {
4082 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4083 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4084 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004085 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
4086 buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004087 }
4088
4089# if KMP_OS_WINDOWS
4090 //
4091 // On Windows* OS, the process affinity mask might have changed.
4092 // If the user didn't request affinity and this call fails,
4093 // just continue silently. See CQ171393.
4094 //
4095 if ( __kmp_affinity_type == affinity_none ) {
4096 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
4097 }
4098 else
4099# endif
4100 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4101}
4102
4103
4104# if OMP_40_ENABLED
4105
4106void
4107__kmp_affinity_set_place(int gtid)
4108{
4109 int retval;
4110
4111 if (! KMP_AFFINITY_CAPABLE()) {
4112 return;
4113 }
4114
4115 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4116
4117 KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
4118 gtid, th->th.th_new_place, th->th.th_current_place));
4119
4120 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00004121 // Check that the new place is within this thread's partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004122 //
4123 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004124 KMP_ASSERT(th->th.th_new_place >= 0);
4125 KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004126 if (th->th.th_first_place <= th->th.th_last_place) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004127 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004128 && (th->th.th_new_place <= th->th.th_last_place));
4129 }
4130 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004131 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004132 || (th->th.th_new_place >= th->th.th_last_place));
4133 }
4134
4135 //
4136 // Copy the thread mask to the kmp_info_t structure,
4137 // and set this thread's affinity.
4138 //
4139 kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
4140 th->th.th_new_place);
4141 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4142 th->th.th_current_place = th->th.th_new_place;
4143
4144 if (__kmp_affinity_verbose) {
4145 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4146 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4147 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004148 KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
4149 gtid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004150 }
4151 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4152}
4153
4154# endif /* OMP_40_ENABLED */
4155
4156
4157int
4158__kmp_aux_set_affinity(void **mask)
4159{
4160 int gtid;
4161 kmp_info_t *th;
4162 int retval;
4163
4164 if (! KMP_AFFINITY_CAPABLE()) {
4165 return -1;
4166 }
4167
4168 gtid = __kmp_entry_gtid();
4169 KA_TRACE(1000, ;{
4170 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4171 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4172 (kmp_affin_mask_t *)(*mask));
4173 __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
4174 gtid, buf);
4175 });
4176
4177 if (__kmp_env_consistency_check) {
4178 if ((mask == NULL) || (*mask == NULL)) {
4179 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4180 }
4181 else {
4182 unsigned proc;
4183 int num_procs = 0;
4184
4185 for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
4186 if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
4187 continue;
4188 }
4189 num_procs++;
4190 if (! KMP_CPU_ISSET(proc, fullMask)) {
4191 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4192 break;
4193 }
4194 }
4195 if (num_procs == 0) {
4196 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4197 }
4198
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004199# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004200 if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
4201 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4202 }
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004203# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004204
4205 }
4206 }
4207
4208 th = __kmp_threads[gtid];
4209 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4210 retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4211 if (retval == 0) {
4212 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
4213 }
4214
4215# if OMP_40_ENABLED
4216 th->th.th_current_place = KMP_PLACE_UNDEFINED;
4217 th->th.th_new_place = KMP_PLACE_UNDEFINED;
4218 th->th.th_first_place = 0;
4219 th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004220
4221 //
4222 // Turn off 4.0 affinity for the current thread at this parallel level.
4223 //
4224 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004225# endif
4226
4227 return retval;
4228}
4229
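//
// A minimal usage sketch (assuming the kmp_* affinity extensions declared
// in omp.h) of the public entry points that funnel into the __kmp_aux_*
// routines in this file:
//
//   kmp_affinity_mask_t mask;
//   kmp_create_affinity_mask(&mask);       // start with an empty mask
//   kmp_set_affinity_mask_proc(0, &mask);  // add OS proc 0
//   kmp_set_affinity(&mask);               // -> __kmp_aux_set_affinity()
//   kmp_destroy_affinity_mask(&mask);
//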
4230
4231int
4232__kmp_aux_get_affinity(void **mask)
4233{
4234 int gtid;
4235 int retval;
4236 kmp_info_t *th;
4237
4238 if (! KMP_AFFINITY_CAPABLE()) {
4239 return -1;
4240 }
4241
4242 gtid = __kmp_entry_gtid();
4243 th = __kmp_threads[gtid];
4244 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4245
4246 KA_TRACE(1000, ;{
4247 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4248 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4249 th->th.th_affin_mask);
4250 __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
4251 });
4252
4253 if (__kmp_env_consistency_check) {
4254 if ((mask == NULL) || (*mask == NULL)) {
4255 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
4256 }
4257 }
4258
4259# if !KMP_OS_WINDOWS
4260
4261 retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4262 KA_TRACE(1000, ;{
4263 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4264 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4265 (kmp_affin_mask_t *)(*mask));
4266 __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
4267 });
4268 return retval;
4269
4270# else
4271
4272 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
4273 return 0;
4274
4275# endif /* KMP_OS_WINDOWS */
4276
4277}
4278
Jim Cownie5e8470a2013-09-27 10:38:44 +00004279int
4280__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
4281{
4282 int retval;
4283
4284 if (! KMP_AFFINITY_CAPABLE()) {
4285 return -1;
4286 }
4287
4288 KA_TRACE(1000, ;{
4289 int gtid = __kmp_entry_gtid();
4290 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4291 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4292 (kmp_affin_mask_t *)(*mask));
4293 __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
4294 proc, gtid, buf);
4295 });
4296
4297 if (__kmp_env_consistency_check) {
4298 if ((mask == NULL) || (*mask == NULL)) {
4299 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
4300 }
4301 }
4302
4303 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4304 return -1;
4305 }
4306 if (! KMP_CPU_ISSET(proc, fullMask)) {
4307 return -2;
4308 }
4309
4310 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
4311 return 0;
4312}
4313
4314
4315int
4316__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
4317{
4318 int retval;
4319
4320 if (! KMP_AFFINITY_CAPABLE()) {
4321 return -1;
4322 }
4323
4324 KA_TRACE(1000, ;{
4325 int gtid = __kmp_entry_gtid();
4326 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4327 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4328 (kmp_affin_mask_t *)(*mask));
4329 __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
4330 proc, gtid, buf);
4331 });
4332
4333 if (__kmp_env_consistency_check) {
4334 if ((mask == NULL) || (*mask == NULL)) {
4335 KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
4336 }
4337 }
4338
4339 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4340 return -1;
4341 }
4342 if (! KMP_CPU_ISSET(proc, fullMask)) {
4343 return -2;
4344 }
4345
4346 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
4347 return 0;
4348}
4349
4350
4351int
4352__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
4353{
4354 int retval;
4355
4356 if (! KMP_AFFINITY_CAPABLE()) {
4357 return -1;
4358 }
4359
4360 KA_TRACE(1000, ;{
4361 int gtid = __kmp_entry_gtid();
4362 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4363 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4364 (kmp_affin_mask_t *)(*mask));
4365 __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
4366 proc, gtid, buf);
4367 });
4368
4369 if (__kmp_env_consistency_check) {
4370 if ((mask == NULL) || (*mask == NULL)) {
Andrey Churbanov4b2f17a2015-01-29 15:49:22 +00004371 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
Jim Cownie5e8470a2013-09-27 10:38:44 +00004372 }
4373 }
4374
4375 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4376 return 0;
4377 }
4378 if (! KMP_CPU_ISSET(proc, fullMask)) {
4379 return 0;
4380 }
4381
4382 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
4383}
4384
Jim Cownie5e8470a2013-09-27 10:38:44 +00004385
4386// Dynamic affinity settings - Affinity balanced
4387void __kmp_balanced_affinity( int tid, int nthreads )
4388{
4389 if( __kmp_affinity_uniform_topology() ) {
4390 int coreID;
4391 int threadID;
4392 // Number of hyper threads per core in HT machine
4393 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
4394 // Number of cores
4395 int ncores = __kmp_ncores;
4396 // How many threads will be bound to each core
4397 int chunk = nthreads / ncores;
4398 // How many cores will have an additional thread bound to them - the "big cores"
4399 int big_cores = nthreads % ncores;
4400 // Number of threads on the big cores
4401 int big_nth = ( chunk + 1 ) * big_cores;
4402 if( tid < big_nth ) {
4403 coreID = tid / (chunk + 1 );
4404 threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
4405 } else { //tid >= big_nth
4406 coreID = ( tid - big_cores ) / chunk;
4407 threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
4408 }
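//
// Worked example (hypothetical): nthreads == 10 on ncores == 4 with
// __kmp_nth_per_core == 2 gives chunk == 2, big_cores == 2, big_nth == 6;
// tids 0..5 land on cores 0 and 1 (three apiece), and tid 7 maps to
// coreID == (7 - 2) / 2 == 2 with threadID == ((7 - 2) % 2) % 2 == 1.
//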
4409
4410 KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
4411 "Illegal set affinity operation when not capable");
4412
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00004413 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004414 KMP_CPU_ZERO(mask);
4415
4416 // Granularity == thread
4417 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4418 int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
4419 KMP_CPU_SET( osID, mask);
4420 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4421 for( int i = 0; i < __kmp_nth_per_core; i++ ) {
4422 int osID;
4423 osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
4424 KMP_CPU_SET( osID, mask);
4425 }
4426 }
4427 if (__kmp_affinity_verbose) {
4428 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4429 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004430 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4431 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004432 }
4433 __kmp_set_system_affinity( mask, TRUE );
4434 } else { // Non-uniform topology
4435
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00004436 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004437 KMP_CPU_ZERO(mask);
4438
4439 // Number of hyper threads per core in HT machine
4440 int nth_per_core = __kmp_nThreadsPerCore;
4441 int core_level;
4442 if( nth_per_core > 1 ) {
4443 core_level = __kmp_aff_depth - 2;
4444 } else {
4445 core_level = __kmp_aff_depth - 1;
4446 }
4447
4448 // Number of cores - maximum value; it does not count trailing cores with 0 processors
4449 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
4450
4451 // As a performance optimization, handle the special case nthreads == __kmp_avail_proc separately
4452 if( nthreads == __kmp_avail_proc ) {
4453 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4454 int osID = address2os[ tid ].second;
4455 KMP_CPU_SET( osID, mask);
4456 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4457 int coreID = address2os[ tid ].first.labels[ core_level ];
4458 // Count the osIDs found for the current core; there can be no more than nth_per_core of them;
4459 // since address2os is sorted, we can break when cnt == nth_per_core
4460 int cnt = 0;
4461 for( int i = 0; i < __kmp_avail_proc; i++ ) {
4462 int osID = address2os[ i ].second;
4463 int core = address2os[ i ].first.labels[ core_level ];
4464 if( core == coreID ) {
4465 KMP_CPU_SET( osID, mask);
4466 cnt++;
4467 if( cnt == nth_per_core ) {
4468 break;
4469 }
4470 }
4471 }
4472 }
4473 } else if( nthreads <= __kmp_ncores ) {
4474
4475 int core = 0;
4476 for( int i = 0; i < ncores; i++ ) {
4477 // Check if this core from procarr[] is in the mask
4478 int in_mask = 0;
4479 for( int j = 0; j < nth_per_core; j++ ) {
4480 if( procarr[ i * nth_per_core + j ] != - 1 ) {
4481 in_mask = 1;
4482 break;
4483 }
4484 }
4485 if( in_mask ) {
4486 if( tid == core ) {
4487 for( int j = 0; j < nth_per_core; j++ ) {
4488 int osID = procarr[ i * nth_per_core + j ];
4489 if( osID != -1 ) {
4490 KMP_CPU_SET( osID, mask );
4491 // For granularity=thread it is enough to set the first available osID for this core
4492 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4493 break;
4494 }
4495 }
4496 }
4497 break;
4498 } else {
4499 core++;
4500 }
4501 }
4502 }
4503
4504 } else { // nthreads > __kmp_ncores
4505
4506 // Array to save the number of processors at each core
Jonathan Peyton7be075332015-06-22 15:53:50 +00004507 int* nproc_at_core = (int*)KMP_ALLOCA(sizeof(int)*ncores);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004508 // Array to save the number of cores with "x" available processors;
Jonathan Peyton7be075332015-06-22 15:53:50 +00004509 int* ncores_with_x_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004510 // Array to save the number of cores with # procs from x to nth_per_core
Jonathan Peyton7be075332015-06-22 15:53:50 +00004511 int* ncores_with_x_to_max_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004512
4513 for( int i = 0; i <= nth_per_core; i++ ) {
4514 ncores_with_x_procs[ i ] = 0;
4515 ncores_with_x_to_max_procs[ i ] = 0;
4516 }
4517
4518 for( int i = 0; i < ncores; i++ ) {
4519 int cnt = 0;
4520 for( int j = 0; j < nth_per_core; j++ ) {
4521 if( procarr[ i * nth_per_core + j ] != -1 ) {
4522 cnt++;
4523 }
4524 }
4525 nproc_at_core[ i ] = cnt;
4526 ncores_with_x_procs[ cnt ]++;
4527 }
4528
4529 for( int i = 0; i <= nth_per_core; i++ ) {
4530 for( int j = i; j <= nth_per_core; j++ ) {
4531 ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
4532 }
4533 }
4534
4535 // Max number of processors
4536 int nproc = nth_per_core * ncores;
4537 // An array to keep the number of threads per context
4538 int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
4539 for( int i = 0; i < nproc; i++ ) {
4540 newarr[ i ] = 0;
4541 }
4542
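//
// Distribute the threads over the contexts in rounds: the first pass of
// the while loop (flag == 0) gives each free context at most one thread,
// visiting cores in proportion to how many contexts they have; once every
// context holds a thread, later passes (flag == 1) oversubscribe the
// contexts evenly.
//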
4543 int nth = nthreads;
4544 int flag = 0;
4545 while( nth > 0 ) {
4546 for( int j = 1; j <= nth_per_core; j++ ) {
4547 int cnt = ncores_with_x_to_max_procs[ j ];
4548 for( int i = 0; i < ncores; i++ ) {
4549 // Skip the core with 0 processors
4550 if( nproc_at_core[ i ] == 0 ) {
4551 continue;
4552 }
4553 for( int k = 0; k < nth_per_core; k++ ) {
4554 if( procarr[ i * nth_per_core + k ] != -1 ) {
4555 if( newarr[ i * nth_per_core + k ] == 0 ) {
4556 newarr[ i * nth_per_core + k ] = 1;
4557 cnt--;
4558 nth--;
4559 break;
4560 } else {
4561 if( flag != 0 ) {
4562 newarr[ i * nth_per_core + k ] ++;
4563 cnt--;
4564 nth--;
4565 break;
4566 }
4567 }
4568 }
4569 }
4570 if( cnt == 0 || nth == 0 ) {
4571 break;
4572 }
4573 }
4574 if( nth == 0 ) {
4575 break;
4576 }
4577 }
4578 flag = 1;
4579 }
4580 int sum = 0;
4581 for( int i = 0; i < nproc; i++ ) {
4582 sum += newarr[ i ];
4583 if( sum > tid ) {
4584 // Granularity == thread
4585 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4586 int osID = procarr[ i ];
4587 KMP_CPU_SET( osID, mask);
4588 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4589 int coreID = i / nth_per_core;
4590 for( int ii = 0; ii < nth_per_core; ii++ ) {
4591 int osID = procarr[ coreID * nth_per_core + ii ];
4592 if( osID != -1 ) {
4593 KMP_CPU_SET( osID, mask);
4594 }
4595 }
4596 }
4597 break;
4598 }
4599 }
4600 __kmp_free( newarr );
4601 }
4602
4603 if (__kmp_affinity_verbose) {
4604 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4605 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004606 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4607 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004608 }
4609 __kmp_set_system_affinity( mask, TRUE );
4610 }
4611}
4612
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004613#else
4614 // affinity not supported
4615
Andrey Churbanovaa1f2b62015-04-13 18:51:59 +00004616static const kmp_uint32 noaff_maxLevels=7;
4617kmp_uint32 noaff_skipPerLevel[noaff_maxLevels];
4618kmp_uint32 noaff_depth;
4619kmp_uint8 noaff_leaf_kids;
4620kmp_int8 noaff_uninitialized=1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004621
Andrey Churbanovaa1f2b62015-04-13 18:51:59 +00004622void noaff_init(int nprocs)
4623{
4624 kmp_int8 result = KMP_COMPARE_AND_STORE_ACQ8(&noaff_uninitialized, 1, 2);
4625 if (result == 0) return; // Already initialized
4626 else if (result == 2) { // Someone else is initializing
4627 while (TCR_1(noaff_uninitialized) != 0) KMP_CPU_PAUSE();
4628 return;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004629 }
Andrey Churbanovaa1f2b62015-04-13 18:51:59 +00004630 KMP_DEBUG_ASSERT(result==1);
4631
4632 kmp_uint32 numPerLevel[noaff_maxLevels];
4633 noaff_depth = 1;
4634 for (kmp_uint32 i=0; i<noaff_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
4635 numPerLevel[i] = 1;
4636 noaff_skipPerLevel[i] = 1;
4637 }
4638
4639 numPerLevel[0] = 4;
4640 numPerLevel[1] = nprocs/4;
4641 if (nprocs%4) numPerLevel[1]++;
4642
4643 for (int i=noaff_maxLevels-1; i>=0; --i) // count non-empty levels to get depth
4644 if (numPerLevel[i] != 1 || noaff_depth > 1) // only count one top-level '1'
4645 noaff_depth++;
4646
4647 kmp_uint32 branch = 4;
4648 if (numPerLevel[0] == 1) branch = nprocs/4;
4649 if (branch<4) branch=4;
4650 for (kmp_uint32 d=0; d<noaff_depth-1; ++d) { // optimize hierarchy width
4651 while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
4652 if (numPerLevel[d] & 1) numPerLevel[d]++;
4653 numPerLevel[d] = numPerLevel[d] >> 1;
4654 if (numPerLevel[d+1] == 1) noaff_depth++;
4655 numPerLevel[d+1] = numPerLevel[d+1] << 1;
4656 }
4657 if(numPerLevel[0] == 1) {
4658 branch = branch >> 1;
4659 if (branch<4) branch = 4;
4660 }
4661 }
4662
4663 for (kmp_uint32 i=1; i<noaff_depth; ++i)
4664 noaff_skipPerLevel[i] = numPerLevel[i-1] * noaff_skipPerLevel[i-1];
4665 // Fill in hierarchy in the case of oversubscription
4666 for (kmp_uint32 i=noaff_depth; i<noaff_maxLevels; ++i)
4667 noaff_skipPerLevel[i] = 2*noaff_skipPerLevel[i-1];
4668 noaff_leaf_kids = (kmp_uint8)numPerLevel[0]-1;
4669 noaff_uninitialized = 0; // One writer
4670
4671}
4672
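//
// Worked example for noaff_init() above (hypothetical): nprocs == 16
// yields numPerLevel == {4, 4, 1, ...}, noaff_depth == 3,
// noaff_skipPerLevel == {1, 4, 16, 32, 64, 128, 256} (doubling past the
// real depth to absorb oversubscription), and noaff_leaf_kids == 3.
//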
4673void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
4674 if (noaff_uninitialized)
4675 noaff_init(nproc);
4676
4677 thr_bar->depth = noaff_depth;
4678 thr_bar->base_leaf_kids = noaff_leaf_kids;
4679 thr_bar->skip_per_level = noaff_skipPerLevel;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004680}
4681
Alp Toker763b9392014-02-28 09:42:41 +00004682#endif // KMP_AFFINITY_SUPPORTED