/*
 * kmp_affinity.cpp -- affinity management
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_str.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_affinity.h"

// Store the real or imagined machine hierarchy here
static hierarchy_info machine_hierarchy;

void __kmp_cleanup_hierarchy() {
    machine_hierarchy.fini();
}

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
    kmp_uint32 depth;
    // The test below is true if affinity is available, but set to "none".
    // Need to init on first use of hierarchical barrier.
    if (TCR_1(machine_hierarchy.uninitialized))
        machine_hierarchy.init(NULL, nproc);
    // Adjust the hierarchy in case num threads exceeds original
    if (nproc > machine_hierarchy.base_num_threads)
        machine_hierarchy.resize(nproc);

    depth = machine_hierarchy.depth;
    KMP_DEBUG_ASSERT(depth > 0);
    // The loop below adjusts the depth in the case of a resize
    while (nproc > machine_hierarchy.skipPerLevel[depth-1])
        depth++;

    thr_bar->depth = depth;
    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}
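
// A note on the loop above: skipPerLevel[d], as maintained by hierarchy_info
// (see kmp_affinity.h), can be read as the number of threads spanned by one
// subtree at depth d, so after a resize the reported depth keeps growing
// until the topmost level spans all nproc threads.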

#if KMP_AFFINITY_SUPPORTED

//
// Print the affinity mask to the character array in a pretty format.
//
char *
__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
{
    KMP_ASSERT(buf_len >= 40);
    char *scan = buf;
    char *end = buf + buf_len - 1;

    //
    // Find first element / check for empty set.
    //
    size_t i;
    for (i = 0; i < KMP_CPU_SETSIZE; i++) {
        if (KMP_CPU_ISSET(i, mask)) {
            break;
        }
    }
    if (i == KMP_CPU_SETSIZE) {
        KMP_SNPRINTF(scan, buf_len, "{<empty>}");
        while (*scan != '\0') scan++;
        KMP_ASSERT(scan <= end);
        return buf;
    }

    KMP_SNPRINTF(scan, buf_len, "{%ld", (long)i);
    while (*scan != '\0') scan++;
    i++;
    for (; i < KMP_CPU_SETSIZE; i++) {
        if (! KMP_CPU_ISSET(i, mask)) {
            continue;
        }

        //
        // Check for buffer overflow. A string of the form ",<n>" will have
        // at most 10 characters, plus we want to leave room to print ",...}"
        // if the set is too large to print, for a total of 15 characters.
        // We already left room for '\0' in setting end.
        //
        if (end - scan < 15) {
            break;
        }
        KMP_SNPRINTF(scan, buf_len, ",%-ld", (long)i);
        while (*scan != '\0') scan++;
    }
    if (i < KMP_CPU_SETSIZE) {
        KMP_SNPRINTF(scan, buf_len, ",...");
        while (*scan != '\0') scan++;
    }
    KMP_SNPRINTF(scan, buf_len, "}");
    while (*scan != '\0') scan++;
    KMP_ASSERT(scan <= end);
    return buf;
}
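
//
// For illustration, a mask with bits 0, 2, 4 and 6 set prints as
// "{0,2,4,6}"; when the set is too large for the buffer, the output is
// truncated to the form "{0,2,...}".
//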


void
__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
{
    KMP_CPU_ZERO(mask);

# if KMP_GROUP_AFFINITY

    if (__kmp_num_proc_groups > 1) {
        int group;
        KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
        for (group = 0; group < __kmp_num_proc_groups; group++) {
            int i;
            int num = __kmp_GetActiveProcessorCount(group);
            for (i = 0; i < num; i++) {
                KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
            }
        }
    }
    else

# endif /* KMP_GROUP_AFFINITY */

    {
        int proc;
        for (proc = 0; proc < __kmp_xproc; proc++) {
            KMP_CPU_SET(proc, mask);
        }
    }
}

//
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the child_num
// vector of the address object. This is done in case the labels used for
// the children at one node of the hierarchy differ from those used for
// another node at the same level. Example: suppose the machine has 2 nodes
// with 2 packages each. The first node contains packages 601 and 602, and
// the second node contains packages 603 and 604. If we try to sort the table
// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
// because we are paying attention to the labels themselves, not the ordinal
// child numbers. By using the child numbers in the sort, the result is
// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
//
static void
__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
  int numAddrs)
{
    KMP_DEBUG_ASSERT(numAddrs > 0);
    int depth = address2os->first.depth;
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
      * sizeof(unsigned));
    int labCt;
    for (labCt = 0; labCt < depth; labCt++) {
        address2os[0].first.childNums[labCt] = counts[labCt] = 0;
        lastLabel[labCt] = address2os[0].first.labels[labCt];
    }
    int i;
    for (i = 1; i < numAddrs; i++) {
        for (labCt = 0; labCt < depth; labCt++) {
            if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
                int labCt2;
                for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
                    counts[labCt2] = 0;
                    lastLabel[labCt2] = address2os[i].first.labels[labCt2];
                }
                counts[labCt]++;
                lastLabel[labCt] = address2os[i].first.labels[labCt];
                break;
            }
        }
        for (labCt = 0; labCt < depth; labCt++) {
            address2os[i].first.childNums[labCt] = counts[labCt];
        }
        for (; labCt < (int)Address::maxDepth; labCt++) {
            address2os[i].first.childNums[labCt] = 0;
        }
    }
}

//
// All of the __kmp_affinity_create_*_map() routines should set
// __kmp_affinity_masks to a vector of affinity mask objects of length
// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
// return the number of levels in the machine topology tree (zero if
// __kmp_affinity_type == affinity_none).
//
// All of the __kmp_affinity_create_*_map() routines should set *fullMask
// to the affinity mask for the initialization thread. They need to save and
// restore the mask, and it could be needed later, so saving it is just an
// optimization to avoid calling kmp_get_system_affinity() again.
//
static kmp_affin_mask_t *fullMask = NULL;

kmp_affin_mask_t *
__kmp_affinity_get_fullMask() { return fullMask; }

static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif

//
// __kmp_affinity_uniform_topology() doesn't work when called from
// places which support arbitrarily many levels in the machine topology
// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
// and __kmp_affinity_create_x2apicid_map().
//
inline static bool
__kmp_affinity_uniform_topology()
{
    return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}
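
//
// For example, a machine with 2 packages x 4 cores x 2 threads is treated
// as uniform when all 16 logical procs are available; if the initial
// affinity mask excludes some procs, __kmp_avail_proc is smaller and the
// topology is treated as non-uniform.
//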


//
// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc.
//
static void
__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
  int pkgLevel, int coreLevel, int threadLevel)
{
    int proc;

    KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
    for (proc = 0; proc < len; proc++) {
        int level;
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);
        for (level = 0; level < depth; level++) {
            if (level == threadLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
            }
            else if (level == coreLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
            }
            else if (level == pkgLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
            }
            else if (level > pkgLevel) {
                __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                  level - pkgLevel - 1);
            }
            else {
                __kmp_str_buf_print(&buf, "L%d ", level);
            }
            __kmp_str_buf_print(&buf, "%d ",
              address2os[proc].first.labels[level]);
        }
        KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
          buf.str);
        __kmp_str_buf_free(&buf);
    }
}
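
//
// With KMP_AFFINITY=verbose, this prints lines of roughly the form (the
// exact text comes from the i18n message catalog):
//
//     OS proc 3 maps to package 0 core 1 thread 1
//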


//
// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread id's <-> processor id's.
//
static int
__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Even if __kmp_affinity_type == affinity_none, this routine might still
    // be called to set __kmp_ncores, as well as
    // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(__kmp_affinity_type == affinity_none);
        __kmp_ncores = nPackages = __kmp_xproc;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages. Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nPackages = __kmp_avail_proc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);

        KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }
    if (__kmp_affinity_type == affinity_none) {
        return 0;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    unsigned int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(1);
        addr.labels[0] = i;
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
    }
    if (__kmp_affinity_verbose) {
        KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Only the package level is modeled in the machine topology map,
        // so the #levels of granularity is either 0 or 1.
        //
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels = 1;
        }
        else {
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 1;
}


# if KMP_GROUP_AFFINITY

//
// If multiple Windows* OS processor groups exist, we can create a 2-level
// topology map with the groups at level 0 and the individual procs at
// level 1.
//
// This facilitates letting the threads float among all procs in a group,
// if granularity=group (the default when there are multiple groups).
//
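// For illustration: on 64-bit Windows* OS (where DWORD_PTR is 64 bits),
// OS proc 70 gets labels {1, 6} - group 1 (70 / 64) at level 0, and proc 6
// within the group (70 % 64) at level 1.
//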
static int
__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // If we don't have multiple processor groups, return now.
    // The flat mapping will be used.
    //
    if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
        // FIXME set *msg_id
        return -1;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(2);
        addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
        addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
              addr.labels[1]);
        }
    }

    if (__kmp_affinity_gran_levels < 0) {
        if (__kmp_affinity_gran == affinity_gran_group) {
            __kmp_affinity_gran_levels = 1;
        }
        else if ((__kmp_affinity_gran == affinity_gran_fine)
          || (__kmp_affinity_gran == affinity_gran_thread)) {
            __kmp_affinity_gran_levels = 0;
        }
        else {
            const char *gran_str = NULL;
            if (__kmp_affinity_gran == affinity_gran_core) {
                gran_str = "core";
            }
            else if (__kmp_affinity_gran == affinity_gran_package) {
                gran_str = "package";
            }
            else if (__kmp_affinity_gran == affinity_gran_node) {
                gran_str = "node";
            }
            else {
                KMP_ASSERT(0);
            }

            // Warning: can't use affinity granularity \"gran\" with group topology method, using "thread"
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 2;
}

# endif /* KMP_GROUP_AFFINITY */


# if KMP_ARCH_X86 || KMP_ARCH_X86_64

static int
__kmp_cpuid_mask_width(int count) {
    int r = 0;

    while((1<<r) < count)
        ++r;
    return r;
}
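
// For example, __kmp_cpuid_mask_width(6) == 3, since 3 is the smallest r
// with (1 << r) >= 6.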


class apicThreadInfo {
public:
    unsigned osId;              // param to __kmp_affinity_bind_thread
    unsigned apicId;            // from cpuid after binding
    unsigned maxCoresPerPkg;    //      ""
    unsigned maxThreadsPerPkg;  //      ""
    unsigned pkgId;             // inferred from above values
    unsigned coreId;            //      ""
    unsigned threadId;          //      ""
};


static int
__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->osId < bb->osId) return -1;
    if (aa->osId > bb->osId) return 1;
    return 0;
}


static int
__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->pkgId < bb->pkgId) return -1;
    if (aa->pkgId > bb->pkgId) return 1;
    if (aa->coreId < bb->coreId) return -1;
    if (aa->coreId > bb->coreId) return 1;
    if (aa->threadId < bb->threadId) return -1;
    if (aa->threadId > bb->threadId) return 1;
    return 0;
}


//
// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
// an algorithm which cycles through the available os threads, setting
// the current thread's affinity mask to that thread, and then retrieves
// the Apic Id for each thread context using the cpuid instruction.
//
static int
__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;
    int rc;
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check if cpuid leaf 4 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 4) {
        *msg_id = kmp_i18n_str_NoLeaf4Support;
        return -1;
    }

    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
    // the defaults that we calculated from issuing cpuid without binding
    // to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        //
        // Get an upper bound on the number of threads per package using
        // cpuid(1).
        //
        // On some OS/chip combinations where HT is supported by the chip
        // but is disabled, this value will be 2 on a single core chip.
        // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
        //
        __kmp_x86_cpuid(1, 0, &buf);
        int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (maxThreadsPerPkg == 0) {
            maxThreadsPerPkg = 1;
        }

        //
        // The num cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // The author of cpu_count.cpp treated this only as an upper bound
        // on the number of cores, but I haven't seen any cases where it
        // was greater than the actual number of cores, so we will treat
        // it as exact in this block of code.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            nCoresPerPkg = 1;
        }

        //
        // There is no way to reliably tell if HT is enabled without issuing
        // the cpuid instruction from every thread and correlating the cpuid
        // info, so if the machine is not affinity capable, we assume that HT
        // is off. We have seen quite a few machines where maxThreadsPerPkg
        // is 2, yet the machine does not support HT.
        //
        // - Older OSes are usually found on machines with older chips, which
        //   do not support HT.
        //
        // - The performance penalty for mistakenly identifying a machine as
        //   HT when it isn't (which results in blocktime being incorrectly
        //   set to 0) is greater than the penalty for mistakenly identifying
        //   a machine as being 1 thread/core when it is really HT enabled
        //   (which results in blocktime being incorrectly set to a positive
        //   value).
        //
        __kmp_ncores = __kmp_xproc;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        __kmp_nThreadsPerCore = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    KMP_ASSERT(oldMask != NULL);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    // The relevant information is:
    //
    // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
    //    has a unique Apic Id, which is of the form pkg# : core# : thread#.
    //
    // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The
    //    value of this field determines the width of the core# + thread#
    //    fields in the Apic Id. It is also an upper bound on the number
    //    of threads per package, but it has been verified that situations
    //    happen where it is not exact. In particular, on certain OS/chip
    //    combinations where Intel(R) Hyper-Threading Technology is supported
    //    by the chip but has been disabled, the value of this field will be
    //    2 (for a single core chip). On other OS/chip combinations supporting
    //    Intel(R) Hyper-Threading Technology, the value of this field will be
    //    1 when Intel(R) Hyper-Threading Technology is disabled and 2 when
    //    it is enabled.
    //
    // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The
    //    value of this field (+1) determines the width of the core# field in
    //    the Apic Id. The comments in "cpucount.cpp" say that this value is
    //    an upper bound, but the IA-32 architecture manual says that it is
    //    exactly the number of cores per package, and I haven't seen any
    //    case where it wasn't.
    //
    // From this information, deduce the package Id, core Id, and thread Id,
    // and set the corresponding fields in the apicThreadInfo struct.
    //
    unsigned i;
    apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
    unsigned nApics = 0;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(i);
        threadInfo[nApics].osId = i;

        //
        // The apic id and max threads per pkg come from cpuid(1).
        //
        __kmp_x86_cpuid(1, 0, &buf);
        if (! ((buf.edx >> 9) & 1)) {
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_ApicNotPresent;
            return -1;
        }
        threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
        threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (threadInfo[nApics].maxThreadsPerPkg == 0) {
            threadInfo[nApics].maxThreadsPerPkg = 1;
        }

        //
        // Max cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            threadInfo[nApics].maxCoresPerPkg = 1;
        }

        //
        // Infer the pkgId / coreId / threadId using only the info
        // obtained locally.
        //
        int widthCT = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxThreadsPerPkg);
        threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

        int widthC = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxCoresPerPkg);
        int widthT = widthCT - widthC;
        if (widthT < 0) {
            //
            // I've never seen this one happen, but I suppose it could, if
            // the cpuid instruction on a chip was really screwed up.
            // Make sure to restore the affinity mask before the tail call.
            //
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }

        int maskC = (1 << widthC) - 1;
        threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
          & maskC;

        int maskT = (1 << widthT) - 1;
        threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;
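
        //
        // Worked example with hypothetical values: maxThreadsPerPkg == 8
        // gives widthCT == 3, and maxCoresPerPkg == 4 gives widthC == 2, so
        // widthT == 1. An apicId of 11 (binary 1011) then decodes as
        // pkgId == 1, coreId == 1, and threadId == 1.
        //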

        nApics++;
    }

    //
    // We've collected all the info we need.
    // Restore the old affinity mask for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, form an Address object
    // with depth 1 and return immediately (or, if affinity is off, set
    // address2os to NULL and return).
    //
    // If it is configured to omit the package level when there is only a
    // single package, the logic at the end of this routine won't work if
    // there is only a single thread - it would try to form an Address
    // object with depth 0.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
        Address addr(1);
        addr.labels[0] = threadInfo[0].pkgId;
        (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
        }

        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the threadInfo table by physical Id.
    //
    qsort(threadInfo, nApics, sizeof(*threadInfo),
      __kmp_affinity_cmp_apicThreadInfo_phys_id);

    //
    // The table is now sorted by pkgId / coreId / threadId, but we really
    // don't know the radix of any of the fields. pkgId's may be sparsely
    // assigned among the chips on a system. Although coreId's are usually
    // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
    // [0..threadsPerCore-1], we don't want to make any such assumptions.
    //
    // For that matter, we don't know what coresPerPkg and threadsPerCore
    // (or the total # packages) are at this point - we want to determine
    // that now. We only have an upper bound on the first two figures.
    //
    // We also perform a consistency check at this point: the values returned
    // by the cpuid instruction for any thread bound to a given package had
    // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
    //
    nPackages = 1;
    nCoresPerPkg = 1;
    __kmp_nThreadsPerCore = 1;
    unsigned nCores = 1;

    unsigned pkgCt = 1;  // to determine radii
    unsigned lastPkgId = threadInfo[0].pkgId;
    unsigned coreCt = 1;
    unsigned lastCoreId = threadInfo[0].coreId;
    unsigned threadCt = 1;
    unsigned lastThreadId = threadInfo[0].threadId;

    // intra-pkg consistency checks
    unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
    unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

    for (i = 1; i < nApics; i++) {
        if (threadInfo[i].pkgId != lastPkgId) {
            nCores++;
            pkgCt++;
            lastPkgId = threadInfo[i].pkgId;
            if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
            coreCt = 1;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;

            //
            // This is a different package, so go on to the next iteration
            // without doing any consistency checks. Reset the consistency
            // check vars, though.
            //
            prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
            prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
            continue;
        }

        if (threadInfo[i].coreId != lastCoreId) {
            nCores++;
            coreCt++;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;
        }
        else if (threadInfo[i].threadId != lastThreadId) {
            threadCt++;
            lastThreadId = threadInfo[i].threadId;
        }
        else {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
            return -1;
        }

        //
        // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
        // fields agree between all the threads bound to a given package.
        //
        if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
          || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }
    }
    nPackages = pkgCt;
    if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
    if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages. Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nCores;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (__kmp_affinity_uniform_topology()) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Now that we've determined the number of packages, the number of cores
    // per package, and the number of threads per core, we can construct the
    // data structure that is to be returned.
    //
    int pkgLevel = 0;
    int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
    int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
    unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

    KMP_ASSERT(depth > 0);
    *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

    for (i = 0; i < nApics; ++i) {
        Address addr(depth);
        unsigned os = threadInfo[i].osId;
        int d = 0;

        if (pkgLevel >= 0) {
            addr.labels[d++] = threadInfo[i].pkgId;
        }
        if (coreLevel >= 0) {
            addr.labels[d++] = threadInfo[i].coreId;
        }
        if (threadLevel >= 0) {
            addr.labels[d++] = threadInfo[i].threadId;
        }
        (*address2os)[i] = AddrUnsPair(addr, os);
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0)
          && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return depth;
}
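
//
// To summarize the contract above: the routine returns -1 on failure (with
// *msg_id set), 0 when affinity is not enabled (the topology globals are
// still set), and otherwise the depth of the map stored in *address2os.
//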


//
// Intel(R) microarchitecture code name Nehalem, Dunnington and later
// architectures support a newer interface for specifying the x2APIC Ids,
// based on cpuid leaf 11.
//
static int
__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;

    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check to see if cpuid leaf 11 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 11) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }
    __kmp_x86_cpuid(11, 0, &buf);
    if (buf.ebx == 0) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }

    //
    // Find the number of levels in the machine topology. While we're at it,
    // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We
    // will try to get more accurate values later by explicitly counting
    // them, but get reasonable defaults now, in case we return early.
    //
    int level;
    int threadLevel = -1;
    int coreLevel = -1;
    int pkgLevel = -1;
    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

    for (level = 0;; level++) {
        if (level > 31) {
            //
            // FIXME: Hack for DPD200163180
            //
            // If level is big then something went wrong -> exiting
            //
            // There could actually be 32 valid levels in the machine topology,
            // but so far, the only machine we have seen which does not exit
            // this loop before iteration 32 has fubar x2APIC settings.
            //
            // For now, just reject this case based upon loop trip count.
            //
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }
        __kmp_x86_cpuid(11, level, &buf);
        if (buf.ebx == 0) {
            if (pkgLevel < 0) {
                //
                // Will infer nPackages from __kmp_xproc
                //
                pkgLevel = level;
                level++;
            }
            break;
        }
        int kind = (buf.ecx >> 8) & 0xff;
        if (kind == 1) {
            //
            // SMT level
            //
            threadLevel = level;
            coreLevel = -1;
            pkgLevel = -1;
            __kmp_nThreadsPerCore = buf.ebx & 0xff;
            if (__kmp_nThreadsPerCore == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else if (kind == 2) {
            //
            // core level
            //
            coreLevel = level;
            pkgLevel = -1;
            nCoresPerPkg = buf.ebx & 0xff;
            if (nCoresPerPkg == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else {
            if (level <= 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
            if (pkgLevel >= 0) {
                continue;
            }
            pkgLevel = level;
            nPackages = buf.ebx & 0xff;
            if (nPackages == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
    }
    int depth = level;

    //
    // In the above loop, "level" was counted from the finest level (usually
    // thread) to the coarsest. The caller expects that we will place the
    // labels in (*address2os)[].first.labels[] in the inverse order, so
    // we need to invert the vars saying which level means what.
    //
    if (threadLevel >= 0) {
        threadLevel = depth - threadLevel - 1;
    }
    if (coreLevel >= 0) {
        coreLevel = depth - coreLevel - 1;
    }
    KMP_DEBUG_ASSERT(pkgLevel >= 0);
    pkgLevel = depth - pkgLevel - 1;
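
    //
    // For example, if cpuid reported the SMT level at 0 and the core level
    // at 1, and the package level was inferred at 2 (so depth == 3), the
    // inverted values are threadLevel == 2, coreLevel == 1, pkgLevel == 0.
    //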

    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
    // the defaults that we calculated from issuing cpuid without binding
    // to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE())
    {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Allocate the data structure to be returned.
    //
    AddrUnsPair *retval = (AddrUnsPair *)
      __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    unsigned int proc;
    int nApics = 0;
    for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(proc, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(proc);

        //
        // Extract the labels for each level in the machine topology map
        // from the Apic ID.
        //
        Address addr(depth);
        int prev_shift = 0;

        for (level = 0; level < depth; level++) {
            __kmp_x86_cpuid(11, level, &buf);
            unsigned apicId = buf.edx;
            if (buf.ebx == 0) {
                if (level != depth - 1) {
                    KMP_CPU_FREE(oldMask);
                    *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
                    return -1;
                }
                addr.labels[depth - level - 1] = apicId >> prev_shift;
                level++;
                break;
            }
            int shift = buf.eax & 0x1f;
            int mask = (1 << shift) - 1;
            addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
            prev_shift = shift;
        }
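
        //
        // Hypothetical example: if the SMT level reports shift 1 and the
        // core level reports shift 5, then for apicId 45 (binary 101101)
        // the extracted labels are thread = 45 & 0x1 = 1,
        // core = (45 & 0x1f) >> 1 = 6, and pkg = 45 >> 5 = 1.
        //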
        if (level != depth) {
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }

        retval[nApics] = AddrUnsPair(addr, proc);
        nApics++;
    }

    //
    // We've collected all the info we need.
    // Restore the old affinity mask for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, return now.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(retval);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        //
        // Form an Address object which only includes the package level.
        //
        Address addr(1);
        addr.labels[0] = retval[0].first.labels[pkgLevel];
        retval[0].first = addr;

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
        }

        *address2os = retval;
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the table by physical Id.
    //
    qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

    //
    // Find the radix at each of the levels.
    //
    unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    for (level = 0; level < depth; level++) {
        totals[level] = 1;
        maxCt[level] = 1;
        counts[level] = 1;
        last[level] = retval[0].first.labels[level];
    }

    //
    // From here on, the iteration variable "level" runs from the finest
    // level to the coarsest, i.e. we iterate forward through
    // (*address2os)[].first.labels[] - in the previous loops, we iterated
    // backwards.
    //
    for (proc = 1; (int)proc < nApics; proc++) {
        int level;
        for (level = 0; level < depth; level++) {
            if (retval[proc].first.labels[level] != last[level]) {
                int j;
                for (j = level + 1; j < depth; j++) {
                    totals[j]++;
                    counts[j] = 1;
                    // The line below would cause incorrect topology
                    // information to be printed when the max value for some
                    // level (maxCt[level]) is encountered earlier in the
                    // array than some smaller value. For example, if pkg0
                    // has 4 cores and pkg1 has 2 cores, maxCt[1] would end
                    // up as 2, whereas it must be 4.
                    // TODO!!! Check if it can be commented safely
                    //maxCt[j] = 1;
                    last[j] = retval[proc].first.labels[j];
                }
                totals[level]++;
                counts[level]++;
                if (counts[level] > maxCt[level]) {
                    maxCt[level] = counts[level];
                }
                last[level] = retval[proc].first.labels[level];
                break;
            }
            else if (level == depth - 1) {
                __kmp_free(last);
                __kmp_free(maxCt);
                __kmp_free(counts);
                __kmp_free(totals);
                __kmp_free(retval);
                KMP_CPU_FREE(oldMask);
                *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
                return -1;
            }
        }
    }

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages. Make sure all these vars are set
    // correctly, and return if affinity is not enabled.
    //
    if (threadLevel >= 0) {
        __kmp_nThreadsPerCore = maxCt[threadLevel];
    }
    else {
        __kmp_nThreadsPerCore = 1;
    }
    nPackages = totals[pkgLevel];

    if (coreLevel >= 0) {
        __kmp_ncores = totals[coreLevel];
        nCoresPerPkg = maxCt[coreLevel];
    }
    else {
        __kmp_ncores = nPackages;
        nCoresPerPkg = 1;
    }

    //
    // Check to see if the machine topology is uniform
    //
    unsigned prod = maxCt[0];
    for (level = 1; level < depth; level++) {
        prod *= maxCt[level];
    }
    bool uniform = (prod == totals[level - 1]);

    //
    // Print the machine topology summary.
    //
    if (__kmp_affinity_verbose) {
        char mask[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (uniform) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }

        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);

        __kmp_str_buf_print(&buf, "%d", totals[0]);
        for (level = 1; level <= pkgLevel; level++) {
            __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
        }
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);

        __kmp_str_buf_free(&buf);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(last);
        __kmp_free(maxCt);
        __kmp_free(counts);
        __kmp_free(totals);
        __kmp_free(retval);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Find any levels with radix 1, and remove them from the map
    // (except for the package level).
    //
    int new_depth = 0;
    for (level = 0; level < depth; level++) {
        if ((maxCt[level] == 1) && (level != pkgLevel)) {
            continue;
        }
        new_depth++;
    }

    //
    // If we are removing any levels, allocate a new vector to return,
    // and copy the relevant information to it.
    //
    if (new_depth != depth) {
        AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
          sizeof(AddrUnsPair) * nApics);
        for (proc = 0; (int)proc < nApics; proc++) {
            Address addr(new_depth);
            new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
        }
        int new_level = 0;
        int newPkgLevel = -1;
        int newCoreLevel = -1;
        int newThreadLevel = -1;
        int i;
        for (level = 0; level < depth; level++) {
            if ((maxCt[level] == 1)
              && (level != pkgLevel)) {
                //
                // Remove this level. Never remove the package level
                //
                continue;
            }
            if (level == pkgLevel) {
                newPkgLevel = level;
            }
            if (level == coreLevel) {
                newCoreLevel = level;
            }
            if (level == threadLevel) {
                newThreadLevel = level;
            }
            for (proc = 0; (int)proc < nApics; proc++) {
                new_retval[proc].first.labels[new_level]
                  = retval[proc].first.labels[level];
            }
            new_level++;
        }

        __kmp_free(retval);
        retval = new_retval;
        depth = new_depth;
        pkgLevel = newPkgLevel;
        coreLevel = newCoreLevel;
        threadLevel = newThreadLevel;
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(last);
    __kmp_free(maxCt);
    __kmp_free(counts);
    __kmp_free(totals);
    KMP_CPU_FREE(oldMask);
    *address2os = retval;
    return depth;
}


# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */


#define osIdIndex       0
#define threadIdIndex   1
#define coreIdIndex     2
#define pkgIdIndex      3
#define nodeIdIndex     4

typedef unsigned *ProcCpuInfo;
static unsigned maxIndex = pkgIdIndex;


static int
__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
{
    const unsigned *aa = (const unsigned *)a;
    const unsigned *bb = (const unsigned *)b;
    if (aa[osIdIndex] < bb[osIdIndex]) return -1;
    if (aa[osIdIndex] > bb[osIdIndex]) return 1;
    return 0;
}


static int
__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
{
    unsigned i;
    const unsigned *aa = *((const unsigned **)a);
    const unsigned *bb = *((const unsigned **)b);
    for (i = maxIndex; ; i--) {
        if (aa[i] < bb[i]) return -1;
        if (aa[i] > bb[i]) return 1;
        if (i == osIdIndex) break;
    }
    return 0;
}


//
// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
// affinity map.
//
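// A typical record looks like the following (only the fields parsed below -
// "processor", "physical id", "core id", "thread id", and "node_<n> id" -
// are used; the many other fields are ignored):
//
//     processor       : 0
//     physical id     : 0
//     core id         : 0
//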
1546static int
1547__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
1548 kmp_i18n_id_t *const msg_id, FILE *f)
1549{
1550 *address2os = NULL;
1551 *msg_id = kmp_i18n_null;
1552
1553 //
1554 // Scan of the file, and count the number of "processor" (osId) fields,
Alp Toker8f2d3f02014-02-24 10:40:15 +00001555 // and find the highest value of <n> for a node_<n> field.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001556 //
1557 char buf[256];
1558 unsigned num_records = 0;
1559 while (! feof(f)) {
1560 buf[sizeof(buf) - 1] = 1;
1561 if (! fgets(buf, sizeof(buf), f)) {
1562 //
1563 // Read errors presumably because of EOF
1564 //
1565 break;
1566 }
1567
1568 char s1[] = "processor";
1569 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1570 num_records++;
1571 continue;
1572 }
1573
1574 //
1575 // FIXME - this will match "node_<n> <garbage>"
1576 //
1577 unsigned level;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001578 if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001579 if (nodeIdIndex + level >= maxIndex) {
1580 maxIndex = nodeIdIndex + level;
1581 }
1582 continue;
1583 }
1584 }
1585
1586 //
1587 // Check for empty file / no valid processor records, or too many.
1588 // The number of records can't exceed the number of valid bits in the
1589 // affinity mask.
1590 //
1591 if (num_records == 0) {
1592 *line = 0;
1593 *msg_id = kmp_i18n_str_NoProcRecords;
1594 return -1;
1595 }
1596 if (num_records > (unsigned)__kmp_xproc) {
1597 *line = 0;
1598 *msg_id = kmp_i18n_str_TooManyProcRecords;
1599 return -1;
1600 }
1601
1602 //
1603 // Set the file pointer back to the begginning, so that we can scan the
1604 // file again, this time performing a full parse of the data.
1605 // Allocate a vector of ProcCpuInfo object, where we will place the data.
1606 // Adding an extra element at the end allows us to remove a lot of extra
1607 // checks for termination conditions.
1608 //
1609 if (fseek(f, 0, SEEK_SET) != 0) {
1610 *line = 0;
1611 *msg_id = kmp_i18n_str_CantRewindCpuinfo;
1612 return -1;
1613 }
1614
1615 //
1616 // Allocate the array of records to store the proc info in. The dummy
1617 // element at the end makes the logic in filling them out easier to code.
1618 //
1619 unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
1620 * sizeof(unsigned *));
1621 unsigned i;
1622 for (i = 0; i <= num_records; i++) {
1623 threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
1624 * sizeof(unsigned));
1625 }
1626
1627#define CLEANUP_THREAD_INFO \
1628 for (i = 0; i <= num_records; i++) { \
1629 __kmp_free(threadInfo[i]); \
1630 } \
1631 __kmp_free(threadInfo);
1632
1633 //
1634 // A value of UINT_MAX means that we didn't find the field
1635 //
1636 unsigned __index;
1637
1638#define INIT_PROC_INFO(p) \
1639 for (__index = 0; __index <= maxIndex; __index++) { \
1640 (p)[__index] = UINT_MAX; \
1641 }
1642
1643 for (i = 0; i <= num_records; i++) {
1644 INIT_PROC_INFO(threadInfo[i]);
1645 }
1646
1647 unsigned num_avail = 0;
1648 *line = 0;
1649 while (! feof(f)) {
1650 //
1651 // Create an inner scoping level, so that all the goto targets at the
1652 // end of the loop appear in an outer scoping level. This avoids
1653 // warnings about jumping past an initialization to a target in the
1654 // same block.
1655 //
1656 {
1657 buf[sizeof(buf) - 1] = 1;
1658 bool long_line = false;
1659 if (! fgets(buf, sizeof(buf), f)) {
1660 //
1661 // Read errors presumably because of EOF
1662 //
1663 // If there is valid data in threadInfo[num_avail], then fake
1664 // a blank line in ensure that the last address gets parsed.
1665 //
1666 bool valid = false;
1667 for (i = 0; i <= maxIndex; i++) {
1668 if (threadInfo[num_avail][i] != UINT_MAX) {
1669 valid = true;
1670 }
1671 }
1672 if (! valid) {
1673 break;
1674 }
1675 buf[0] = 0;
1676 } else if (!buf[sizeof(buf) - 1]) {
1677 //
1678 // The line is longer than the buffer. Set a flag and don't
1679 // emit an error if we were going to ignore the line, anyway.
1680 //
1681 long_line = true;
1682
1683#define CHECK_LINE \
1684 if (long_line) { \
1685 CLEANUP_THREAD_INFO; \
1686 *msg_id = kmp_i18n_str_LongLineCpuinfo; \
1687 return -1; \
1688 }
1689 }
1690 (*line)++;
1691
1692 char s1[] = "processor";
1693 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1694 CHECK_LINE;
1695 char *p = strchr(buf + sizeof(s1) - 1, ':');
1696 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001697 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001698 if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
1699 threadInfo[num_avail][osIdIndex] = val;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001700#if KMP_OS_LINUX && USE_SYSFS_INFO
1701 char path[256];
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001702 KMP_SNPRINTF(path, sizeof(path),
Jim Cownie181b4bb2013-12-23 17:28:57 +00001703 "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
1704 threadInfo[num_avail][osIdIndex]);
1705 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
1706
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001707 KMP_SNPRINTF(path, sizeof(path),
Jim Cownie181b4bb2013-12-23 17:28:57 +00001708 "/sys/devices/system/cpu/cpu%u/topology/core_id",
1709 threadInfo[num_avail][osIdIndex]);
1710 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001711 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001712#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001713 }
1714 char s2[] = "physical id";
1715 if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
1716 CHECK_LINE;
1717 char *p = strchr(buf + sizeof(s2) - 1, ':');
1718 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001719 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001720 if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
1721 threadInfo[num_avail][pkgIdIndex] = val;
1722 continue;
1723 }
1724 char s3[] = "core id";
1725 if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
1726 CHECK_LINE;
1727 char *p = strchr(buf + sizeof(s3) - 1, ':');
1728 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001729 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001730 if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
1731 threadInfo[num_avail][coreIdIndex] = val;
1732 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001733#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jim Cownie5e8470a2013-09-27 10:38:44 +00001734 }
1735 char s4[] = "thread id";
1736 if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
1737 CHECK_LINE;
1738 char *p = strchr(buf + sizeof(s4) - 1, ':');
1739 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001740 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001741 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
1742 threadInfo[num_avail][threadIdIndex] = val;
1743 continue;
1744 }
1745 unsigned level;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001746 if (KMP_SSCANF(buf, "node_%u id", &level) == 1) { // %u: level is unsigned
Jim Cownie5e8470a2013-09-27 10:38:44 +00001747 CHECK_LINE;
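            // Note: sizeof(s4) - 1 == 9 also works for "node_<n> id" lines,
            // since their ':' can only appear at or after that offset.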
1748 char *p = strchr(buf + sizeof(s4) - 1, ':');
1749 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001750 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001751 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
1752 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
1753 threadInfo[num_avail][nodeIdIndex + level] = val;
1754 continue;
1755 }
1756
1757 //
1758 // We didn't recognize the leading token on the line.
1759 // There are lots of leading tokens that we don't recognize -
1760 // if the line isn't empty, go on to the next line.
1761 //
1762 if ((*buf != 0) && (*buf != '\n')) {
1763 //
1764 // If the line is longer than the buffer, read characters
1765 // until we find a newline.
1766 //
1767 if (long_line) {
1768 int ch;
1769 while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
1770 }
1771 continue;
1772 }
1773
1774 //
1775 // A newline has signalled the end of the processor record.
1776 // Check that there aren't too many procs specified.
1777 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001778 if ((int)num_avail == __kmp_xproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001779 CLEANUP_THREAD_INFO;
1780 *msg_id = kmp_i18n_str_TooManyEntries;
1781 return -1;
1782 }
1783
1784 //
1785 // Check for missing fields. The osId field must be there, and we
1786 // currently require that the physical id field is specified, also.
1787 //
1788 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
1789 CLEANUP_THREAD_INFO;
1790 *msg_id = kmp_i18n_str_MissingProcField;
1791 return -1;
1792 }
1793 if (threadInfo[num_avail][pkgIdIndex] == UINT_MAX) { // check the record just completed, not just the first
1794 CLEANUP_THREAD_INFO;
1795 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
1796 return -1;
1797 }
1798
1799 //
1800 // Skip this proc if it is not included in the machine model.
1801 //
1802 if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
1803 INIT_PROC_INFO(threadInfo[num_avail]);
1804 continue;
1805 }
1806
1807 //
1808 // We have a successful parse of this proc's info.
1809 // Increment the counter, and prepare for the next proc.
1810 //
1811 num_avail++;
1812 KMP_ASSERT(num_avail <= num_records);
1813 INIT_PROC_INFO(threadInfo[num_avail]);
1814 }
1815 continue;
1816
1817 no_val:
1818 CLEANUP_THREAD_INFO;
1819 *msg_id = kmp_i18n_str_MissingValCpuinfo;
1820 return -1;
1821
1822 dup_field:
1823 CLEANUP_THREAD_INFO;
1824 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
1825 return -1;
1826 }
1827 *line = 0;
1828
1829# if KMP_MIC && REDUCE_TEAM_SIZE
1830 unsigned teamSize = 0;
1831# endif // KMP_MIC && REDUCE_TEAM_SIZE
1832
1833 // check for num_records == __kmp_xproc ???
1834
1835 //
1836 // If there's only one thread context to bind to, form an Address object
1837 // with depth 1 and return immediately (or, if affinity is off, set
1838 // address2os to NULL and return).
1839 //
1840 // If it is configured to omit the package level when there is only a
1841 // single package, the logic at the end of this routine won't work if
1842 // there is only a single thread - it would try to form an Address
1843 // object with depth 0.
1844 //
1845 KMP_ASSERT(num_avail > 0);
1846 KMP_ASSERT(num_avail <= num_records);
1847 if (num_avail == 1) {
1848 __kmp_ncores = 1;
1849 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001850 if (__kmp_affinity_verbose) {
1851 if (! KMP_AFFINITY_CAPABLE()) {
1852 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
1853 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1854 KMP_INFORM(Uniform, "KMP_AFFINITY");
1855 }
1856 else {
1857 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1858 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
1859 fullMask);
1860 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
1861 if (__kmp_affinity_respect_mask) {
1862 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1863 } else {
1864 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1865 }
1866 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1867 KMP_INFORM(Uniform, "KMP_AFFINITY");
1868 }
1869 int index;
1870 kmp_str_buf_t buf;
1871 __kmp_str_buf_init(&buf);
1872 __kmp_str_buf_print(&buf, "1");
1873 for (index = maxIndex - 1; index > pkgIdIndex; index--) {
1874 __kmp_str_buf_print(&buf, " x 1");
1875 }
1876 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
1877 __kmp_str_buf_free(&buf);
1878 }
1879
1880 if (__kmp_affinity_type == affinity_none) {
1881 CLEANUP_THREAD_INFO;
1882 return 0;
1883 }
1884
1885 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
1886 Address addr(1);
1887 addr.labels[0] = threadInfo[0][pkgIdIndex];
1888 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
1889
1890 if (__kmp_affinity_gran_levels < 0) {
1891 __kmp_affinity_gran_levels = 0;
1892 }
1893
1894 if (__kmp_affinity_verbose) {
1895 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
1896 }
1897
1898 CLEANUP_THREAD_INFO;
1899 return 1;
1900 }
1901
1902 //
1903 // Sort the threadInfo table by physical Id.
1904 //
1905 qsort(threadInfo, num_avail, sizeof(*threadInfo),
1906 __kmp_affinity_cmp_ProcCpuInfo_phys_id);
1907
1908 //
1909 // The table is now sorted by pkgId / coreId / threadId, but we really
1910 // don't know the radix of any of the fields. pkgId's may be sparsely
1911 // assigned among the chips on a system. Although coreId's are usually
1912 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
1913 // [0..threadsPerCore-1], we don't want to make any such assumptions.
1914 //
1915 // For that matter, we don't know what coresPerPkg and threadsPerCore
1916 // (or the total # packages) are at this point - we want to determine
1917 // that now. We only have an upper bound on the first two figures.
1918 //
1919 unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
1920 * sizeof(unsigned));
1921 unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
1922 * sizeof(unsigned));
1923 unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
1924 * sizeof(unsigned));
1925 unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
1926 * sizeof(unsigned));
1927
1928 bool assign_thread_ids = false;
1929 unsigned threadIdCt;
1930 unsigned index;
1931
1932 restart_radix_check:
1933 threadIdCt = 0;
1934
1935 //
1936 // Initialize the counter arrays with data from threadInfo[0].
1937 //
1938 if (assign_thread_ids) {
1939 if (threadInfo[0][threadIdIndex] == UINT_MAX) {
1940 threadInfo[0][threadIdIndex] = threadIdCt++;
1941 }
1942 else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
1943 threadIdCt = threadInfo[0][threadIdIndex] + 1;
1944 }
1945 }
1946 for (index = 0; index <= maxIndex; index++) {
1947 counts[index] = 1;
1948 maxCt[index] = 1;
1949 totals[index] = 1;
1950 lastId[index] = threadInfo[0][index];
1951 }
1952
1953 //
1954 // Run through the rest of the OS procs.
1955 //
1956 for (i = 1; i < num_avail; i++) {
1957 //
1958 // Find the most significant index whose id differs
1959 // from the id for the previous OS proc.
1960 //
1961 for (index = maxIndex; index >= threadIdIndex; index--) {
1962 if (assign_thread_ids && (index == threadIdIndex)) {
1963 //
1964 // Auto-assign the thread id field if it wasn't specified.
1965 //
1966 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
1967 threadInfo[i][threadIdIndex] = threadIdCt++;
1968 }
1969
1970 //
1971 // Apparently the thread id field was specified for some
1972 // entries and not others. Start the thread id counter
1973 // off at the next higher thread id.
1974 //
1975 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
1976 threadIdCt = threadInfo[i][threadIdIndex] + 1;
1977 }
1978 }
1979 if (threadInfo[i][index] != lastId[index]) {
1980 //
1981 // Run through all indices which are less significant,
1982 // and reset the counts to 1.
1983 //
1984 // At all levels up to and including index, we need to
1985 // increment the totals and record the last id.
1986 //
1987 unsigned index2;
1988 for (index2 = threadIdIndex; index2 < index; index2++) {
1989 totals[index2]++;
1990 if (counts[index2] > maxCt[index2]) {
1991 maxCt[index2] = counts[index2];
1992 }
1993 counts[index2] = 1;
1994 lastId[index2] = threadInfo[i][index2];
1995 }
1996 counts[index]++;
1997 totals[index]++;
1998 lastId[index] = threadInfo[i][index];
1999
2000 if (assign_thread_ids && (index > threadIdIndex)) {
2001
2002# if KMP_MIC && REDUCE_TEAM_SIZE
2003 //
2004 // The default team size is the total #threads in the machine
2005 // minus 1 thread for every core that has 3 or more threads.
2006 //
2007 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2008# endif // KMP_MIC && REDUCE_TEAM_SIZE
2009
2010 //
2011 // Restart the thread counter, as we are on a new core.
2012 //
2013 threadIdCt = 0;
2014
2015 //
2016 // Auto-assign the thread id field if it wasn't specified.
2017 //
2018 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2019 threadInfo[i][threadIdIndex] = threadIdCt++;
2020 }
2021
2022 //
2023 // Apparently the thread id field was specified for some
2024 // entries and not others. Start the thread id counter
2025 // off at the next higher thread id.
2026 //
2027 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2028 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2029 }
2030 }
2031 break;
2032 }
2033 }
2034 if (index < threadIdIndex) {
2035 //
2036 // If thread ids were specified, it is an error if they are not
2037 // unique. Also, check that we haven't already restarted the
2038 // loop (to be safe - shouldn't need to).
2039 //
2040 if ((threadInfo[i][threadIdIndex] != UINT_MAX)
2041 || assign_thread_ids) {
2042 __kmp_free(lastId);
2043 __kmp_free(totals);
2044 __kmp_free(maxCt);
2045 __kmp_free(counts);
2046 CLEANUP_THREAD_INFO;
2047 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2048 return -1;
2049 }
2050
2051 //
2052 // If the thread ids were not specified and we see entries
2053 // that are duplicates, start the loop over and
2054 // assign the thread ids manually.
2055 //
2056 assign_thread_ids = true;
2057 goto restart_radix_check;
2058 }
2059 }
2060
2061# if KMP_MIC && REDUCE_TEAM_SIZE
2062 //
2063 // The default team size is the total #threads in the machine
2064 // minus 1 thread for every core that has 3 or more threads.
2065 //
2066 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2067# endif // KMP_MIC && REDUCE_TEAM_SIZE
2068
2069 for (index = threadIdIndex; index <= maxIndex; index++) {
2070 if (counts[index] > maxCt[index]) {
2071 maxCt[index] = counts[index];
2072 }
2073 }
2074
2075 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2076 nCoresPerPkg = maxCt[coreIdIndex];
2077 nPackages = totals[pkgIdIndex];
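    //
    // A worked example with made-up numbers: if the sorted records carry
    // (pkgId, coreId, threadId) tuples
    // (0,0,0) (0,0,1) (0,1,0) (0,1,1) (1,0,0) (1,0,1) (1,1,0) (1,1,1),
    // the pass above leaves totals[pkgIdIndex] = 2, maxCt[coreIdIndex] = 2,
    // and maxCt[threadIdIndex] = 2, i.e. 2 packages x 2 cores x 2 HW threads.
    //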
2078
2079 //
2080 // Check to see if the machine topology is uniform
2081 //
2082 unsigned prod = totals[maxIndex];
2083 for (index = threadIdIndex; index < maxIndex; index++) {
2084 prod *= maxCt[index];
2085 }
2086 bool uniform = (prod == totals[threadIdIndex]);
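    //
    // Continuing the example above: prod = 2 * 2 * 2 = 8 equals
    // totals[threadIdIndex] = 8, so the topology is uniform. Had one HW
    // thread been missing (7 records), prod would still be 8 != 7 and the
    // topology would be flagged non-uniform.
    //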
2087
2088 //
2089 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00002090 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002091 // nCoresPerPkg, & nPackages. Make sure all these vars are set
2092 // correctly, and return now if affinity is not enabled.
2093 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00002094 __kmp_ncores = totals[coreIdIndex];
2095
2096 if (__kmp_affinity_verbose) {
2097 if (! KMP_AFFINITY_CAPABLE()) {
2098 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2099 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2100 if (uniform) {
2101 KMP_INFORM(Uniform, "KMP_AFFINITY");
2102 } else {
2103 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2104 }
2105 }
2106 else {
2107 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2108 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
2109 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2110 if (__kmp_affinity_respect_mask) {
2111 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2112 } else {
2113 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2114 }
2115 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2116 if (uniform) {
2117 KMP_INFORM(Uniform, "KMP_AFFINITY");
2118 } else {
2119 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2120 }
2121 }
2122 kmp_str_buf_t buf;
2123 __kmp_str_buf_init(&buf);
2124
2125 __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
2126 for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2127 __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
2128 }
2129 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
2130 maxCt[threadIdIndex], __kmp_ncores);
2131
2132 __kmp_str_buf_free(&buf);
2133 }
2134
2135# if KMP_MIC && REDUCE_TEAM_SIZE
2136 //
2137 // Set the default team size.
2138 //
2139 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2140 __kmp_dflt_team_nth = teamSize;
2141 KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
2142 __kmp_dflt_team_nth));
2143 }
2144# endif // KMP_MIC && REDUCE_TEAM_SIZE
2145
2146 if (__kmp_affinity_type == affinity_none) {
2147 __kmp_free(lastId);
2148 __kmp_free(totals);
2149 __kmp_free(maxCt);
2150 __kmp_free(counts);
2151 CLEANUP_THREAD_INFO;
2152 return 0;
2153 }
2154
2155 //
2156 // Count the number of levels which have more nodes at that level than
2157 // at the parent's level (with there being an implicit root node of
2158 // the top level). This is equivalent to saying that there is at least
2159 // one node at this level which has a sibling. These levels are in the
2160 // map, and the package level is always in the map.
2161 //
2162 bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
2164 for (index = threadIdIndex; index < maxIndex; index++) {
2165 KMP_ASSERT(totals[index] >= totals[index + 1]);
2166 inMap[index] = (totals[index] > totals[index + 1]);
2167 }
2168 inMap[maxIndex] = (totals[maxIndex] > 1);
2169 inMap[pkgIdIndex] = true;
2170
2171 int depth = 0;
2172 for (index = threadIdIndex; index <= maxIndex; index++) {
2173 if (inMap[index]) {
2174 depth++;
2175 }
2176 }
2177 KMP_ASSERT(depth > 0);
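    //
    // For instance (hypothetical): on 1 package x 4 cores x 1 HW thread,
    // totals[threadIdIndex] == totals[coreIdIndex] == 4, so the thread level
    // is dropped from the map; only the core and package levels survive and
    // depth == 2.
    //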
2178
2179 //
2180 // Construct the data structure that is to be returned.
2181 //
2182 *address2os = (AddrUnsPair*)
2183 __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
2184 int pkgLevel = -1;
2185 int coreLevel = -1;
2186 int threadLevel = -1;
2187
2188 for (i = 0; i < num_avail; ++i) {
2189 Address addr(depth);
2190 unsigned os = threadInfo[i][osIdIndex];
2191 int src_index;
2192 int dst_index = 0;
2193
2194 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
2195 if (! inMap[src_index]) {
2196 continue;
2197 }
2198 addr.labels[dst_index] = threadInfo[i][src_index];
2199 if (src_index == pkgIdIndex) {
2200 pkgLevel = dst_index;
2201 }
2202 else if (src_index == coreIdIndex) {
2203 coreLevel = dst_index;
2204 }
2205 else if (src_index == threadIdIndex) {
2206 threadLevel = dst_index;
2207 }
2208 dst_index++;
2209 }
2210 (*address2os)[i] = AddrUnsPair(addr, os);
2211 }
2212
2213 if (__kmp_affinity_gran_levels < 0) {
2214 //
2215 // Set the granularity level based on what levels are modeled
2216 // in the machine topology map.
2217 //
2218 unsigned src_index;
2219 __kmp_affinity_gran_levels = 0;
2220 for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2221 if (! inMap[src_index]) {
2222 continue;
2223 }
2224 switch (src_index) {
2225 case threadIdIndex:
2226 if (__kmp_affinity_gran > affinity_gran_thread) {
2227 __kmp_affinity_gran_levels++;
2228 }
2229
2230 break;
2231 case coreIdIndex:
2232 if (__kmp_affinity_gran > affinity_gran_core) {
2233 __kmp_affinity_gran_levels++;
2234 }
2235 break;
2236
2237 case pkgIdIndex:
2238 if (__kmp_affinity_gran > affinity_gran_package) {
2239 __kmp_affinity_gran_levels++;
2240 }
2241 break;
2242 }
2243 }
2244 }
2245
2246 if (__kmp_affinity_verbose) {
2247 __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2248 coreLevel, threadLevel);
2249 }
2250
2251 __kmp_free(inMap);
2252 __kmp_free(lastId);
2253 __kmp_free(totals);
2254 __kmp_free(maxCt);
2255 __kmp_free(counts);
2256 CLEANUP_THREAD_INFO;
2257 return depth;
2258}
2259
2260
2261//
2262// Create and return a table of affinity masks, indexed by OS thread ID.
2263// This routine handles OR'ing together all the affinity masks of threads
2264// that are sufficiently close, if granularity > fine.
2265//
2266static kmp_affin_mask_t *
2267__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
2268 AddrUnsPair *address2os, unsigned numAddrs)
2269{
2270 //
2271 // First form a table of affinity masks in order of OS thread id.
2272 //
2273 unsigned depth;
2274 unsigned maxOsId;
2275 unsigned i;
2276
2277 KMP_ASSERT(numAddrs > 0);
2278 depth = address2os[0].first.depth;
2279
2280 maxOsId = 0;
2281 for (i = 0; i < numAddrs; i++) {
2282 unsigned osId = address2os[i].second;
2283 if (osId > maxOsId) {
2284 maxOsId = osId;
2285 }
2286 }
2287 kmp_affin_mask_t *osId2Mask = (kmp_affin_mask_t *)__kmp_allocate(
2288 (maxOsId + 1) * __kmp_affin_mask_size);
2289
2290 //
2291 // Sort the address2os table according to physical order. Doing so
2292 // will put all threads on the same core/package/node in consecutive
2293 // locations.
2294 //
2295 qsort(address2os, numAddrs, sizeof(*address2os),
2296 __kmp_affinity_cmp_Address_labels);
2297
2298 KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2299 if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
2300 KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
2301 }
2302 if (__kmp_affinity_gran_levels >= (int)depth) {
2303 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2304 && (__kmp_affinity_type != affinity_none))) {
2305 KMP_WARNING(AffThreadsMayMigrate);
2306 }
2307 }
2308
2309 //
2310 // Run through the table, forming the masks for all threads on each
2311 // core. Threads on the same core will have identical "Address"
2312 // objects, not considering the last level, which must be the thread
2313 // id. All threads on a core will appear consecutively.
2314 //
2315 unsigned unique = 0;
2316 unsigned j = 0; // index of 1st thread on core
2317 unsigned leader = 0;
2318 Address *leaderAddr = &(address2os[0].first);
2319 kmp_affin_mask_t *sum
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002320 = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002321 KMP_CPU_ZERO(sum);
2322 KMP_CPU_SET(address2os[0].second, sum);
2323 for (i = 1; i < numAddrs; i++) {
2324 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002325 // If this thread is sufficiently close to the leader (within the
Jim Cownie5e8470a2013-09-27 10:38:44 +00002326 // granularity setting), then set the bit for this os thread in the
2327 // affinity mask for this group, and go on to the next thread.
2328 //
2329 if (leaderAddr->isClose(address2os[i].first,
2330 __kmp_affinity_gran_levels)) {
2331 KMP_CPU_SET(address2os[i].second, sum);
2332 continue;
2333 }
2334
2335 //
2336 // For every thread in this group, copy the mask to the thread's
2337 // entry in the osId2Mask table. Mark the first address as a
2338 // leader.
2339 //
2340 for (; j < i; j++) {
2341 unsigned osId = address2os[j].second;
2342 KMP_DEBUG_ASSERT(osId <= maxOsId);
2343 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2344 KMP_CPU_COPY(mask, sum);
2345 address2os[j].first.leader = (j == leader);
2346 }
2347 unique++;
2348
2349 //
2350 // Start a new mask.
2351 //
2352 leader = i;
2353 leaderAddr = &(address2os[i].first);
2354 KMP_CPU_ZERO(sum);
2355 KMP_CPU_SET(address2os[i].second, sum);
2356 }
2357
2358 //
2359 // For every thread in last group, copy the mask to the thread's
2360 // entry in the osId2Mask table.
2361 //
2362 for (; j < i; j++) {
2363 unsigned osId = address2os[j].second;
2364 KMP_DEBUG_ASSERT(osId <= maxOsId);
2365 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2366 KMP_CPU_COPY(mask, sum);
2367 address2os[j].first.leader = (j == leader);
2368 }
2369 unique++;
2370
2371 *maxIndex = maxOsId;
2372 *numUnique = unique;
2373 return osId2Mask;
2374}
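
//
// A minimal usage sketch of __kmp_create_masks() (this mirrors the real call
// made later in this file; shown here only for orientation):
//
//     unsigned maxIndex, numUnique;
//     kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
//         address2os, __kmp_avail_proc);
//     // ... look up a thread's mask with KMP_CPU_INDEX(osId2Mask, osId) ...
//     __kmp_free(osId2Mask);
//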
2375
2376
2377//
2378// Stuff for the affinity proclist parsers. It's easier to declare these vars
2379// as file-static than to try and pass them through the calling sequence of
2380// the recursive-descent OMP_PLACES parser.
2381//
2382static kmp_affin_mask_t *newMasks;
2383static int numNewMasks;
2384static int nextNewMask;
2385
2386#define ADD_MASK(_mask) \
2387 { \
2388 if (nextNewMask >= numNewMasks) { \
2389 numNewMasks *= 2; \
2390 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
2391 numNewMasks * __kmp_affin_mask_size); \
2392 } \
2393 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
2394 nextNewMask++; \
2395 }
2396
2397#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
2398 { \
2399 if (((_osId) > _maxOsId) || \
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002400 (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002401 if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
2402 && (__kmp_affinity_type != affinity_none))) { \
2403 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
2404 } \
2405 } \
2406 else { \
2407 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
2408 } \
2409 }
2410
2411
2412//
2413// Re-parse the proclist (for the explicit affinity type), and form the list
2414// of affinity masks (newMasks) indexed by gtid.
2415//
2416static void
2417__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
2418 unsigned int *out_numMasks, const char *proclist,
2419 kmp_affin_mask_t *osId2Mask, int maxOsId)
2420{
2421 const char *scan = proclist;
2422 const char *next = proclist;
2423
2424 //
2425 // We use malloc() for the temporary mask vector,
2426 // so that we can use realloc() to extend it.
2427 //
2428 numNewMasks = 2;
2429 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
2430 * __kmp_affin_mask_size);
2431 nextNewMask = 0;
2432 kmp_affin_mask_t *sumMask = (kmp_affin_mask_t *)__kmp_allocate(
2433 __kmp_affin_mask_size);
2434 int setSize = 0;
2435
2436 for (;;) {
2437 int start, end, stride;
2438
2439 SKIP_WS(scan);
2440 next = scan;
2441 if (*next == '\0') {
2442 break;
2443 }
2444
2445 if (*next == '{') {
2446 int num;
2447 setSize = 0;
2448 next++; // skip '{'
2449 SKIP_WS(next);
2450 scan = next;
2451
2452 //
2453 // Read the first integer in the set.
2454 //
2455 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2456 "bad proclist");
2457 SKIP_DIGITS(next);
2458 num = __kmp_str_to_int(scan, *next);
2459 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2460
2461 //
2462 // Copy the mask for that osId to the sum (union) mask.
2463 //
2464 if ((num > maxOsId) ||
2465 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2466 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2467 && (__kmp_affinity_type != affinity_none))) {
2468 KMP_WARNING(AffIgnoreInvalidProcID, num);
2469 }
2470 KMP_CPU_ZERO(sumMask);
2471 }
2472 else {
2473 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2474 setSize = 1;
2475 }
2476
2477 for (;;) {
2478 //
2479 // Check for end of set.
2480 //
2481 SKIP_WS(next);
2482 if (*next == '}') {
2483 next++; // skip '}'
2484 break;
2485 }
2486
2487 //
2488 // Skip optional comma.
2489 //
2490 if (*next == ',') {
2491 next++;
2492 }
2493 SKIP_WS(next);
2494
2495 //
2496 // Read the next integer in the set.
2497 //
2498 scan = next;
2499 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2500 "bad explicit proc list");
2501
2502 SKIP_DIGITS(next);
2503 num = __kmp_str_to_int(scan, *next);
2504 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2505
2506 //
2507 // Add the mask for that osId to the sum mask.
2508 //
2509 if ((num > maxOsId) ||
2510 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2511 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2512 && (__kmp_affinity_type != affinity_none))) {
2513 KMP_WARNING(AffIgnoreInvalidProcID, num);
2514 }
2515 }
2516 else {
2517 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2518 setSize++;
2519 }
2520 }
2521 if (setSize > 0) {
2522 ADD_MASK(sumMask);
2523 }
2524
2525 SKIP_WS(next);
2526 if (*next == ',') {
2527 next++;
2528 }
2529 scan = next;
2530 continue;
2531 }
2532
2533 //
2534 // Read the first integer.
2535 //
2536 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2537 SKIP_DIGITS(next);
2538 start = __kmp_str_to_int(scan, *next);
2539 KMP_ASSERT2(start >= 0, "bad explicit proc list");
2540 SKIP_WS(next);
2541
2542 //
2543 // If this isn't a range, then add a mask to the list and go on.
2544 //
2545 if (*next != '-') {
2546 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2547
2548 //
2549 // Skip optional comma.
2550 //
2551 if (*next == ',') {
2552 next++;
2553 }
2554 scan = next;
2555 continue;
2556 }
2557
2558 //
2559 // This is a range. Skip over the '-' and read in the 2nd int.
2560 //
2561 next++; // skip '-'
2562 SKIP_WS(next);
2563 scan = next;
2564 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2565 SKIP_DIGITS(next);
2566 end = __kmp_str_to_int(scan, *next);
2567 KMP_ASSERT2(end >= 0, "bad explicit proc list");
2568
2569 //
2570 // Check for a stride parameter
2571 //
2572 stride = 1;
2573 SKIP_WS(next);
2574 if (*next == ':') {
2575 //
2576 // A stride is specified. Skip over the ':' and read the 3rd int.
2577 //
2578 int sign = +1;
2579 next++; // skip ':'
2580 SKIP_WS(next);
2581 scan = next;
2582 if (*next == '-') {
2583 sign = -1;
2584 next++;
2585 SKIP_WS(next);
2586 scan = next;
2587 }
2588 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2589 "bad explicit proc list");
2590 SKIP_DIGITS(next);
2591 stride = __kmp_str_to_int(scan, *next);
2592 KMP_ASSERT2(stride >= 0, "bad explicit proc list");
2593 stride *= sign;
2594 }
2595
2596 //
2597 // Do some range checks.
2598 //
2599 KMP_ASSERT2(stride != 0, "bad explicit proc list");
2600 if (stride > 0) {
2601 KMP_ASSERT2(start <= end, "bad explicit proc list");
2602 }
2603 else {
2604 KMP_ASSERT2(start >= end, "bad explicit proc list");
2605 }
2606 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
2607
2608 //
2609 // Add the mask for each OS proc # to the list.
2610 //
2611 if (stride > 0) {
2612 do {
2613 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2614 start += stride;
2615 } while (start <= end);
2616 }
2617 else {
2618 do {
2619 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2620 start += stride;
2621 } while (start >= end);
2622 }
2623
2624 //
2625 // Skip optional comma.
2626 //
2627 SKIP_WS(next);
2628 if (*next == ',') {
2629 next++;
2630 }
2631 scan = next;
2632 }
2633
2634 *out_numMasks = nextNewMask;
2635 if (nextNewMask == 0) {
2636 *out_masks = NULL;
2637 KMP_INTERNAL_FREE(newMasks);
2638 return;
2639 }
2640 *out_masks
2641 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002642 KMP_MEMCPY(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002643 __kmp_free(sumMask);
2644 KMP_INTERNAL_FREE(newMasks);
2645}
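
//
// Illustrative inputs for the parser above (a hedged sketch; the accepted
// syntax is exactly what the code implements): a proclist such as
// "0,2,4-6,{8,9}" yields six masks - {0}, {2}, {4}, {5}, {6}, and the union
// {8,9} - while a strided range such as "0-6:2" yields {0}, {2}, {4}, {6}.
//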
2646
2647
2648# if OMP_40_ENABLED
2649
2650/*-----------------------------------------------------------------------------
2651
2652Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
2653places. Again, here is the grammar:
2654
2655place_list := place
2656place_list := place , place_list
2657place := num
2658place := place : num
2659place := place : num : signed
2660place := { subplace_list }
2661place := ! place // (lowest priority)
2662subplace_list := subplace
2663subplace_list := subplace , subplace_list
2664subplace := num
2665subplace := num : num
2666subplace := num : num : signed
2667signed := num
2668signed := + signed
2669signed := - signed
2670
2671-----------------------------------------------------------------------------*/
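
//
// For example (illustrative, assuming 8 OS procs 0..7): the place list
// "{0,1},{2,3},{4,5},{6,7}" names four two-proc places, and the shorthand
// "{0,1}:4:2" expands to the same four places - start at {0,1}, replicate
// 4 times, shifting by 2 procs each time.
//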
2672
2673static void
2674__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
2675 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
2676{
2677 const char *next;
2678
2679 for (;;) {
2680 int start, count, stride, i;
2681
2682 //
2683 // Read in the starting proc id
2684 //
2685 SKIP_WS(*scan);
2686 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2687 "bad explicit places list");
2688 next = *scan;
2689 SKIP_DIGITS(next);
2690 start = __kmp_str_to_int(*scan, *next);
2691 KMP_ASSERT(start >= 0);
2692 *scan = next;
2693
2694 //
2695 // valid follow sets are ',' ':' and '}'
2696 //
2697 SKIP_WS(*scan);
2698 if (**scan == '}' || **scan == ',') {
2699 if ((start > maxOsId) ||
2700 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
2701 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2702 && (__kmp_affinity_type != affinity_none))) {
2703 KMP_WARNING(AffIgnoreInvalidProcID, start);
2704 }
2705 }
2706 else {
2707 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
2708 (*setSize)++;
2709 }
2710 if (**scan == '}') {
2711 break;
2712 }
2713 (*scan)++; // skip ','
2714 continue;
2715 }
2716 KMP_ASSERT2(**scan == ':', "bad explicit places list");
2717 (*scan)++; // skip ':'
2718
2719 //
2720 // Read count parameter
2721 //
2722 SKIP_WS(*scan);
2723 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2724 "bad explicit places list");
2725 next = *scan;
2726 SKIP_DIGITS(next);
2727 count = __kmp_str_to_int(*scan, *next);
2728 KMP_ASSERT(count >= 0);
2729 *scan = next;
2730
2731 //
2732 // valid follow sets are ',' ':' and '}'
2733 //
2734 SKIP_WS(*scan);
2735 if (**scan == '}' || **scan == ',') {
2736 for (i = 0; i < count; i++) {
2737 if ((start > maxOsId) ||
2738 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
2739 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2740 && (__kmp_affinity_type != affinity_none))) {
2741 KMP_WARNING(AffIgnoreInvalidProcID, start);
2742 }
2743 break; // don't proliferate warnings for large count
2744 }
2745 else {
2746 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
2747 start++;
2748 (*setSize)++;
2749 }
2750 }
2751 if (**scan == '}') {
2752 break;
2753 }
2754 (*scan)++; // skip ','
2755 continue;
2756 }
2757 KMP_ASSERT2(**scan == ':', "bad explicit places list");
2758 (*scan)++; // skip ':'
2759
2760 //
2761 // Read stride parameter
2762 //
2763 int sign = +1;
2764 for (;;) {
2765 SKIP_WS(*scan);
2766 if (**scan == '+') {
2767 (*scan)++; // skip '+'
2768 continue;
2769 }
2770 if (**scan == '-') {
2771 sign *= -1;
2772 (*scan)++; // skip '-'
2773 continue;
2774 }
2775 break;
2776 }
2777 SKIP_WS(*scan);
2778 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2779 "bad explicit places list");
2780 next = *scan;
2781 SKIP_DIGITS(next);
2782 stride = __kmp_str_to_int(*scan, *next);
2783 KMP_ASSERT(stride >= 0);
2784 *scan = next;
2785 stride *= sign;
2786
2787 //
2788 // valid follow sets are ',' and '}'
2789 //
2790 SKIP_WS(*scan);
2791 if (**scan == '}' || **scan == ',') {
2792 for (i = 0; i < count; i++) {
2793 if ((start > maxOsId) ||
2794 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
2795 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2796 && (__kmp_affinity_type != affinity_none))) {
2797 KMP_WARNING(AffIgnoreInvalidProcID, start);
2798 }
2799 break; // don't proliferate warnings for large count
2800 }
2801 else {
2802 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
2803 start += stride;
2804 (*setSize)++;
2805 }
2806 }
2807 if (**scan == '}') {
2808 break;
2809 }
2810 (*scan)++; // skip ','
2811 continue;
2812 }
2813
2814 KMP_ASSERT2(0, "bad explicit places list");
2815 }
2816}
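
//
// Illustrative subplace forms handled above: "5" adds OS proc 5; "4:3" adds
// procs 4,5,6 (start 4, count 3, implicit stride 1); "0:4:2" adds procs
// 0,2,4,6 (start 0, count 4, stride 2). Invalid proc ids only draw a warning.
//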
2817
2818
2819static void
2820__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
2821 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
2822{
2823 const char *next;
2824
2825 //
2826 // valid follow sets are '{' '!' and num
2827 //
2828 SKIP_WS(*scan);
2829 if (**scan == '{') {
2830 (*scan)++; // skip '{'
2831 __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
2832 setSize);
2833 KMP_ASSERT2(**scan == '}', "bad explicit places list");
2834 (*scan)++; // skip '}'
2835 }
2836 else if (**scan == '!') {
2837 (*scan)++; // skip '!' first, or the recursive call would see it again
2838 __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
2839 KMP_CPU_COMPLEMENT(tempMask);
2840 }
2841 else if ((**scan >= '0') && (**scan <= '9')) {
2842 next = *scan;
2843 SKIP_DIGITS(next);
2844 int num = __kmp_str_to_int(*scan, *next);
2845 KMP_ASSERT(num >= 0);
2846 if ((num > maxOsId) ||
2847 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2848 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2849 && (__kmp_affinity_type != affinity_none))) {
2850 KMP_WARNING(AffIgnoreInvalidProcID, num);
2851 }
2852 }
2853 else {
2854 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
2855 (*setSize)++;
2856 }
2857 *scan = next; // skip num
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002858 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002859 else {
2860 KMP_ASSERT2(0, "bad explicit places list");
2861 }
2862}
2863
2864
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002865//static void
2866void
Jim Cownie5e8470a2013-09-27 10:38:44 +00002867__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
2868 unsigned int *out_numMasks, const char *placelist,
2869 kmp_affin_mask_t *osId2Mask, int maxOsId)
2870{
2871 const char *scan = placelist;
2872 const char *next = placelist;
2873
2874 numNewMasks = 2;
2875 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
2876 * __kmp_affin_mask_size);
2877 nextNewMask = 0;
2878
2879 kmp_affin_mask_t *tempMask = (kmp_affin_mask_t *)__kmp_allocate(
2880 __kmp_affin_mask_size);
2881 KMP_CPU_ZERO(tempMask);
2882 int setSize = 0;
2883
2884 for (;;) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002885 __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
2886
2887 //
2888 // valid follow sets are ',' ':' and EOL
2889 //
2890 SKIP_WS(scan);
2891 if (*scan == '\0' || *scan == ',') {
2892 if (setSize > 0) {
2893 ADD_MASK(tempMask);
2894 }
2895 KMP_CPU_ZERO(tempMask);
2896 setSize = 0;
2897 if (*scan == '\0') {
2898 break;
2899 }
2900 scan++; // skip ','
2901 continue;
2902 }
2903
2904 KMP_ASSERT2(*scan == ':', "bad explicit places list");
2905 scan++; // skip ':'
2906
2907 //
2908 // Read count parameter
2909 //
2910 SKIP_WS(scan);
2911 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
2912 "bad explicit places list");
2913 next = scan;
2914 SKIP_DIGITS(next);
Jim Cownie181b4bb2013-12-23 17:28:57 +00002915 int count = __kmp_str_to_int(scan, *next);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002916 KMP_ASSERT(count >= 0);
2917 scan = next;
2918
2919 //
2920 // valid follow sets are ',' ':' and EOL
2921 //
2922 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002923 int stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002924 if (*scan == '\0' || *scan == ',') {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002925 stride = +1;
2926 }
2927 else {
2928 KMP_ASSERT2(*scan == ':', "bad explicit places list");
2929 scan++; // skip ':'
Jim Cownie5e8470a2013-09-27 10:38:44 +00002930
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002931 //
2932 // Read stride parameter
2933 //
2934 int sign = +1;
2935 for (;;) {
2936 SKIP_WS(scan);
2937 if (*scan == '+') {
2938 scan++; // skip '+'
2939 continue;
2940 }
2941 if (*scan == '-') {
2942 sign *= -1;
2943 scan++; // skip '-'
2944 continue;
2945 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002946 break;
2947 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002948 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002949 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
2950 "bad explicit places list");
2951 next = scan;
2952 SKIP_DIGITS(next);
2953 stride = __kmp_str_to_int(scan, *next);
2954 KMP_DEBUG_ASSERT(stride >= 0);
2955 scan = next;
2956 stride *= sign;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002957 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002958
2959 if (stride > 0) {
2960 int i;
2961 for (i = 0; i < count; i++) {
2962 int j;
2963 if (setSize == 0) {
2964 break;
2965 }
2966 ADD_MASK(tempMask);
2967 setSize = 0;
2968 for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002969 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
2970 KMP_CPU_CLR(j, tempMask);
2971 }
2972 else if ((j > maxOsId) ||
2973 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
Andrey Churbanov16a14322015-03-10 09:34:38 +00002974 if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
2975 && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002976 KMP_WARNING(AffIgnoreInvalidProcID, j);
2977 }
2978 KMP_CPU_CLR(j, tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002979 }
2980 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002981 KMP_CPU_SET(j, tempMask);
2982 setSize++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002983 }
2984 }
2985 for (; j >= 0; j--) {
2986 KMP_CPU_CLR(j, tempMask);
2987 }
2988 }
2989 }
2990 else {
2991 int i;
2992 for (i = 0; i < count; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002993 int j;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002994 if (setSize == 0) {
2995 break;
2996 }
2997 ADD_MASK(tempMask);
2998 setSize = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002999 for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003000 j++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003001 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3002 KMP_CPU_CLR(j, tempMask);
3003 }
3004 else if ((j > maxOsId) ||
3005 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
Andrey Churbanov16a14322015-03-10 09:34:38 +00003006 if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
3007 && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003008 KMP_WARNING(AffIgnoreInvalidProcID, j);
3009 }
3010 KMP_CPU_CLR(j, tempMask);
3011 }
3012 else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003013 KMP_CPU_SET(j, tempMask);
3014 setSize++;
3015 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003016 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003017 for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003018 KMP_CPU_CLR(j, tempMask);
3019 }
3020 }
3021 }
3022 KMP_CPU_ZERO(tempMask);
3023 setSize = 0;
3024
3025 //
3026 // valid follow sets are ',' and EOL
3027 //
3028 SKIP_WS(scan);
3029 if (*scan == '\0') {
3030 break;
3031 }
3032 if (*scan == ',') {
3033 scan++; // skip ','
3034 continue;
3035 }
3036
3037 KMP_ASSERT2(0, "bad explicit places list");
3038 }
3039
3040 *out_numMasks = nextNewMask;
3041 if (nextNewMask == 0) {
3042 *out_masks = NULL;
3043 KMP_INTERNAL_FREE(newMasks);
3044 return;
3045 }
3046 *out_masks
3047 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003048 KMP_MEMCPY(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003049 __kmp_free(tempMask);
3050 KMP_INTERNAL_FREE(newMasks);
3051}
3052
3053# endif /* OMP_40_ENABLED */
3054
3055#undef ADD_MASK
3056#undef ADD_MASK_OSID
3057
Jim Cownie5e8470a2013-09-27 10:38:44 +00003058static void
3059__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
3060{
3061 if ( __kmp_place_num_cores == 0 ) {
3062 if ( __kmp_place_num_threads_per_core == 0 ) {
3063 return; // no core-limiting action requested, exit
3064 }
3065 __kmp_place_num_cores = nCoresPerPkg; // use all available cores
3066 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003067 if ( !__kmp_affinity_uniform_topology() ) {
3068 KMP_WARNING( AffThrPlaceNonUniform );
3069 return; // don't support non-uniform topology
3070 }
3071 if ( depth != 3 ) {
3072 KMP_WARNING( AffThrPlaceNonThreeLevel );
3073 return; // only 3-level (package/core/thread) topologies are supported
Jim Cownie5e8470a2013-09-27 10:38:44 +00003074 }
3075 if ( __kmp_place_num_threads_per_core == 0 ) {
3076 __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
3077 }
Andrey Churbanov12875572015-03-10 09:00:36 +00003078 if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003079 KMP_WARNING( AffThrPlaceManyCores );
3080 return;
3081 }
3082
3083 AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
3084 nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
3085 int i, j, k, n_old = 0, n_new = 0;
3086 for ( i = 0; i < nPackages; ++i ) {
3087 for ( j = 0; j < nCoresPerPkg; ++j ) {
Andrey Churbanov12875572015-03-10 09:00:36 +00003088 if ( j < __kmp_place_core_offset || j >= __kmp_place_core_offset + __kmp_place_num_cores ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003089 n_old += __kmp_nThreadsPerCore; // skip not-requested core
3090 } else {
3091 for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) {
Andrey Churbanov12875572015-03-10 09:00:36 +00003092 if ( k < __kmp_place_num_threads_per_core ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003093 newAddr[n_new] = (*pAddr)[n_old]; // copy requested core's data to new location
3094 n_new++;
3095 }
3096 n_old++;
3097 }
3098 }
3099 }
3100 }
3101 nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
3102 __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
3103 __kmp_avail_proc = n_new; // correct avail_proc
3104 __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
3105
3106 __kmp_free( *pAddr );
3107 *pAddr = newAddr; // replace old topology with new one
3108}
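
//
// A worked example of the trimming above (made-up numbers): on a uniform
// 2-package x 4-core x 2-thread machine, requesting core offset 1, 2 cores,
// and 1 thread per core keeps cores 1..2 of each package and the first HW
// thread of each kept core, so __kmp_avail_proc becomes 2*2*1 = 4 and
// __kmp_ncores becomes 2*2 = 4.
//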
3109
Jim Cownie5e8470a2013-09-27 10:38:44 +00003110
3111static AddrUnsPair *address2os = NULL;
3112static int * procarr = NULL;
3113static int __kmp_aff_depth = 0;
3114
3115static void
3116__kmp_aux_affinity_initialize(void)
3117{
3118 if (__kmp_affinity_masks != NULL) {
3119 KMP_ASSERT(fullMask != NULL);
3120 return;
3121 }
3122
3123 //
3124 // Create the "full" mask - this defines all of the processors that we
3125 // consider to be in the machine model. If respect is set, then it is
3126 // the initialization thread's affinity mask. Otherwise, it is all
3127 // processors that we know about on the machine.
3128 //
3129 if (fullMask == NULL) {
3130 fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
3131 }
3132 if (KMP_AFFINITY_CAPABLE()) {
3133 if (__kmp_affinity_respect_mask) {
3134 __kmp_get_system_affinity(fullMask, TRUE);
3135
3136 //
3137 // Count the number of available processors.
3138 //
3139 unsigned i;
3140 __kmp_avail_proc = 0;
3141 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
3142 if (! KMP_CPU_ISSET(i, fullMask)) {
3143 continue;
3144 }
3145 __kmp_avail_proc++;
3146 }
3147 if (__kmp_avail_proc > __kmp_xproc) {
3148 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3149 && (__kmp_affinity_type != affinity_none))) {
3150 KMP_WARNING(ErrorInitializeAffinity);
3151 }
3152 __kmp_affinity_type = affinity_none;
Andrey Churbanov1f037e42015-03-10 09:15:26 +00003153 KMP_AFFINITY_DISABLE();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003154 return;
3155 }
3156 }
3157 else {
3158 __kmp_affinity_entire_machine_mask(fullMask);
3159 __kmp_avail_proc = __kmp_xproc;
3160 }
3161 }
3162
3163 int depth = -1;
3164 kmp_i18n_id_t msg_id = kmp_i18n_null;
3165
3166 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00003167 // For backward compatibility, setting KMP_CPUINFO_FILE =>
Jim Cownie5e8470a2013-09-27 10:38:44 +00003168 // KMP_TOPOLOGY_METHOD=cpuinfo
3169 //
3170 if ((__kmp_cpuinfo_file != NULL) &&
3171 (__kmp_affinity_top_method == affinity_top_method_all)) {
3172 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
3173 }
3174
3175 if (__kmp_affinity_top_method == affinity_top_method_all) {
3176 //
3177 // In the default code path, errors are not fatal - we just try using
3178 // another method. We only emit a warning message if affinity is on,
3179 // or the verbose flag is set, and the nowarnings flag was not set.
3180 //
3181 const char *file_name = NULL;
3182 int line = 0;
3183
3184# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3185
3186 if (__kmp_affinity_verbose) {
3187 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
3188 }
3189
3190 file_name = NULL;
3191 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3192 if (depth == 0) {
3193 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3194 KMP_ASSERT(address2os == NULL);
3195 return;
3196 }
3197
3198 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003199 if (__kmp_affinity_verbose) {
3200 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003201 KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
3202 KMP_I18N_STR(DecodingLegacyAPIC));
3203 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003204 else {
3205 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
3206 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003207 }
3208
3209 file_name = NULL;
3210 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3211 if (depth == 0) {
3212 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3213 KMP_ASSERT(address2os == NULL);
3214 return;
3215 }
3216 }
3217
3218# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3219
3220# if KMP_OS_LINUX
3221
3222 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003223 if (__kmp_affinity_verbose) {
3224 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003225 KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
3226 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003227 else {
3228 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
3229 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003230 }
3231
3232 FILE *f = fopen("/proc/cpuinfo", "r");
3233 if (f == NULL) {
3234 msg_id = kmp_i18n_str_CantOpenCpuinfo;
3235 }
3236 else {
3237 file_name = "/proc/cpuinfo";
3238 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3239 fclose(f);
3240 if (depth == 0) {
3241 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3242 KMP_ASSERT(address2os == NULL);
3243 return;
3244 }
3245 }
3246 }
3247
3248# endif /* KMP_OS_LINUX */
3249
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003250# if KMP_GROUP_AFFINITY
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003251
3252 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
3253 if (__kmp_affinity_verbose) {
3254 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3255 }
3256
3257 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3258 KMP_ASSERT(depth != 0);
3259 }
3260
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003261# endif /* KMP_GROUP_AFFINITY */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003262
Jim Cownie5e8470a2013-09-27 10:38:44 +00003263 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003264 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003265 if (file_name == NULL) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003266 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003267 }
3268 else if (line == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003269 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003270 }
3271 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003272 KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003273 }
3274 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003275 // FIXME - print msg if msg_id = kmp_i18n_null ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00003276
3277 file_name = "";
3278 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3279 if (depth == 0) {
3280 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3281 KMP_ASSERT(address2os == NULL);
3282 return;
3283 }
3284 KMP_ASSERT(depth > 0);
3285 KMP_ASSERT(address2os != NULL);
3286 }
3287 }
3288
3289 //
3290 // If the user has specified that a particular topology discovery method
3291 // is to be used, then we abort if that method fails. The exception is
3292 // group affinity, which might have been implicitly set.
3293 //
3294
3295# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3296
3297 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
3298 if (__kmp_affinity_verbose) {
3299 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3300 KMP_I18N_STR(Decodingx2APIC));
3301 }
3302
3303 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3304 if (depth == 0) {
3305 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3306 KMP_ASSERT(address2os == NULL);
3307 return;
3308 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003309 if (depth < 0) {
3310 KMP_ASSERT(msg_id != kmp_i18n_null);
3311 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3312 }
3313 }
3314 else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
3315 if (__kmp_affinity_verbose) {
3316 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3317 KMP_I18N_STR(DecodingLegacyAPIC));
3318 }
3319
3320 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3321 if (depth == 0) {
3322 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3323 KMP_ASSERT(address2os == NULL);
3324 return;
3325 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003326 if (depth < 0) {
3327 KMP_ASSERT(msg_id != kmp_i18n_null);
3328 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3329 }
3330 }
3331
3332# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3333
3334 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
3335 const char *filename;
3336 if (__kmp_cpuinfo_file != NULL) {
3337 filename = __kmp_cpuinfo_file;
3338 }
3339 else {
3340 filename = "/proc/cpuinfo";
3341 }
3342
3343 if (__kmp_affinity_verbose) {
3344 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
3345 }
3346
3347 FILE *f = fopen(filename, "r");
3348 if (f == NULL) {
3349 int code = errno;
3350 if (__kmp_cpuinfo_file != NULL) {
3351 __kmp_msg(
3352 kmp_ms_fatal,
3353 KMP_MSG(CantOpenFileForReading, filename),
3354 KMP_ERR(code),
3355 KMP_HNT(NameComesFrom_CPUINFO_FILE),
3356 __kmp_msg_null
3357 );
3358 }
3359 else {
3360 __kmp_msg(
3361 kmp_ms_fatal,
3362 KMP_MSG(CantOpenFileForReading, filename),
3363 KMP_ERR(code),
3364 __kmp_msg_null
3365 );
3366 }
3367 }
3368 int line = 0;
3369 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3370 fclose(f);
3371 if (depth < 0) {
3372 KMP_ASSERT(msg_id != kmp_i18n_null);
3373 if (line > 0) {
3374 KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
3375 }
3376 else {
3377 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
3378 }
3379 }
3380 if (__kmp_affinity_type == affinity_none) {
3381 KMP_ASSERT(depth == 0);
3382 KMP_ASSERT(address2os == NULL);
3383 return;
3384 }
3385 }
3386
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003387# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00003388
3389 else if (__kmp_affinity_top_method == affinity_top_method_group) {
3390 if (__kmp_affinity_verbose) {
3391 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3392 }
3393
3394 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3395 KMP_ASSERT(depth != 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003396 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003397 KMP_ASSERT(msg_id != kmp_i18n_null);
3398 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003399 }
3400 }
3401
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003402# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003403
3404 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
3405 if (__kmp_affinity_verbose) {
3406 KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
3407 }
3408
3409 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3410 if (depth == 0) {
3411 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3412 KMP_ASSERT(address2os == NULL);
3413 return;
3414 }
3415 // should not fail
3416 KMP_ASSERT(depth > 0);
3417 KMP_ASSERT(address2os != NULL);
3418 }
3419
3420 if (address2os == NULL) {
3421 if (KMP_AFFINITY_CAPABLE()
3422 && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3423 && (__kmp_affinity_type != affinity_none)))) {
3424 KMP_WARNING(ErrorInitializeAffinity);
3425 }
3426 __kmp_affinity_type = affinity_none;
Andrey Churbanov1f037e42015-03-10 09:15:26 +00003427 KMP_AFFINITY_DISABLE();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003428 return;
3429 }
3430
Jim Cownie5e8470a2013-09-27 10:38:44 +00003431 __kmp_apply_thread_places(&address2os, depth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003432
3433 //
3434 // Create the table of masks, indexed by thread Id.
3435 //
3436 unsigned maxIndex;
3437 unsigned numUnique;
3438 kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
3439 address2os, __kmp_avail_proc);
3440 if (__kmp_affinity_gran_levels == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003441 KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003442 }
3443
3444 //
3445 // Set the childNums vector in all Address objects. This must be done
3446 // before we can sort using __kmp_affinity_cmp_Address_child_num(),
3447 // which takes into account the setting of __kmp_affinity_compact.
3448 //
3449 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
3450
3451 switch (__kmp_affinity_type) {
3452
3453 case affinity_explicit:
3454 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
3455# if OMP_40_ENABLED
3456 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3457# endif
3458 {
3459 __kmp_affinity_process_proclist(&__kmp_affinity_masks,
3460 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3461 maxIndex);
3462 }
3463# if OMP_40_ENABLED
3464 else {
3465 __kmp_affinity_process_placelist(&__kmp_affinity_masks,
3466 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3467 maxIndex);
3468 }
3469# endif
3470 if (__kmp_affinity_num_masks == 0) {
3471 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3472 && (__kmp_affinity_type != affinity_none))) {
3473 KMP_WARNING(AffNoValidProcID);
3474 }
3475 __kmp_affinity_type = affinity_none;
3476 return;
3477 }
3478 break;
3479
3480 //
3481 // The other affinity types rely on sorting the Addresses according
3482 // to some permutation of the machine topology tree. Set
3483 // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
3484 // then jump to a common code fragment to do the sort and create
3485 // the array of affinity masks.
3486 //
3487
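    //
    // Intuition for the two orderings (per the documented KMP_AFFINITY
    // semantics): "compact" sorts so that consecutive masks share the higher
    // topology levels (same package, then same core), while "scatter"
    // inverts the level significance so that consecutive masks land on
    // different packages first.
    //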
3488 case affinity_logical:
3489 __kmp_affinity_compact = 0;
3490 if (__kmp_affinity_offset) {
3491 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3492 % __kmp_avail_proc;
3493 }
3494 goto sortAddresses;
3495
3496 case affinity_physical:
3497 if (__kmp_nThreadsPerCore > 1) {
3498 __kmp_affinity_compact = 1;
3499 if (__kmp_affinity_compact >= depth) {
3500 __kmp_affinity_compact = 0;
3501 }
3502 } else {
3503 __kmp_affinity_compact = 0;
3504 }
3505 if (__kmp_affinity_offset) {
3506 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3507 % __kmp_avail_proc;
3508 }
3509 goto sortAddresses;
3510
3511 case affinity_scatter:
3512 if (__kmp_affinity_compact >= depth) {
3513 __kmp_affinity_compact = 0;
3514 }
3515 else {
3516 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
3517 }
3518 goto sortAddresses;
3519
3520 case affinity_compact:
3521 if (__kmp_affinity_compact >= depth) {
3522 __kmp_affinity_compact = depth - 1;
3523 }
3524 goto sortAddresses;

    case affinity_balanced:
        // Balanced works only for the case of a single package
        if( nPackages > 1 ) {
            if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
                KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
            }
            __kmp_affinity_type = affinity_none;
            return;
        } else if( __kmp_affinity_uniform_topology() ) {
            break;
        } else { // Non-uniform topology

            // Save the depth for later use
            __kmp_aff_depth = depth;

            // Number of hyper threads per core in an HT machine
            int nth_per_core = __kmp_nThreadsPerCore;

            int core_level;
            if( nth_per_core > 1 ) {
                core_level = depth - 2;
            } else {
                core_level = depth - 1;
            }
            int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
            int nproc = nth_per_core * ncores;

            procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
            for( int i = 0; i < nproc; i++ ) {
                procarr[ i ] = -1;
            }

            for( int i = 0; i < __kmp_avail_proc; i++ ) {
                int proc = address2os[ i ].second;
                // If depth == 3, then level 0 is the package, level 1 the
                // core, and level 2 the thread.  If there is only one thread
                // per core, then depth == 2: level 0 is the package and
                // level 1 the core.
                int level = depth - 1;

                // Defaults for the case __kmp_nThreadsPerCore == 1
                int thread = 0;
                int core = address2os[ i ].first.labels[ level ];
                // If the thread level exists, i.e. there is more than one
                // thread context per core
                if( nth_per_core > 1 ) {
                    thread = address2os[ i ].first.labels[ level ] % nth_per_core;
                    core = address2os[ i ].first.labels[ level - 1 ];
                }
                procarr[ core * nth_per_core + thread ] = proc;
            }

            break;
        }
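
    //
    // procarr layout sketch (hypothetical non-uniform machine): with
    // nth_per_core == 2 and 3 cores, where core 1 has only one usable thread
    // context, procarr ends up as { p0, p1, p2, -1, p3, p4 } -- indexed by
    // core * nth_per_core + thread, with -1 marking a missing context.
    //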

    sortAddresses:
        //
        // Allocate the gtid->affinity mask table.
        //
        if (__kmp_affinity_dups) {
            __kmp_affinity_num_masks = __kmp_avail_proc;
        }
        else {
            __kmp_affinity_num_masks = numUnique;
        }

# if OMP_40_ENABLED
        if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
          && ( __kmp_affinity_num_places > 0 )
          && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
            __kmp_affinity_num_masks = __kmp_affinity_num_places;
        }
# endif

        __kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
          __kmp_affinity_num_masks * __kmp_affin_mask_size);

        //
        // Sort the address2os table according to the current setting of
        // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
        //
        qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
          __kmp_affinity_cmp_Address_child_num);
        {
            int i;
            unsigned j;
            for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
                if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
                    continue;
                }
                unsigned osId = address2os[i].second;
                kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
                kmp_affin_mask_t *dest
                  = KMP_CPU_INDEX(__kmp_affinity_masks, j);
                KMP_ASSERT(KMP_CPU_ISSET(osId, src));
                KMP_CPU_COPY(dest, src);
                if (++j >= __kmp_affinity_num_masks) {
                    break;
                }
            }
            KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
        }
        break;

    default:
        KMP_ASSERT2(0, "Unexpected affinity setting");
    }

    __kmp_free(osId2Mask);
    machine_hierarchy.init(address2os, __kmp_avail_proc);
}


void
__kmp_affinity_initialize(void)
{
    //
    // Much of the code above was written assuming that if a machine was not
    // affinity capable, then __kmp_affinity_type == affinity_none.  We now
    // explicitly represent this as __kmp_affinity_type == affinity_disabled.
    //
    // There are too many checks for __kmp_affinity_type == affinity_none
    // in this code.  Instead of trying to change them all, check if
    // __kmp_affinity_type == affinity_disabled, and if so, slam it with
    // affinity_none, call the real initialization routine, then restore
    // __kmp_affinity_type to affinity_disabled.
    //
    int disabled = (__kmp_affinity_type == affinity_disabled);
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(disabled);
    }
    if (disabled) {
        __kmp_affinity_type = affinity_none;
    }
    __kmp_aux_affinity_initialize();
    if (disabled) {
        __kmp_affinity_type = affinity_disabled;
    }
}


void
__kmp_affinity_uninitialize(void)
{
    if (__kmp_affinity_masks != NULL) {
        __kmp_free(__kmp_affinity_masks);
        __kmp_affinity_masks = NULL;
    }
    if (fullMask != NULL) {
        KMP_CPU_FREE(fullMask);
        fullMask = NULL;
    }
    __kmp_affinity_num_masks = 0;
# if OMP_40_ENABLED
    __kmp_affinity_num_places = 0;
# endif
    if (__kmp_affinity_proclist != NULL) {
        __kmp_free(__kmp_affinity_proclist);
        __kmp_affinity_proclist = NULL;
    }
    if( address2os != NULL ) {
        __kmp_free( address2os );
        address2os = NULL;
    }
    if( procarr != NULL ) {
        __kmp_free( procarr );
        procarr = NULL;
    }
}


void
__kmp_affinity_set_init_mask(int gtid, int isa_root)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return;
    }

    kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
    if (th->th.th_affin_mask == NULL) {
        KMP_CPU_ALLOC(th->th.th_affin_mask);
    }
    else {
        KMP_CPU_ZERO(th->th.th_affin_mask);
    }

    //
    // Copy the thread mask to the kmp_info_t structure.
    // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e.
    // one that has all of the OS proc ids set, or if
    // __kmp_affinity_respect_mask is set, then the full mask is the same
    // as the mask of the initialization thread.
    //
    kmp_affin_mask_t *mask;
    int i;

# if OMP_40_ENABLED
    if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
# endif
    {
        if ((__kmp_affinity_type == affinity_none)
          || (__kmp_affinity_type == affinity_balanced)) {
# if KMP_GROUP_AFFINITY
            if (__kmp_num_proc_groups > 1) {
                return;
            }
# endif
            KMP_ASSERT(fullMask != NULL);
            i = KMP_PLACE_ALL;
            mask = fullMask;
        }
        else {
            KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
            i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
            mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
        }
    }
# if OMP_40_ENABLED
    else {
        if ((! isa_root)
          || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
# if KMP_GROUP_AFFINITY
            if (__kmp_num_proc_groups > 1) {
                return;
            }
# endif
            KMP_ASSERT(fullMask != NULL);
            i = KMP_PLACE_ALL;
            mask = fullMask;
        }
        else {
            //
            // int i = some hash function or just a counter that doesn't
            // always start at 0.  Use gtid for now.
            //
            KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
            i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
            mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
        }
    }
# endif
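
    //
    // Example of the mask selection above (hypothetical run): with
    // __kmp_affinity_num_masks == 4 and __kmp_affinity_offset == 1, threads
    // gtid 0..5 land on places 1, 2, 3, 0, 1, 2 -- a round-robin over the
    // place list, rotated by the offset.
    //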

# if OMP_40_ENABLED
    th->th.th_current_place = i;
    if (isa_root) {
        th->th.th_new_place = i;
        th->th.th_first_place = 0;
        th->th.th_last_place = __kmp_affinity_num_masks - 1;
    }

    if (i == KMP_PLACE_ALL) {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
          gtid));
    }
    else {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
          gtid, i));
    }
# else
    if (i == -1) {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
          gtid));
    }
    else {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
          gtid, i));
    }
# endif /* OMP_40_ENABLED */

    KMP_CPU_COPY(th->th.th_affin_mask, mask);

    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          th->th.th_affin_mask);
        KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
          buf);
    }

# if KMP_OS_WINDOWS
    //
    // On Windows* OS, the process affinity mask might have changed.
    // If the user didn't request affinity and this call fails,
    // just continue silently.  See CQ171393.
    //
    if ( __kmp_affinity_type == affinity_none ) {
        __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
    }
    else
# endif
        __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}


# if OMP_40_ENABLED

void
__kmp_affinity_set_place(int gtid)
{
    int retval;

    if (! KMP_AFFINITY_CAPABLE()) {
        return;
    }

    kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);

    KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
      gtid, th->th.th_new_place, th->th.th_current_place));

    //
    // Check that the new place is within this thread's partition.
    // Valid place indices run 0 .. __kmp_affinity_num_masks - 1.
    //
    KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
    KMP_ASSERT(th->th.th_new_place >= 0);
    KMP_ASSERT((unsigned)th->th.th_new_place < __kmp_affinity_num_masks);
    if (th->th.th_first_place <= th->th.th_last_place) {
        KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
          && (th->th.th_new_place <= th->th.th_last_place));
    }
    else {
        KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
          || (th->th.th_new_place >= th->th.th_last_place));
    }
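
    //
    // Note that a partition may wrap: with 8 places (hypothetical values),
    // first_place == 6 and last_place == 2 denote the set {6, 7, 0, 1, 2},
    // which is why the containment check above is inverted -- and necessarily
    // looser -- when first_place > last_place.
    //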

    //
    // Copy the thread mask to the kmp_info_t structure,
    // and set this thread's affinity.
    //
    kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
      th->th.th_new_place);
    KMP_CPU_COPY(th->th.th_affin_mask, mask);
    th->th.th_current_place = th->th.th_new_place;

    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          th->th.th_affin_mask);
        KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
          gtid, buf);
    }
    __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}

# endif /* OMP_40_ENABLED */


int
__kmp_aux_set_affinity(void **mask)
{
    int gtid;
    kmp_info_t *th;
    int retval;

    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    gtid = __kmp_entry_gtid();
    KA_TRACE(1000, ;{
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
          gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
        }
        else {
            unsigned proc;
            int num_procs = 0;

            for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
                if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
                    continue;
                }
                num_procs++;
                if (! KMP_CPU_ISSET(proc, fullMask)) {
                    KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
                    break;
                }
            }
            if (num_procs == 0) {
                KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
            }

# if KMP_GROUP_AFFINITY
            if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
                KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
            }
# endif /* KMP_GROUP_AFFINITY */

        }
    }

    th = __kmp_threads[gtid];
    KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
    retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
    if (retval == 0) {
        KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
    }

# if OMP_40_ENABLED
    th->th.th_current_place = KMP_PLACE_UNDEFINED;
    th->th.th_new_place = KMP_PLACE_UNDEFINED;
    th->th.th_first_place = 0;
    th->th.th_last_place = __kmp_affinity_num_masks - 1;

    //
    // Turn off 4.0 affinity for the current thread at this parallel level.
    //
    th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
# endif

    return retval;
}
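
//
// Caller-side sketch (illustrative only; assumes the kmp_* entry points
// declared in kmp.h, which forward to these __kmp_aux_* routines):
//
//     kmp_affinity_mask_t mask;
//     kmp_create_affinity_mask(&mask);
//     if (kmp_set_affinity_mask_proc(3, &mask) == 0
//       && kmp_set_affinity(&mask) == 0) {
//         // The calling thread is now bound to OS proc 3.
//     }
//     kmp_destroy_affinity_mask(&mask);
//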


int
__kmp_aux_get_affinity(void **mask)
{
    int gtid;
    int retval;
    kmp_info_t *th;

    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    gtid = __kmp_entry_gtid();
    th = __kmp_threads[gtid];
    KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);

    KA_TRACE(1000, ;{
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          th->th.th_affin_mask);
        __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
        }
    }

# if !KMP_OS_WINDOWS

    retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
    KA_TRACE(1000, ;{
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
    });
    return retval;

# else

    KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
    return 0;

# endif /* KMP_OS_WINDOWS */

}

int
__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
{
    int retval;

    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    KA_TRACE(1000, ;{
        int gtid = __kmp_entry_gtid();
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
          proc, gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
        }
    }

    if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
        return -1;
    }
    if (! KMP_CPU_ISSET(proc, fullMask)) {
        return -2;
    }

    KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
    return 0;
}


int
__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
{
    int retval;

    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    KA_TRACE(1000, ;{
        int gtid = __kmp_entry_gtid();
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
          proc, gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
        }
    }

    if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
        return -1;
    }
    if (! KMP_CPU_ISSET(proc, fullMask)) {
        return -2;
    }

    KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
    return 0;
}


int
__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
{
    int retval;

    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    KA_TRACE(1000, ;{
        int gtid = __kmp_entry_gtid();
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
          proc, gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
        }
    }

    if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
        return 0;
    }
    if (! KMP_CPU_ISSET(proc, fullMask)) {
        return 0;
    }

    return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
}
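
//
// Sketch of how the three *_mask_proc routines compose (illustrative only,
// via the kmp_* wrappers assumed to be declared in kmp.h):
//
//     kmp_affinity_mask_t mask;
//     kmp_create_affinity_mask(&mask);
//     kmp_set_affinity_mask_proc(2, &mask);              // 0 on success
//     int isset = kmp_get_affinity_mask_proc(2, &mask);  // nonzero: proc 2 set
//     kmp_unset_affinity_mask_proc(2, &mask);            // clears proc 2
//     kmp_destroy_affinity_mask(&mask);
//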
4098
Jim Cownie5e8470a2013-09-27 10:38:44 +00004099
4100// Dynamic affinity settings - Affinity balanced
4101void __kmp_balanced_affinity( int tid, int nthreads )
4102{
4103 if( __kmp_affinity_uniform_topology() ) {
4104 int coreID;
4105 int threadID;
4106 // Number of hyper threads per core in HT machine
4107 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
4108 // Number of cores
4109 int ncores = __kmp_ncores;
4110 // How many threads will be bound to each core
4111 int chunk = nthreads / ncores;
4112 // How many cores will have an additional thread bound to it - "big cores"
4113 int big_cores = nthreads % ncores;
4114 // Number of threads on the big cores
4115 int big_nth = ( chunk + 1 ) * big_cores;
4116 if( tid < big_nth ) {
4117 coreID = tid / (chunk + 1 );
4118 threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
4119 } else { //tid >= big_nth
4120 coreID = ( tid - big_cores ) / chunk;
4121 threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
4122 }
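
        //
        // Worked example (hypothetical values): nthreads == 10 on 4 cores
        // gives chunk == 2, big_cores == 2, big_nth == 6.  Tids 0-5 fill the
        // two "big" cores three at a time; tid 7 then gets
        // coreID == (7 - 2) / 2 == 2, the first non-big core.
        //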

        KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
          "Illegal set affinity operation when not capable");

        kmp_affin_mask_t *mask = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
        KMP_CPU_ZERO(mask);

        // Granularity == thread
        if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread ) {
            int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
            KMP_CPU_SET( osID, mask );
        } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
            for( int i = 0; i < __kmp_nth_per_core; i++ ) {
                int osID;
                osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
                KMP_CPU_SET( osID, mask );
            }
        }
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
            KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
              tid, buf);
        }
        __kmp_set_system_affinity( mask, TRUE );
    } else { // Non-uniform topology

        kmp_affin_mask_t *mask = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
        KMP_CPU_ZERO(mask);

        // Number of hyper threads per core in an HT machine
        int nth_per_core = __kmp_nThreadsPerCore;
        int core_level;
        if( nth_per_core > 1 ) {
            core_level = __kmp_aff_depth - 2;
        } else {
            core_level = __kmp_aff_depth - 1;
        }

        // Number of cores - a maximum value; it does not count trailing cores with 0 processors
        int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;

        // For a performance gain, consider the special case nthreads == __kmp_avail_proc
        if( nthreads == __kmp_avail_proc ) {
            if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread ) {
                int osID = address2os[ tid ].second;
                KMP_CPU_SET( osID, mask );
            } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
                int coreID = address2os[ tid ].first.labels[ core_level ];
                // Count the osIDs found for the current core; there can be no
                // more than nth_per_core of them, and since address2os is
                // sorted we can break as soon as cnt == nth_per_core.
                int cnt = 0;
                for( int i = 0; i < __kmp_avail_proc; i++ ) {
                    int osID = address2os[ i ].second;
                    int core = address2os[ i ].first.labels[ core_level ];
                    if( core == coreID ) {
                        KMP_CPU_SET( osID, mask );
                        cnt++;
                        if( cnt == nth_per_core ) {
                            break;
                        }
                    }
                }
            }
        } else if( nthreads <= __kmp_ncores ) {

            int core = 0;
            for( int i = 0; i < ncores; i++ ) {
                // Check if this core from procarr[] is in the mask
                int in_mask = 0;
                for( int j = 0; j < nth_per_core; j++ ) {
                    if( procarr[ i * nth_per_core + j ] != -1 ) {
                        in_mask = 1;
                        break;
                    }
                }
                if( in_mask ) {
                    if( tid == core ) {
                        for( int j = 0; j < nth_per_core; j++ ) {
                            int osID = procarr[ i * nth_per_core + j ];
                            if( osID != -1 ) {
                                KMP_CPU_SET( osID, mask );
                                // For granularity=thread it is enough to set
                                // the first available osID for this core
                                if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread ) {
                                    break;
                                }
                            }
                        }
                        break;
                    } else {
                        core++;
                    }
                }
            }

        } else { // nthreads > __kmp_ncores

            // Array to save the number of processors at each core
            int* nproc_at_core = (int*)KMP_ALLOCA(sizeof(int)*ncores);
            // Array to save the number of cores with "x" available processors
            int* ncores_with_x_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
            // Array to save the number of cores with # procs from x to nth_per_core
            int* ncores_with_x_to_max_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));

            for( int i = 0; i <= nth_per_core; i++ ) {
                ncores_with_x_procs[ i ] = 0;
                ncores_with_x_to_max_procs[ i ] = 0;
            }

            for( int i = 0; i < ncores; i++ ) {
                int cnt = 0;
                for( int j = 0; j < nth_per_core; j++ ) {
                    if( procarr[ i * nth_per_core + j ] != -1 ) {
                        cnt++;
                    }
                }
                nproc_at_core[ i ] = cnt;
                ncores_with_x_procs[ cnt ]++;
            }

            for( int i = 0; i <= nth_per_core; i++ ) {
                for( int j = i; j <= nth_per_core; j++ ) {
                    ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
                }
            }
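
            //
            // Small worked example (hypothetical): with nth_per_core == 2 and
            // four cores exposing 2, 1, 2 and 0 usable procs,
            // ncores_with_x_procs == { 1, 1, 2 } and
            // ncores_with_x_to_max_procs == { 4, 3, 2 }.
            //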

            // Max number of processors
            int nproc = nth_per_core * ncores;
            // Array to keep the number of threads bound to each hardware thread context
            int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
            for( int i = 0; i < nproc; i++ ) {
                newarr[ i ] = 0;
            }

            int nth = nthreads;
            int flag = 0;
            while( nth > 0 ) {
                for( int j = 1; j <= nth_per_core; j++ ) {
                    int cnt = ncores_with_x_to_max_procs[ j ];
                    for( int i = 0; i < ncores; i++ ) {
                        // Skip cores with 0 processors
                        if( nproc_at_core[ i ] == 0 ) {
                            continue;
                        }
                        for( int k = 0; k < nth_per_core; k++ ) {
                            if( procarr[ i * nth_per_core + k ] != -1 ) {
                                if( newarr[ i * nth_per_core + k ] == 0 ) {
                                    newarr[ i * nth_per_core + k ] = 1;
                                    cnt--;
                                    nth--;
                                    break;
                                } else {
                                    if( flag != 0 ) {
                                        newarr[ i * nth_per_core + k ]++;
                                        cnt--;
                                        nth--;
                                        break;
                                    }
                                }
                            }
                        }
                        if( cnt == 0 || nth == 0 ) {
                            break;
                        }
                    }
                    if( nth == 0 ) {
                        break;
                    }
                }
                flag = 1;
            }
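
            //
            // Distribution sketch (hypothetical): 5 threads over the contexts
            // { c0t0, c0t1, c1t0, c2t0, c2t1 } get one thread each on the
            // first pass (newarr all 1); only after every usable context is
            // occupied does flag permit doubling up on later passes.
            //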
            int sum = 0;
            for( int i = 0; i < nproc; i++ ) {
                sum += newarr[ i ];
                if( sum > tid ) {
                    // Granularity == thread
                    if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread ) {
                        int osID = procarr[ i ];
                        KMP_CPU_SET( osID, mask );
                    } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
                        int coreID = i / nth_per_core;
                        for( int ii = 0; ii < nth_per_core; ii++ ) {
                            int osID = procarr[ coreID * nth_per_core + ii ];
                            if( osID != -1 ) {
                                KMP_CPU_SET( osID, mask );
                            }
                        }
                    }
                    break;
                }
            }
            __kmp_free( newarr );
        }

        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
            KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
              tid, buf);
        }
        __kmp_set_system_affinity( mask, TRUE );
    }
}

#endif // KMP_AFFINITY_SUPPORTED