/*
 * kmp_affinity.cpp -- affinity management
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_str.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_affinity.h"

// Store the real or imagined machine hierarchy here
static hierarchy_info machine_hierarchy;

void __kmp_cleanup_hierarchy() {
    machine_hierarchy.fini();
}

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
    kmp_uint32 depth;
    // The test below is true if affinity is available, but set to "none".
    // Need to init on first use of hierarchical barrier.
    if (TCR_1(machine_hierarchy.uninitialized))
        machine_hierarchy.init(NULL, nproc);

    // Adjust the hierarchy in case num threads exceeds original
    if (nproc > machine_hierarchy.base_num_threads)
        machine_hierarchy.resize(nproc);

    depth = machine_hierarchy.depth;
    KMP_DEBUG_ASSERT(depth > 0);

    thr_bar->depth = depth;
    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1;
    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}
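
//
// Illustrative sketch (added note, not from the original source): assuming
// numPerLevel = {4, 2} describes 4 threads per leaf node and 2 leaf nodes,
// a thread's barrier state would come out as
//
//     thr_bar->depth          = 2;
//     thr_bar->base_leaf_kids = 4 - 1;        // 3 other kids per leaf parent
//     thr_bar->skip_per_level = skipPerLevel; // assumed to hold {1, 4}, i.e.
//                                             // the stride between siblings
//                                             // at each level
//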

#if KMP_AFFINITY_SUPPORTED

//
// Print the affinity mask to the character array in a pretty format.
//
char *
__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
{
    KMP_ASSERT(buf_len >= 40);
    char *scan = buf;
    char *end = buf + buf_len - 1;

    //
    // Find first element / check for empty set.
    //
    size_t i;
    for (i = 0; i < KMP_CPU_SETSIZE; i++) {
        if (KMP_CPU_ISSET(i, mask)) {
            break;
        }
    }
    if (i == KMP_CPU_SETSIZE) {
        KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}");
        while (*scan != '\0') scan++;
        KMP_ASSERT(scan <= end);
        return buf;
    }

    KMP_SNPRINTF(scan, end - scan + 1, "{%ld", (long)i);
    while (*scan != '\0') scan++;
    i++;
    for (; i < KMP_CPU_SETSIZE; i++) {
        if (! KMP_CPU_ISSET(i, mask)) {
            continue;
        }

        //
        // Check for buffer overflow. A string of the form ",<n>" will have
        // at most 10 characters, plus we want to leave room to print ",...}"
        // if the set is too large to print, for a total of 15 characters.
        // We already left room for '\0' in setting end.
        //
        if (end - scan < 15) {
            break;
        }
        KMP_SNPRINTF(scan, end - scan + 1, ",%-ld", (long)i);
        while (*scan != '\0') scan++;
    }
    if (i < KMP_CPU_SETSIZE) {
        KMP_SNPRINTF(scan, end - scan + 1, ",...");
        while (*scan != '\0') scan++;
    }
    KMP_SNPRINTF(scan, end - scan + 1, "}");
    while (*scan != '\0') scan++;
    KMP_ASSERT(scan <= end);
    return buf;
}
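
//
// Usage sketch (illustrative, not from the original source):
//
//     char buf[KMP_AFFIN_MASK_PRINT_LEN];
//     __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
//     // buf now holds e.g. "{0,1,2,3}", "{<empty>}" for an empty set, or
//     // "{0,1,2,...}" when the set does not fit in the buffer.
//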


void
__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
{
    KMP_CPU_ZERO(mask);

# if KMP_GROUP_AFFINITY

    if (__kmp_num_proc_groups > 1) {
        int group;
        KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
        for (group = 0; group < __kmp_num_proc_groups; group++) {
            int i;
            int num = __kmp_GetActiveProcessorCount(group);
            for (i = 0; i < num; i++) {
                KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
            }
        }
    }
    else

# endif /* KMP_GROUP_AFFINITY */

    {
        int proc;
        for (proc = 0; proc < __kmp_xproc; proc++) {
            KMP_CPU_SET(proc, mask);
        }
    }
}
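
//
// Example (illustrative): with Windows* OS processor groups on a 64-bit
// system, CHAR_BIT * sizeof(DWORD_PTR) == 64, so proc p of group g maps to
// bit p + g * 64 - e.g. proc 3 of group 1 sets bit 67 in the mask.
//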

//
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the child_num
// vector of the address object. This is done in case the labels used for
// the children at one node of the hierarchy differ from those used for
// another node at the same level. Example: suppose the machine has 2 nodes
// with 2 packages each. The first node contains packages 601 and 602, and
// the second node contains packages 603 and 604. If we try to sort the table
// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
// because we are paying attention to the labels themselves, not the ordinal
// child numbers. By using the child numbers in the sort, the result is
// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
//
static void
__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
    int numAddrs)
{
    KMP_DEBUG_ASSERT(numAddrs > 0);
    int depth = address2os->first.depth;
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
        * sizeof(unsigned));
    int labCt;
    for (labCt = 0; labCt < depth; labCt++) {
        address2os[0].first.childNums[labCt] = counts[labCt] = 0;
        lastLabel[labCt] = address2os[0].first.labels[labCt];
    }
    int i;
    for (i = 1; i < numAddrs; i++) {
        for (labCt = 0; labCt < depth; labCt++) {
            if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
                int labCt2;
                for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
                    counts[labCt2] = 0;
                    lastLabel[labCt2] = address2os[i].first.labels[labCt2];
                }
                counts[labCt]++;
                lastLabel[labCt] = address2os[i].first.labels[labCt];
                break;
            }
        }
        for (labCt = 0; labCt < depth; labCt++) {
            address2os[i].first.childNums[labCt] = counts[labCt];
        }
        for (; labCt < (int)Address::maxDepth; labCt++) {
            address2os[i].first.childNums[labCt] = 0;
        }
    }
}
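
//
// Trace of the example above (illustrative): for the sorted labels
// {0,601} {0,602} {1,603} {1,604}, the loop assigns child numbers
// {0,0} {0,1} {1,0} {1,1} - the package counter restarts at 0 whenever the
// node label changes, which is what lets a sort by child numbers interleave
// the two nodes.
//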


//
// All of the __kmp_affinity_create_*_map() routines should set
// __kmp_affinity_masks to a vector of affinity mask objects of length
// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
// return the number of levels in the machine topology tree (zero if
// __kmp_affinity_type == affinity_none).
//
// All of the __kmp_affinity_create_*_map() routines should set *fullMask
// to the affinity mask for the initialization thread. They need to save and
// restore the mask, and it could be needed later, so saving it is just an
// optimization to avoid calling __kmp_get_system_affinity() again.
//
static kmp_affin_mask_t *fullMask = NULL;

kmp_affin_mask_t *
__kmp_affinity_get_fullMask() { return fullMask; }


static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif

//
// __kmp_affinity_uniform_topology() doesn't work when called from
// places which support arbitrarily many levels in the machine topology
// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
// and __kmp_affinity_create_x2apicid_map().
//
inline static bool
__kmp_affinity_uniform_topology()
{
    return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}
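
//
// Example (illustrative): 2 packages x 4 cores x 2 threads with all 16 OS
// procs available is uniform. If one package instead has 2 cores, then
// nCoresPerPkg == 4 (the max), so 2 * 4 * 2 == 16 != 12 available procs,
// and the topology is reported as non-uniform.
//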


//
// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc.
//
static void
__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
    int pkgLevel, int coreLevel, int threadLevel)
{
    int proc;

    KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
    for (proc = 0; proc < len; proc++) {
        int level;
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);
        for (level = 0; level < depth; level++) {
            if (level == threadLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
            }
            else if (level == coreLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
            }
            else if (level == pkgLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
            }
            else if (level > pkgLevel) {
                __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                    level - pkgLevel - 1);
            }
            else {
                __kmp_str_buf_print(&buf, "L%d ", level);
            }
            __kmp_str_buf_print(&buf, "%d ",
                address2os[proc].first.labels[level]);
        }
        KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
            buf.str);
        __kmp_str_buf_free(&buf);
    }
}


//
// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread id's <-> processor id's.
//
static int
__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
    kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Even if __kmp_affinity_type == affinity_none, this routine might still
    // be called to set __kmp_ncores, as well as
    // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(__kmp_affinity_type == affinity_none);
        __kmp_ncores = nPackages = __kmp_xproc;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages. Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nPackages = __kmp_avail_proc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);

        KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
            __kmp_nThreadsPerCore, __kmp_ncores);
    }
    if (__kmp_affinity_type == affinity_none) {
        return 0;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
        __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    unsigned int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(1);
        addr.labels[0] = i;
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
    }
    if (__kmp_affinity_verbose) {
        KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Only the package level is modeled in the machine topology map,
        // so the #levels of granularity is either 0 or 1.
        //
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels = 1;
        }
        else {
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 1;
}


# if KMP_GROUP_AFFINITY

//
// If multiple Windows* OS processor groups exist, we can create a 2-level
// topology map with the groups at level 0 and the individual procs at
// level 1.
//
// This facilitates letting the threads float among all procs in a group,
// if granularity=group (the default when there are multiple groups).
//
static int
__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
    kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // If we don't have multiple processor groups, return now.
    // The flat mapping will be used.
    //
    if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
        // FIXME set *msg_id
        return -1;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
        __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(2);
        addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
        addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
                addr.labels[1]);
        }
    }

    if (__kmp_affinity_gran_levels < 0) {
        if (__kmp_affinity_gran == affinity_gran_group) {
            __kmp_affinity_gran_levels = 1;
        }
        else if ((__kmp_affinity_gran == affinity_gran_fine)
          || (__kmp_affinity_gran == affinity_gran_thread)) {
            __kmp_affinity_gran_levels = 0;
        }
        else {
            const char *gran_str = NULL;
            if (__kmp_affinity_gran == affinity_gran_core) {
                gran_str = "core";
            }
            else if (__kmp_affinity_gran == affinity_gran_package) {
                gran_str = "package";
            }
            else if (__kmp_affinity_gran == affinity_gran_node) {
                gran_str = "node";
            }
            else {
                KMP_ASSERT(0);
            }

            // Warning: can't use affinity granularity \"gran\" with group topology method, using "thread"
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 2;
}

# endif /* KMP_GROUP_AFFINITY */

# if KMP_ARCH_X86 || KMP_ARCH_X86_64

static int
__kmp_cpuid_mask_width(int count) {
    int r = 0;

    while((1<<r) < count)
        ++r;
    return r;
}
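
//
// Examples (illustrative): __kmp_cpuid_mask_width(1) == 0,
// __kmp_cpuid_mask_width(4) == 2, and __kmp_cpuid_mask_width(6) == 3 -
// i.e. the number of bits needed to encode "count" distinct values.
//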


class apicThreadInfo {
public:
    unsigned osId;              // param to __kmp_affinity_bind_thread
    unsigned apicId;            // from cpuid after binding
    unsigned maxCoresPerPkg;    // ""
    unsigned maxThreadsPerPkg;  // ""
    unsigned pkgId;             // inferred from above values
    unsigned coreId;            // ""
    unsigned threadId;          // ""
};


static int
__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->osId < bb->osId) return -1;
    if (aa->osId > bb->osId) return 1;
    return 0;
}


static int
__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->pkgId < bb->pkgId) return -1;
    if (aa->pkgId > bb->pkgId) return 1;
    if (aa->coreId < bb->coreId) return -1;
    if (aa->coreId > bb->coreId) return 1;
    if (aa->threadId < bb->threadId) return -1;
    if (aa->threadId > bb->threadId) return 1;
    return 0;
}


//
// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
// an algorithm which cycles through the available os threads, setting
// the current thread's affinity mask to that thread, and then retrieves
// the Apic Id for each thread context using the cpuid instruction.
//
static int
__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
    kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;
    int rc;
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check if cpuid leaf 4 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 4) {
        *msg_id = kmp_i18n_str_NoLeaf4Support;
        return -1;
    }

    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
    // the defaults that we calculated from issuing cpuid without binding
    // to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        //
        // Get an upper bound on the number of threads per package using
        // cpuid(1).
        //
        // On some OS/chip combinations where HT is supported by the chip
        // but is disabled, this value will be 2 on a single core chip.
        // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
        //
        __kmp_x86_cpuid(1, 0, &buf);
        int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (maxThreadsPerPkg == 0) {
            maxThreadsPerPkg = 1;
        }

        //
        // The num cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // The author of cpu_count.cpp treated this only as an upper bound
        // on the number of cores, but I haven't seen any cases where it
        // was greater than the actual number of cores, so we will treat
        // it as exact in this block of code.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            nCoresPerPkg = 1;
        }

        //
        // There is no way to reliably tell if HT is enabled without issuing
        // the cpuid instruction from every thread, and correlating the cpuid
        // info, so if the machine is not affinity capable, we assume that HT
        // is off. We have seen quite a few machines where maxThreadsPerPkg
        // is 2, yet the machine does not support HT.
        //
        // - Older OSes are usually found on machines with older chips, which
        //   do not support HT.
        //
        // - The performance penalty for mistakenly identifying a machine as
        //   HT when it isn't (which results in blocktime being incorrectly
        //   set to 0) is greater than the penalty for mistakenly identifying
        //   a machine as being 1 thread/core when it is really HT enabled
        //   (which results in blocktime being incorrectly set to a positive
        //   value).
        //
        __kmp_ncores = __kmp_xproc;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        __kmp_nThreadsPerCore = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    KMP_ASSERT(oldMask != NULL);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    // The relevant information is:
    //
    // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
    //    has a unique Apic Id, which is of the form pkg# : core# : thread#.
    //
    // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The
    //    value of this field determines the width of the core# + thread#
    //    fields in the Apic Id. It is also an upper bound on the number
    //    of threads per package, but it has been verified that situations
    //    happen where it is not exact. In particular, on certain OS/chip
    //    combinations where Intel(R) Hyper-Threading Technology is supported
    //    by the chip but has been disabled, the value of this field will be
    //    2 (for a single core chip). On other OS/chip combinations supporting
    //    Intel(R) Hyper-Threading Technology, the value of this field will be
    //    1 when Intel(R) Hyper-Threading Technology is disabled and 2 when it
    //    is enabled.
    //
    // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The
    //    value of this field (+1) determines the width of the core# field in
    //    the Apic Id. The comments in "cpucount.cpp" say that this value is
    //    an upper bound, but the IA-32 architecture manual says that it is
    //    exactly the number of cores per package, and I haven't seen any
    //    case where it wasn't.
    //
    // From this information, deduce the package Id, core Id, and thread Id,
    // and set the corresponding fields in the apicThreadInfo struct.
    //
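    //
    // Worked example (illustrative): maxThreadsPerPkg = 8 gives widthCT = 3
    // and maxCoresPerPkg = 4 gives widthC = 2, so widthT = 1. For an Apic Id
    // of 0b101101:
    //     pkgId    =  0b101101 >> 3         = 5
    //     coreId   = (0b101101 >> 1) & 0b11 = 2
    //     threadId =  0b101101 & 0b1        = 1
    //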
    unsigned i;
    apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
        __kmp_avail_proc * sizeof(apicThreadInfo));
    unsigned nApics = 0;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(i);
        threadInfo[nApics].osId = i;

        //
        // The apic id and max threads per pkg come from cpuid(1).
        //
        __kmp_x86_cpuid(1, 0, &buf);
        // cpuid(1) edx bit 9 is the on-chip APIC feature flag.
        if (((buf.edx >> 9) & 1) == 0) {
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_ApicNotPresent;
            return -1;
        }
        threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
        threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (threadInfo[nApics].maxThreadsPerPkg == 0) {
            threadInfo[nApics].maxThreadsPerPkg = 1;
        }

        //
        // Max cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            threadInfo[nApics].maxCoresPerPkg = 1;
        }

        //
        // Infer the pkgId / coreId / threadId using only the info
        // obtained locally.
        //
        int widthCT = __kmp_cpuid_mask_width(
            threadInfo[nApics].maxThreadsPerPkg);
        threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

        int widthC = __kmp_cpuid_mask_width(
            threadInfo[nApics].maxCoresPerPkg);
        int widthT = widthCT - widthC;
        if (widthT < 0) {
            //
            // I've never seen this one happen, but I suppose it could, if
            // the cpuid instruction on a chip was really screwed up.
            // Make sure to restore the affinity mask before the tail call.
            //
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }

        int maskC = (1 << widthC) - 1;
        threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
            & maskC;

        int maskT = (1 << widthT) - 1;
        threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

        nApics++;
    }

    //
    // We've collected all the info we need.
    // Restore the old affinity mask for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, form an Address object
    // with depth 1 and return immediately (or, if affinity is off, set
    // address2os to NULL and return).
    //
    // If it is configured to omit the package level when there is only a
    // single package, the logic at the end of this routine won't work if
    // there is only a single thread - it would try to form an Address
    // object with depth 0.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
        Address addr(1);
        addr.labels[0] = threadInfo[0].pkgId;
        (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
        }

        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the threadInfo table by physical Id.
    //
    qsort(threadInfo, nApics, sizeof(*threadInfo),
        __kmp_affinity_cmp_apicThreadInfo_phys_id);

    //
    // The table is now sorted by pkgId / coreId / threadId, but we really
    // don't know the radix of any of the fields. pkgId's may be sparsely
    // assigned among the chips on a system. Although coreId's are usually
    // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
    // [0..threadsPerCore-1], we don't want to make any such assumptions.
    //
    // For that matter, we don't know what coresPerPkg and threadsPerCore
    // (or the total # packages) are at this point - we want to determine
    // that now. We only have an upper bound on the first two figures.
    //
    // We also perform a consistency check at this point: the values returned
    // by the cpuid instruction for any thread bound to a given package had
    // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
    //
    nPackages = 1;
    nCoresPerPkg = 1;
    __kmp_nThreadsPerCore = 1;
    unsigned nCores = 1;

    unsigned pkgCt = 1; // to determine radii
    unsigned lastPkgId = threadInfo[0].pkgId;
    unsigned coreCt = 1;
    unsigned lastCoreId = threadInfo[0].coreId;
    unsigned threadCt = 1;
    unsigned lastThreadId = threadInfo[0].threadId;

    // intra-pkg consistency checks
    unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
    unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

    for (i = 1; i < nApics; i++) {
        if (threadInfo[i].pkgId != lastPkgId) {
            nCores++;
            pkgCt++;
            lastPkgId = threadInfo[i].pkgId;
            if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
            coreCt = 1;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;

            //
            // This is a different package, so go on to the next iteration
            // without doing any consistency checks. Reset the consistency
            // check vars, though.
            //
            prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
            prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
            continue;
        }

        if (threadInfo[i].coreId != lastCoreId) {
            nCores++;
            coreCt++;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;
        }
        else if (threadInfo[i].threadId != lastThreadId) {
            threadCt++;
            lastThreadId = threadInfo[i].threadId;
        }
        else {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
            return -1;
        }

        //
        // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
        // fields agree between all the threads bound to a given package.
        //
        if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
          || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }
    }
    nPackages = pkgCt;
    if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
    if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages. Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nCores;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (__kmp_affinity_uniform_topology()) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
            __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Now that we've determined the number of packages, the number of cores
    // per package, and the number of threads per core, we can construct the
    // data structure that is to be returned.
    //
    int pkgLevel = 0;
    int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
    int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
    unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

    KMP_ASSERT(depth > 0);
    *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

    for (i = 0; i < nApics; ++i) {
        Address addr(depth);
        unsigned os = threadInfo[i].osId;
        int d = 0;

        if (pkgLevel >= 0) {
            addr.labels[d++] = threadInfo[i].pkgId;
        }
        if (coreLevel >= 0) {
            addr.labels[d++] = threadInfo[i].coreId;
        }
        if (threadLevel >= 0) {
            addr.labels[d++] = threadInfo[i].threadId;
        }
        (*address2os)[i] = AddrUnsPair(addr, os);
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0)
          && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
            coreLevel, threadLevel);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return depth;
}


//
// Intel(R) microarchitecture code name Nehalem, Dunnington and later
// architectures support a newer interface for specifying the x2APIC Ids,
// based on cpuid leaf 11.
//
static int
__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
    kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;

    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check to see if cpuid leaf 11 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 11) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }
    __kmp_x86_cpuid(11, 0, &buf);
    if (buf.ebx == 0) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }

    //
    // Find the number of levels in the machine topology. While we're at it,
    // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We
    // will try to get more accurate values later by explicitly counting
    // them, but get reasonable defaults now, in case we return early.
    //
    int level;
    int threadLevel = -1;
    int coreLevel = -1;
    int pkgLevel = -1;
    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

    for (level = 0;; level++) {
        if (level > 31) {
            //
            // FIXME: Hack for DPD200163180
            //
            // If level is big then something went wrong -> exiting
            //
            // There could actually be 32 valid levels in the machine topology,
            // but so far, the only machine we have seen which does not exit
            // this loop before iteration 32 has fubar x2APIC settings.
            //
            // For now, just reject this case based upon loop trip count.
            //
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }
        __kmp_x86_cpuid(11, level, &buf);
        if (buf.ebx == 0) {
            if (pkgLevel < 0) {
                //
                // Will infer nPackages from __kmp_xproc
                //
                pkgLevel = level;
                level++;
            }
            break;
        }
        int kind = (buf.ecx >> 8) & 0xff;
        if (kind == 1) {
            //
            // SMT level
            //
            threadLevel = level;
            coreLevel = -1;
            pkgLevel = -1;
            __kmp_nThreadsPerCore = buf.ebx & 0xff;
            if (__kmp_nThreadsPerCore == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else if (kind == 2) {
            //
            // core level
            //
            coreLevel = level;
            pkgLevel = -1;
            nCoresPerPkg = buf.ebx & 0xff;
            if (nCoresPerPkg == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else {
            if (level <= 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
            if (pkgLevel >= 0) {
                continue;
            }
            pkgLevel = level;
            nPackages = buf.ebx & 0xff;
            if (nPackages == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
    }
    int depth = level;

    //
    // In the above loop, "level" was counted from the finest level (usually
    // thread) to the coarsest. The caller expects that we will place the
    // labels in (*address2os)[].first.labels[] in the inverse order, so
    // we need to invert the vars saying which level means what.
    //
    if (threadLevel >= 0) {
        threadLevel = depth - threadLevel - 1;
    }
    if (coreLevel >= 0) {
        coreLevel = depth - coreLevel - 1;
    }
    KMP_DEBUG_ASSERT(pkgLevel >= 0);
    pkgLevel = depth - pkgLevel - 1;
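
    //
    // Example (illustrative): with depth == 3 and cpuid reporting
    // thread = 0, core = 1, package = 2, the inversion yields pkgLevel = 0,
    // coreLevel = 1, threadLevel = 2 - the coarsest-to-finest order used by
    // labels[] everywhere else.
    //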

    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
    // the defaults that we calculated from issuing cpuid without binding
    // to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE())
    {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Allocate the data structure to be returned.
    //
    AddrUnsPair *retval = (AddrUnsPair *)
        __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    unsigned int proc;
    int nApics = 0;
    for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(proc, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(proc);

        //
        // Extract the labels for each level in the machine topology map
        // from the Apic ID.
        //
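        //
        // Worked example (illustrative): assuming depth == 3, an SMT shift
        // of 1 and a core shift of 5, x2APIC id 0x2d yields
        //     labels[2] (thread) =  0x2d & 0x1        = 1
        //     labels[1] (core)   = (0x2d & 0x1f) >> 1 = 6
        //     labels[0] (pkg)    =  0x2d >> 5         = 1
        //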
        Address addr(depth);
        int prev_shift = 0;

        for (level = 0; level < depth; level++) {
            __kmp_x86_cpuid(11, level, &buf);
            unsigned apicId = buf.edx;
            if (buf.ebx == 0) {
                if (level != depth - 1) {
                    KMP_CPU_FREE(oldMask);
                    *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
                    return -1;
                }
                addr.labels[depth - level - 1] = apicId >> prev_shift;
                level++;
                break;
            }
            int shift = buf.eax & 0x1f;
            int mask = (1 << shift) - 1;
            addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
            prev_shift = shift;
        }
        if (level != depth) {
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }

        retval[nApics] = AddrUnsPair(addr, proc);
        nApics++;
    }

    //
    // We've collected all the info we need.
    // Restore the old affinity mask for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, return now.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(retval);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        //
        // Form an Address object which only includes the package level.
        //
        Address addr(1);
        addr.labels[0] = retval[0].first.labels[pkgLevel];
        retval[0].first = addr;

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
        }

        *address2os = retval;
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the table by physical Id.
    //
    qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

    //
    // Find the radix at each of the levels.
    //
    unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    for (level = 0; level < depth; level++) {
        totals[level] = 1;
        maxCt[level] = 1;
        counts[level] = 1;
        last[level] = retval[0].first.labels[level];
    }
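
    //
    // Descriptive note (added): counts[level] tracks the number of distinct
    // labels seen at that level under the current parent, maxCt[level] the
    // maximum of counts[level] over all parents, totals[level] the total
    // number of distinct objects at that level, and last[level] the previous
    // label, used to detect when a new object starts.
    //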

    //
    // From here on, the iteration variable "level" runs from the finest
    // level to the coarsest, i.e. we iterate forward through
    // (*address2os)[].first.labels[] - in the previous loops, we iterated
    // backwards.
    //
    for (proc = 1; (int)proc < nApics; proc++) {
        int level;
        for (level = 0; level < depth; level++) {
            if (retval[proc].first.labels[level] != last[level]) {
                int j;
                for (j = level + 1; j < depth; j++) {
                    totals[j]++;
                    counts[j] = 1;
                    // The line below causes incorrect topology information to
                    // be printed when the max value for some level
                    // (maxCt[level]) is encountered before a smaller value
                    // while going through the array. For example, if pkg0
                    // has 4 cores and pkg1 has 2 cores, then maxCt[1] == 2
                    // whereas it must be 4.
                    // TODO!!! Check if it can be commented safely
                    //maxCt[j] = 1;
                    last[j] = retval[proc].first.labels[j];
                }
                totals[level]++;
                counts[level]++;
                if (counts[level] > maxCt[level]) {
                    maxCt[level] = counts[level];
                }
                last[level] = retval[proc].first.labels[level];
                break;
            }
            else if (level == depth - 1) {
                __kmp_free(last);
                __kmp_free(maxCt);
                __kmp_free(counts);
                __kmp_free(totals);
                __kmp_free(retval);
                KMP_CPU_FREE(oldMask);
                *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
                return -1;
            }
        }
    }

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages. Make sure all these vars are set
    // correctly, and return if affinity is not enabled.
    //
    if (threadLevel >= 0) {
        __kmp_nThreadsPerCore = maxCt[threadLevel];
    }
    else {
        __kmp_nThreadsPerCore = 1;
    }
    nPackages = totals[pkgLevel];

    if (coreLevel >= 0) {
        __kmp_ncores = totals[coreLevel];
        nCoresPerPkg = maxCt[coreLevel];
    }
    else {
        __kmp_ncores = nPackages;
        nCoresPerPkg = 1;
    }

    //
    // Check to see if the machine topology is uniform
    //
    unsigned prod = maxCt[0];
    for (level = 1; level < depth; level++) {
        prod *= maxCt[level];
    }
    bool uniform = (prod == totals[level - 1]);
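
    //
    // Example (illustrative): maxCt = {2, 4, 2} gives prod = 16, so the
    // topology is uniform only if the machine really has 2 * 4 * 2 == 16
    // leaf objects, i.e. totals[depth - 1] == 16.
    //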

    //
    // Print the machine topology summary.
    //
    if (__kmp_affinity_verbose) {
        char mask[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (uniform) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }

        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);

        __kmp_str_buf_print(&buf, "%d", totals[0]);
        for (level = 1; level <= pkgLevel; level++) {
            __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
        }
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
            __kmp_nThreadsPerCore, __kmp_ncores);

        __kmp_str_buf_free(&buf);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(last);
        __kmp_free(maxCt);
        __kmp_free(counts);
        __kmp_free(totals);
        __kmp_free(retval);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Find any levels with radix 1, and remove them from the map
    // (except for the package level).
    //
    int new_depth = 0;
    for (level = 0; level < depth; level++) {
        if ((maxCt[level] == 1) && (level != pkgLevel)) {
            continue;
        }
        new_depth++;
    }

    //
    // If we are removing any levels, allocate a new vector to return,
    // and copy the relevant information to it.
    //
    if (new_depth != depth) {
        AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
            sizeof(AddrUnsPair) * nApics);
        for (proc = 0; (int)proc < nApics; proc++) {
            Address addr(new_depth);
            new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
        }
        int new_level = 0;
        int newPkgLevel = -1;
        int newCoreLevel = -1;
        int newThreadLevel = -1;
        int i;
        for (level = 0; level < depth; level++) {
            if ((maxCt[level] == 1)
              && (level != pkgLevel)) {
                //
                // Remove this level. Never remove the package level
                //
                continue;
            }
            if (level == pkgLevel) {
                newPkgLevel = level;
            }
            if (level == coreLevel) {
                newCoreLevel = level;
            }
            if (level == threadLevel) {
                newThreadLevel = level;
            }
            for (proc = 0; (int)proc < nApics; proc++) {
                new_retval[proc].first.labels[new_level]
                    = retval[proc].first.labels[level];
            }
            new_level++;
        }

        __kmp_free(retval);
        retval = new_retval;
        depth = new_depth;
        pkgLevel = newPkgLevel;
        coreLevel = newCoreLevel;
        threadLevel = newThreadLevel;
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
            coreLevel, threadLevel);
    }

    __kmp_free(last);
    __kmp_free(maxCt);
    __kmp_free(counts);
    __kmp_free(totals);
    KMP_CPU_FREE(oldMask);
    *address2os = retval;
    return depth;
}


# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#define osIdIndex       0
#define threadIdIndex   1
#define coreIdIndex     2
#define pkgIdIndex      3
#define nodeIdIndex     4

typedef unsigned *ProcCpuInfo;
static unsigned maxIndex = pkgIdIndex;


static int
__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
{
    const unsigned *aa = (const unsigned *)a;
    const unsigned *bb = (const unsigned *)b;
    if (aa[osIdIndex] < bb[osIdIndex]) return -1;
    if (aa[osIdIndex] > bb[osIdIndex]) return 1;
    return 0;
}


static int
__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
{
    unsigned i;
    const unsigned *aa = *((const unsigned **)a);
    const unsigned *bb = *((const unsigned **)b);
    for (i = maxIndex; ; i--) {
        if (aa[i] < bb[i]) return -1;
        if (aa[i] > bb[i]) return 1;
        if (i == osIdIndex) break;
    }
    return 0;
}


//
// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
// affinity map.
//
static int
__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
    kmp_i18n_id_t *const msg_id, FILE *f)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Scan the file once: count the number of "processor" (osId) fields,
    // and find the highest value of <n> for a node_<n> field.
    //
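    //
    // For reference (illustrative), a typical /proc/cpuinfo record looks like
    //
    //     processor       : 0
    //     physical id     : 0
    //     core id         : 0
    //     ...
    //
    // with records separated by blank lines.
    //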
    char buf[256];
    unsigned num_records = 0;
    while (! feof(f)) {
        buf[sizeof(buf) - 1] = 1;
        if (! fgets(buf, sizeof(buf), f)) {
            //
            // Read errors presumably because of EOF
            //
            break;
        }

        char s1[] = "processor";
        if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
            num_records++;
            continue;
        }

        //
        // FIXME - this will match "node_<n> <garbage>"
        //
        unsigned level;
        if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
            if (nodeIdIndex + level >= maxIndex) {
                maxIndex = nodeIdIndex + level;
            }
            continue;
        }
    }

    //
    // Check for empty file / no valid processor records, or too many.
    // The number of records can't exceed the number of valid bits in the
    // affinity mask.
    //
    if (num_records == 0) {
        *line = 0;
        *msg_id = kmp_i18n_str_NoProcRecords;
        return -1;
    }
    if (num_records > (unsigned)__kmp_xproc) {
        *line = 0;
        *msg_id = kmp_i18n_str_TooManyProcRecords;
        return -1;
    }

    //
    // Set the file pointer back to the beginning, so that we can scan the
    // file again, this time performing a full parse of the data.
    // Allocate a vector of ProcCpuInfo objects, where we will place the data.
    // Adding an extra element at the end allows us to remove a lot of extra
    // checks for termination conditions.
    //
    if (fseek(f, 0, SEEK_SET) != 0) {
        *line = 0;
        *msg_id = kmp_i18n_str_CantRewindCpuinfo;
        return -1;
    }

    //
    // Allocate the array of records to store the proc info in. The dummy
    // element at the end makes the logic in filling them out easier to code.
    //
    unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
        * sizeof(unsigned *));
    unsigned i;
    for (i = 0; i <= num_records; i++) {
        threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
            * sizeof(unsigned));
    }

#define CLEANUP_THREAD_INFO \
    for (i = 0; i <= num_records; i++) { \
        __kmp_free(threadInfo[i]); \
    } \
    __kmp_free(threadInfo);

    //
    // A value of UINT_MAX means that we didn't find the field
    //
    unsigned __index;

#define INIT_PROC_INFO(p) \
    for (__index = 0; __index <= maxIndex; __index++) { \
        (p)[__index] = UINT_MAX; \
    }

    for (i = 0; i <= num_records; i++) {
        INIT_PROC_INFO(threadInfo[i]);
    }

    unsigned num_avail = 0;
    *line = 0;
    while (! feof(f)) {
        //
        // Create an inner scoping level, so that all the goto targets at the
        // end of the loop appear in an outer scoping level. This avoids
        // warnings about jumping past an initialization to a target in the
        // same block.
        //
        {
            buf[sizeof(buf) - 1] = 1;
            bool long_line = false;
            if (! fgets(buf, sizeof(buf), f)) {
                //
                // Read errors presumably because of EOF
                //
                // If there is valid data in threadInfo[num_avail], then fake
                // a blank line to ensure that the last address gets parsed.
                //
                bool valid = false;
                for (i = 0; i <= maxIndex; i++) {
                    if (threadInfo[num_avail][i] != UINT_MAX) {
                        valid = true;
                    }
                }
                if (! valid) {
                    break;
                }
                buf[0] = 0;
            } else if (!buf[sizeof(buf) - 1]) {
                //
                // The line is longer than the buffer. Set a flag and don't
                // emit an error if we were going to ignore the line, anyway.
                //
                long_line = true;

#define CHECK_LINE \
    if (long_line) { \
        CLEANUP_THREAD_INFO; \
        *msg_id = kmp_i18n_str_LongLineCpuinfo; \
        return -1; \
    }
            }
            (*line)++;

            char s1[] = "processor";
            if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s1) - 1, ':');
                unsigned val;
                if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][osIdIndex] = val;
#if KMP_OS_LINUX && USE_SYSFS_INFO
                char path[256];
                KMP_SNPRINTF(path, sizeof(path),
                    "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
                    threadInfo[num_avail][osIdIndex]);
                __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);

                KMP_SNPRINTF(path, sizeof(path),
                    "/sys/devices/system/cpu/cpu%u/topology/core_id",
                    threadInfo[num_avail][osIdIndex]);
                __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
                continue;
#else
            }
            char s2[] = "physical id";
            if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s2) - 1, ':');
                unsigned val;
                if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][pkgIdIndex] = val;
                continue;
            }
            char s3[] = "core id";
            if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s3) - 1, ':');
                unsigned val;
                if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][coreIdIndex] = val;
                continue;
#endif // KMP_OS_LINUX && USE_SYSFS_INFO
            }
1733 char s4[] = "thread id";
1734 if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
1735 CHECK_LINE;
1736 char *p = strchr(buf + sizeof(s4) - 1, ':');
1737 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001738 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001739 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
1740 threadInfo[num_avail][threadIdIndex] = val;
1741 continue;
1742 }
1743 unsigned level;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001744 if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001745 CHECK_LINE;
1746 char *p = strchr(buf + sizeof(s4) - 1, ':');
1747 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001748 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001749 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
1750 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
1751 threadInfo[num_avail][nodeIdIndex + level] = val;
1752 continue;
1753 }
1754
1755 //
1756 // We didn't recognize the leading token on the line.
1757 // There are lots of leading tokens that we don't recognize -
1758 // if the line isn't empty, go on to the next line.
1759 //
1760 if ((*buf != 0) && (*buf != '\n')) {
1761 //
1762 // If the line is longer than the buffer, read characters
1763 // until we find a newline.
1764 //
1765 if (long_line) {
1766 int ch;
1767 while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
1768 }
1769 continue;
1770 }
1771
1772 //
1773 // A newline has signalled the end of the processor record.
1774 // Check that there aren't too many procs specified.
1775 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001776 if ((int)num_avail == __kmp_xproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001777 CLEANUP_THREAD_INFO;
1778 *msg_id = kmp_i18n_str_TooManyEntries;
1779 return -1;
1780 }
1781
1782 //
1783 // Check for missing fields. The osId field must be there, and we
1784 // currently require that the physical id field is specified, also.
1785 //
1786 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
1787 CLEANUP_THREAD_INFO;
1788 *msg_id = kmp_i18n_str_MissingProcField;
1789 return -1;
1790 }
1791 if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
1792 CLEANUP_THREAD_INFO;
1793 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
1794 return -1;
1795 }
1796
1797 //
1798 // Skip this proc if it is not included in the machine model.
1799 //
1800 if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
1801 INIT_PROC_INFO(threadInfo[num_avail]);
1802 continue;
1803 }
1804
1805 //
1806 // We have a successful parse of this proc's info.
1807 // Increment the counter, and prepare for the next proc.
1808 //
1809 num_avail++;
1810 KMP_ASSERT(num_avail <= num_records);
1811 INIT_PROC_INFO(threadInfo[num_avail]);
1812 }
1813 continue;
1814
1815 no_val:
1816 CLEANUP_THREAD_INFO;
1817 *msg_id = kmp_i18n_str_MissingValCpuinfo;
1818 return -1;
1819
1820 dup_field:
1821 CLEANUP_THREAD_INFO;
1822 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
1823 return -1;
1824 }
1825 *line = 0;
1826
1827# if KMP_MIC && REDUCE_TEAM_SIZE
1828 unsigned teamSize = 0;
1829# endif // KMP_MIC && REDUCE_TEAM_SIZE
1830
1831 // check for num_records == __kmp_xproc ???
1832
1833 //
1834 // If there's only one thread context to bind to, form an Address object
1835 // with depth 1 and return immediately (or, if affinity is off, set
1836 // address2os to NULL and return).
1837 //
1838 // If it is configured to omit the package level when there is only a
1839 // single package, the logic at the end of this routine won't work if
1840 // there is only a single thread - it would try to form an Address
1841 // object with depth 0.
1842 //
1843 KMP_ASSERT(num_avail > 0);
1844 KMP_ASSERT(num_avail <= num_records);
1845 if (num_avail == 1) {
1846 __kmp_ncores = 1;
1847 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001848 if (__kmp_affinity_verbose) {
1849 if (! KMP_AFFINITY_CAPABLE()) {
1850 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
1851 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1852 KMP_INFORM(Uniform, "KMP_AFFINITY");
1853 }
1854 else {
1855 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1856 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
1857 fullMask);
1858 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
1859 if (__kmp_affinity_respect_mask) {
1860 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1861 } else {
1862 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1863 }
1864 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1865 KMP_INFORM(Uniform, "KMP_AFFINITY");
1866 }
1867 int index;
1868 kmp_str_buf_t buf;
1869 __kmp_str_buf_init(&buf);
1870 __kmp_str_buf_print(&buf, "1");
1871 for (index = maxIndex - 1; index > pkgIdIndex; index--) {
1872 __kmp_str_buf_print(&buf, " x 1");
1873 }
1874 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
1875 __kmp_str_buf_free(&buf);
1876 }
1877
1878 if (__kmp_affinity_type == affinity_none) {
1879 CLEANUP_THREAD_INFO;
1880 return 0;
1881 }
1882
1883 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
1884 Address addr(1);
1885 addr.labels[0] = threadInfo[0][pkgIdIndex];
1886 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
1887
1888 if (__kmp_affinity_gran_levels < 0) {
1889 __kmp_affinity_gran_levels = 0;
1890 }
1891
1892 if (__kmp_affinity_verbose) {
1893 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
1894 }
1895
1896 CLEANUP_THREAD_INFO;
1897 return 1;
1898 }
1899
1900 //
1901 // Sort the threadInfo table by physical Id.
1902 //
1903 qsort(threadInfo, num_avail, sizeof(*threadInfo),
1904 __kmp_affinity_cmp_ProcCpuInfo_phys_id);
1905
1906 //
1907 // The table is now sorted by pkgId / coreId / threadId, but we really
1908 // don't know the radix of any of the fields. pkgId's may be sparsely
1909 // assigned among the chips on a system. Although coreId's are usually
1910 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
1911 // [0..threadsPerCore-1], we don't want to make any such assumptions.
1912 //
1913 // For that matter, we don't know what coresPerPkg and threadsPerCore
1914 // (or the total # packages) are at this point - we want to determine
1915 // that now. We only have an upper bound on the first two figures.
1916 //
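//
// Worked example (hypothetical, uniform machine): for 2 packages x 2
// cores x 2 threads, the scan below (together with the maxCt fixup that
// follows it) ends with totals[pkgIdIndex] == 2, totals[coreIdIndex] == 4,
// totals[threadIdIndex] == 8, and maxCt[coreIdIndex] ==
// maxCt[threadIdIndex] == 2: counts/maxCt track the radix within one
// parent, while totals counts nodes machine-wide.
//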
1917 unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
1918 * sizeof(unsigned));
1919 unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
1920 * sizeof(unsigned));
1921 unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
1922 * sizeof(unsigned));
1923 unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
1924 * sizeof(unsigned));
1925
1926 bool assign_thread_ids = false;
1927 unsigned threadIdCt;
1928 unsigned index;
1929
1930 restart_radix_check:
1931 threadIdCt = 0;
1932
1933 //
1934 // Initialize the counter arrays with data from threadInfo[0].
1935 //
1936 if (assign_thread_ids) {
1937 if (threadInfo[0][threadIdIndex] == UINT_MAX) {
1938 threadInfo[0][threadIdIndex] = threadIdCt++;
1939 }
1940 else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
1941 threadIdCt = threadInfo[0][threadIdIndex] + 1;
1942 }
1943 }
1944 for (index = 0; index <= maxIndex; index++) {
1945 counts[index] = 1;
1946 maxCt[index] = 1;
1947 totals[index] = 1;
1948 lastId[index] = threadInfo[0][index];
1949 }
1950
1951 //
1952 // Run through the rest of the OS procs.
1953 //
1954 for (i = 1; i < num_avail; i++) {
1955 //
1956 // Find the most significant index whose id differs
1957 // from the id for the previous OS proc.
1958 //
1959 for (index = maxIndex; index >= threadIdIndex; index--) {
1960 if (assign_thread_ids && (index == threadIdIndex)) {
1961 //
1962 // Auto-assign the thread id field if it wasn't specified.
1963 //
1964 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
1965 threadInfo[i][threadIdIndex] = threadIdCt++;
1966 }
1967
1968 //
1969 // Apparently the thread id field was specified for some
1970 // entries and not others. Start the thread id counter
1971 // off at the next higher thread id.
1972 //
1973 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
1974 threadIdCt = threadInfo[i][threadIdIndex] + 1;
1975 }
1976 }
1977 if (threadInfo[i][index] != lastId[index]) {
1978 //
1979 // Run through all indices which are less significant,
1980 // and reset the counts to 1.
1981 //
1982 // At all levels up to and including index, we need to
1983 // increment the totals and record the last id.
1984 //
1985 unsigned index2;
1986 for (index2 = threadIdIndex; index2 < index; index2++) {
1987 totals[index2]++;
1988 if (counts[index2] > maxCt[index2]) {
1989 maxCt[index2] = counts[index2];
1990 }
1991 counts[index2] = 1;
1992 lastId[index2] = threadInfo[i][index2];
1993 }
1994 counts[index]++;
1995 totals[index]++;
1996 lastId[index] = threadInfo[i][index];
1997
1998 if (assign_thread_ids && (index > threadIdIndex)) {
1999
2000# if KMP_MIC && REDUCE_TEAM_SIZE
2001 //
2002 // The default team size is the total #threads in the machine
2003 // minus 1 thread for every core that has 3 or more threads.
2004 //
2005 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2006# endif // KMP_MIC && REDUCE_TEAM_SIZE
2007
2008 //
2009 // Restart the thread counter, as we are on a new core.
2010 //
2011 threadIdCt = 0;
2012
2013 //
2014 // Auto-assign the thread id field if it wasn't specified.
2015 //
2016 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2017 threadInfo[i][threadIdIndex] = threadIdCt++;
2018 }
2019
2020 //
2021 // Apparently the thread id field was specified for some
2022 // entries and not others. Start the thread id counter
2023 // off at the next higher thread id.
2024 //
2025 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2026 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2027 }
2028 }
2029 break;
2030 }
2031 }
2032 if (index < threadIdIndex) {
2033 //
2034 // If thread ids were specified, it is an error if they are not
2035 // unique. Also, check that we haven't already restarted the
2036 // loop (to be safe - shouldn't need to).
2037 //
2038 if ((threadInfo[i][threadIdIndex] != UINT_MAX)
2039 || assign_thread_ids) {
2040 __kmp_free(lastId);
2041 __kmp_free(totals);
2042 __kmp_free(maxCt);
2043 __kmp_free(counts);
2044 CLEANUP_THREAD_INFO;
2045 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2046 return -1;
2047 }
2048
2049 //
2050 // If the thread ids were not specified and we see entries
2051 // that are duplicates, start the loop over and
2052 // assign the thread ids manually.
2053 //
2054 assign_thread_ids = true;
2055 goto restart_radix_check;
2056 }
2057 }
2058
2059# if KMP_MIC && REDUCE_TEAM_SIZE
2060 //
2061 // The default team size is the total #threads in the machine
2062 // minus 1 thread for every core that has 3 or more threads.
2063 //
2064 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
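//
// For example (hypothetical): on a coprocessor with 60 cores and 4
// thread contexts per core, each core contributes 4 - 1 = 3 threads,
// so the default team size computed here is 60 * 3 = 180.
//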
2065# endif // KMP_MIC && REDUCE_TEAM_SIZE
2066
2067 for (index = threadIdIndex; index <= maxIndex; index++) {
2068 if (counts[index] > maxCt[index]) {
2069 maxCt[index] = counts[index];
2070 }
2071 }
2072
2073 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2074 nCoresPerPkg = maxCt[coreIdIndex];
2075 nPackages = totals[pkgIdIndex];
2076
2077 //
2078 // Check to see if the machine topology is uniform
2079 //
2080 unsigned prod = totals[maxIndex];
2081 for (index = threadIdIndex; index < maxIndex; index++) {
2082 prod *= maxCt[index];
2083 }
2084 bool uniform = (prod == totals[threadIdIndex]);
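//
// Worked example (hypothetical, assuming the package level is the
// topmost, i.e. maxIndex == pkgIdIndex): for the uniform 2 x 2 x 2
// machine, prod = totals[pkgIdIndex] * maxCt[coreIdIndex] *
// maxCt[threadIdIndex] = 2 * 2 * 2 = 8 == totals[threadIdIndex], so
// uniform is true. If one core had only a single thread context,
// totals[threadIdIndex] would be 7 and the topology would be classified
// as non-uniform.
//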
2085
2086 //
2087 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00002088 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002089 // nCoresPerPkg, & nPackages. Make sure all these vars are set
2090 // correctly, and return now if affinity is not enabled.
2091 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00002092 __kmp_ncores = totals[coreIdIndex];
2093
2094 if (__kmp_affinity_verbose) {
2095 if (! KMP_AFFINITY_CAPABLE()) {
2096 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2097 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2098 if (uniform) {
2099 KMP_INFORM(Uniform, "KMP_AFFINITY");
2100 } else {
2101 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2102 }
2103 }
2104 else {
2105 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2106 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
2107 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2108 if (__kmp_affinity_respect_mask) {
2109 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2110 } else {
2111 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2112 }
2113 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2114 if (uniform) {
2115 KMP_INFORM(Uniform, "KMP_AFFINITY");
2116 } else {
2117 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2118 }
2119 }
2120 kmp_str_buf_t buf;
2121 __kmp_str_buf_init(&buf);
2122
2123 __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
2124 for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2125 __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
2126 }
2127 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
2128 maxCt[threadIdIndex], __kmp_ncores);
2129
2130 __kmp_str_buf_free(&buf);
2131 }
2132
2133# if KMP_MIC && REDUCE_TEAM_SIZE
2134 //
2135 // Set the default team size.
2136 //
2137 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2138 __kmp_dflt_team_nth = teamSize;
2139 KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
2140 __kmp_dflt_team_nth));
2141 }
2142# endif // KMP_MIC && REDUCE_TEAM_SIZE
2143
2144 if (__kmp_affinity_type == affinity_none) {
2145 __kmp_free(lastId);
2146 __kmp_free(totals);
2147 __kmp_free(maxCt);
2148 __kmp_free(counts);
2149 CLEANUP_THREAD_INFO;
2150 return 0;
2151 }
2152
2153 //
2154 // Count the number of levels which have more nodes at that level than
2155 // at the parent's level (with there being an implicit root node of
2156 // the top level). This is equivalent to saying that there is at least
2157 // one node at this level which has a sibling. These levels are in the
2158 // map, and the package level is always in the map.
2159 //
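//
// For example (hypothetical): if every core has exactly one thread
// context, totals[threadIdIndex] == totals[coreIdIndex], so no node at
// the thread level has a sibling, inMap[threadIdIndex] is false, and
// the thread level is omitted from the map.
//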
2160 bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
2161 int level = 0;
2162 for (index = threadIdIndex; index < maxIndex; index++) {
2163 KMP_ASSERT(totals[index] >= totals[index + 1]);
2164 inMap[index] = (totals[index] > totals[index + 1]);
2165 }
2166 inMap[maxIndex] = (totals[maxIndex] > 1);
2167 inMap[pkgIdIndex] = true;
2168
2169 int depth = 0;
2170 for (index = threadIdIndex; index <= maxIndex; index++) {
2171 if (inMap[index]) {
2172 depth++;
2173 }
2174 }
2175 KMP_ASSERT(depth > 0);
2176
2177 //
2178 // Construct the data structure that is to be returned.
2179 //
2180 *address2os = (AddrUnsPair*)
2181 __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
2182 int pkgLevel = -1;
2183 int coreLevel = -1;
2184 int threadLevel = -1;
2185
2186 for (i = 0; i < num_avail; ++i) {
2187 Address addr(depth);
2188 unsigned os = threadInfo[i][osIdIndex];
2189 int src_index;
2190 int dst_index = 0;
2191
2192 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
2193 if (! inMap[src_index]) {
2194 continue;
2195 }
2196 addr.labels[dst_index] = threadInfo[i][src_index];
2197 if (src_index == pkgIdIndex) {
2198 pkgLevel = dst_index;
2199 }
2200 else if (src_index == coreIdIndex) {
2201 coreLevel = dst_index;
2202 }
2203 else if (src_index == threadIdIndex) {
2204 threadLevel = dst_index;
2205 }
2206 dst_index++;
2207 }
2208 (*address2os)[i] = AddrUnsPair(addr, os);
2209 }
2210
2211 if (__kmp_affinity_gran_levels < 0) {
2212 //
2213 // Set the granularity level based on what levels are modeled
2214 // in the machine topology map.
2215 //
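//
// For example (hypothetical): with granularity=core on a map that
// models a thread level, only the thread level lies below the
// granularity, so __kmp_affinity_gran_levels becomes 1 and the thread
// label is ignored when the masks are formed.
//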
2216 unsigned src_index;
2217 __kmp_affinity_gran_levels = 0;
2218 for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2219 if (! inMap[src_index]) {
2220 continue;
2221 }
2222 switch (src_index) {
2223 case threadIdIndex:
2224 if (__kmp_affinity_gran > affinity_gran_thread) {
2225 __kmp_affinity_gran_levels++;
2226 }
2227
2228 break;
2229 case coreIdIndex:
2230 if (__kmp_affinity_gran > affinity_gran_core) {
2231 __kmp_affinity_gran_levels++;
2232 }
2233 break;
2234
2235 case pkgIdIndex:
2236 if (__kmp_affinity_gran > affinity_gran_package) {
2237 __kmp_affinity_gran_levels++;
2238 }
2239 break;
2240 }
2241 }
2242 }
2243
2244 if (__kmp_affinity_verbose) {
2245 __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2246 coreLevel, threadLevel);
2247 }
2248
2249 __kmp_free(inMap);
2250 __kmp_free(lastId);
2251 __kmp_free(totals);
2252 __kmp_free(maxCt);
2253 __kmp_free(counts);
2254 CLEANUP_THREAD_INFO;
2255 return depth;
2256}
2257
2258
2259//
2260// Create and return a table of affinity masks, indexed by OS thread ID.
2261// This routine handles OR'ing together all the affinity masks of threads
2262// that are sufficiently close, if granularity > fine.
2263//
2264static kmp_affin_mask_t *
2265__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
2266 AddrUnsPair *address2os, unsigned numAddrs)
2267{
2268 //
2269 // First form a table of affinity masks in order of OS thread id.
2270 //
2271 unsigned depth;
2272 unsigned maxOsId;
2273 unsigned i;
2274
2275 KMP_ASSERT(numAddrs > 0);
2276 depth = address2os[0].first.depth;
2277
2278 maxOsId = 0;
2279 for (i = 0; i < numAddrs; i++) {
2280 unsigned osId = address2os[i].second;
2281 if (osId > maxOsId) {
2282 maxOsId = osId;
2283 }
2284 }
2285 kmp_affin_mask_t *osId2Mask = (kmp_affin_mask_t *)__kmp_allocate(
2286 (maxOsId + 1) * __kmp_affin_mask_size);
2287
2288 //
2289 // Sort the address2os table according to physical order. Doing so
2290 // will put all threads on the same core/package/node in consecutive
2291 // locations.
2292 //
2293 qsort(address2os, numAddrs, sizeof(*address2os),
2294 __kmp_affinity_cmp_Address_labels);
2295
2296 KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2297 if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
2298 KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
2299 }
2300 if (__kmp_affinity_gran_levels >= (int)depth) {
2301 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2302 && (__kmp_affinity_type != affinity_none))) {
2303 KMP_WARNING(AffThreadsMayMigrate);
2304 }
2305 }
2306
2307 //
2308 // Run through the table, forming the masks for all threads on each
2309 // core. Threads on the same core will have identical "Address"
2310 // objects, not considering the last level, which must be the thread
2311 // id. All threads on a core will appear consecutively.
2312 //
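//
// Illustrative example (hypothetical): with __kmp_affinity_gran_levels
// == 1 and 2 thread contexts per core, the two hyperthreads of a core
// compare as "close" (their labels differ only in the ignored thread
// level), so both OS procs end up sharing one mask with both bits set.
//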
2313 unsigned unique = 0;
2314 unsigned j = 0; // index of 1st thread on core
2315 unsigned leader = 0;
2316 Address *leaderAddr = &(address2os[0].first);
2317 kmp_affin_mask_t *sum
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002318 = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002319 KMP_CPU_ZERO(sum);
2320 KMP_CPU_SET(address2os[0].second, sum);
2321 for (i = 1; i < numAddrs; i++) {
2322 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002323 // If this thread is sufficiently close to the leader (within the
Jim Cownie5e8470a2013-09-27 10:38:44 +00002324 // granularity setting), then set the bit for this os thread in the
2325 // affinity mask for this group, and go on to the next thread.
2326 //
2327 if (leaderAddr->isClose(address2os[i].first,
2328 __kmp_affinity_gran_levels)) {
2329 KMP_CPU_SET(address2os[i].second, sum);
2330 continue;
2331 }
2332
2333 //
2334 // For every thread in this group, copy the mask to the thread's
2335 // entry in the osId2Mask table. Mark the first address as a
2336 // leader.
2337 //
2338 for (; j < i; j++) {
2339 unsigned osId = address2os[j].second;
2340 KMP_DEBUG_ASSERT(osId <= maxOsId);
2341 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2342 KMP_CPU_COPY(mask, sum);
2343 address2os[j].first.leader = (j == leader);
2344 }
2345 unique++;
2346
2347 //
2348 // Start a new mask.
2349 //
2350 leader = i;
2351 leaderAddr = &(address2os[i].first);
2352 KMP_CPU_ZERO(sum);
2353 KMP_CPU_SET(address2os[i].second, sum);
2354 }
2355
2356 //
2357 // For every thread in last group, copy the mask to the thread's
2358 // entry in the osId2Mask table.
2359 //
2360 for (; j < i; j++) {
2361 unsigned osId = address2os[j].second;
2362 KMP_DEBUG_ASSERT(osId <= maxOsId);
2363 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2364 KMP_CPU_COPY(mask, sum);
2365 address2os[j].first.leader = (j == leader);
2366 }
2367 unique++;
2368
2369 *maxIndex = maxOsId;
2370 *numUnique = unique;
2371 return osId2Mask;
2372}
2373
2374
2375//
2376// Stuff for the affinity proclist parsers. It's easier to declare these vars
2377// as file-static than to try and pass them through the calling sequence of
2378// the recursive-descent OMP_PLACES parser.
2379//
2380static kmp_affin_mask_t *newMasks;
2381static int numNewMasks;
2382static int nextNewMask;
2383
2384#define ADD_MASK(_mask) \
2385 { \
2386 if (nextNewMask >= numNewMasks) { \
2387 numNewMasks *= 2; \
2388 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
2389 numNewMasks * __kmp_affin_mask_size); \
2390 } \
2391 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
2392 nextNewMask++; \
2393 }
2394
2395#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
2396 { \
2397 if (((_osId) > _maxOsId) || \
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002398 (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002399 if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
2400 && (__kmp_affinity_type != affinity_none))) { \
2401 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
2402 } \
2403 } \
2404 else { \
2405 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
2406 } \
2407 }
2408
2409
2410//
2411// Re-parse the proclist (for the explicit affinity type), and form the list
2412// of affinity newMasks indexed by gtid.
2413//
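//
// For example (hypothetical setting): a proclist of "0,3-5,{7,8}" yields
// the masks {0}, {3}, {4}, {5}, and {7,8} - a braced set becomes a single
// union mask, while a bare number or a range contributes one mask per proc.
//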
2414static void
2415__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
2416 unsigned int *out_numMasks, const char *proclist,
2417 kmp_affin_mask_t *osId2Mask, int maxOsId)
2418{
2419 const char *scan = proclist;
2420 const char *next = proclist;
2421
2422 //
2423 // We use malloc() for the temporary mask vector,
2424 // so that we can use realloc() to extend it.
2425 //
2426 numNewMasks = 2;
2427 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
2428 * __kmp_affin_mask_size);
2429 nextNewMask = 0;
2430 kmp_affin_mask_t *sumMask = (kmp_affin_mask_t *)__kmp_allocate(
2431 __kmp_affin_mask_size);
2432 int setSize = 0;
2433
2434 for (;;) {
2435 int start, end, stride;
2436
2437 SKIP_WS(scan);
2438 next = scan;
2439 if (*next == '\0') {
2440 break;
2441 }
2442
2443 if (*next == '{') {
2444 int num;
2445 setSize = 0;
2446 next++; // skip '{'
2447 SKIP_WS(next);
2448 scan = next;
2449
2450 //
2451 // Read the first integer in the set.
2452 //
2453 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2454 "bad proclist");
2455 SKIP_DIGITS(next);
2456 num = __kmp_str_to_int(scan, *next);
2457 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2458
2459 //
2460 // Copy the mask for that osId to the sum (union) mask.
2461 //
2462 if ((num > maxOsId) ||
2463 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2464 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2465 && (__kmp_affinity_type != affinity_none))) {
2466 KMP_WARNING(AffIgnoreInvalidProcID, num);
2467 }
2468 KMP_CPU_ZERO(sumMask);
2469 }
2470 else {
2471 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2472 setSize = 1;
2473 }
2474
2475 for (;;) {
2476 //
2477 // Check for end of set.
2478 //
2479 SKIP_WS(next);
2480 if (*next == '}') {
2481 next++; // skip '}'
2482 break;
2483 }
2484
2485 //
2486 // Skip optional comma.
2487 //
2488 if (*next == ',') {
2489 next++;
2490 }
2491 SKIP_WS(next);
2492
2493 //
2494 // Read the next integer in the set.
2495 //
2496 scan = next;
2497 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2498 "bad explicit proc list");
2499
2500 SKIP_DIGITS(next);
2501 num = __kmp_str_to_int(scan, *next);
2502 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2503
2504 //
2505 // Add the mask for that osId to the sum mask.
2506 //
2507 if ((num > maxOsId) ||
2508 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2509 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2510 && (__kmp_affinity_type != affinity_none))) {
2511 KMP_WARNING(AffIgnoreInvalidProcID, num);
2512 }
2513 }
2514 else {
2515 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2516 setSize++;
2517 }
2518 }
2519 if (setSize > 0) {
2520 ADD_MASK(sumMask);
2521 }
2522
2523 SKIP_WS(next);
2524 if (*next == ',') {
2525 next++;
2526 }
2527 scan = next;
2528 continue;
2529 }
2530
2531 //
2532 // Read the first integer.
2533 //
2534 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2535 SKIP_DIGITS(next);
2536 start = __kmp_str_to_int(scan, *next);
2537 KMP_ASSERT2(start >= 0, "bad explicit proc list");
2538 SKIP_WS(next);
2539
2540 //
2541 // If this isn't a range, then add a mask to the list and go on.
2542 //
2543 if (*next != '-') {
2544 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2545
2546 //
2547 // Skip optional comma.
2548 //
2549 if (*next == ',') {
2550 next++;
2551 }
2552 scan = next;
2553 continue;
2554 }
2555
2556 //
2557 // This is a range. Skip over the '-' and read in the 2nd int.
2558 //
2559 next++; // skip '-'
2560 SKIP_WS(next);
2561 scan = next;
2562 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2563 SKIP_DIGITS(next);
2564 end = __kmp_str_to_int(scan, *next);
2565 KMP_ASSERT2(end >= 0, "bad explicit proc list");
2566
2567 //
2568 // Check for a stride parameter
2569 //
2570 stride = 1;
2571 SKIP_WS(next);
2572 if (*next == ':') {
2573 //
2574 // A stride is specified. Skip over the ':' and read the 3rd int.
2575 //
2576 int sign = +1;
2577 next++; // skip ':'
2578 SKIP_WS(next);
2579 scan = next;
2580 if (*next == '-') {
2581 sign = -1;
2582 next++;
2583 SKIP_WS(next);
2584 scan = next;
2585 }
2586 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2587 "bad explicit proc list");
2588 SKIP_DIGITS(next);
2589 stride = __kmp_str_to_int(scan, *next);
2590 KMP_ASSERT2(stride >= 0, "bad explicit proc list");
2591 stride *= sign;
2592 }
2593
2594 //
2595 // Do some range checks.
2596 //
2597 KMP_ASSERT2(stride != 0, "bad explicit proc list");
2598 if (stride > 0) {
2599 KMP_ASSERT2(start <= end, "bad explicit proc list");
2600 }
2601 else {
2602 KMP_ASSERT2(start >= end, "bad explicit proc list");
2603 }
2604 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
2605
2606 //
2607 // Add the mask for each OS proc # to the list.
2608 //
2609 if (stride > 0) {
2610 do {
2611 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2612 start += stride;
2613 } while (start <= end);
2614 }
2615 else {
2616 do {
2617 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2618 start += stride;
2619 } while (start >= end);
2620 }
2621
2622 //
2623 // Skip optional comma.
2624 //
2625 SKIP_WS(next);
2626 if (*next == ',') {
2627 next++;
2628 }
2629 scan = next;
2630 }
2631
2632 *out_numMasks = nextNewMask;
2633 if (nextNewMask == 0) {
2634 *out_masks = NULL;
2635 KMP_INTERNAL_FREE(newMasks);
2636 return;
2637 }
2638 *out_masks
2639 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002640 KMP_MEMCPY(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002641 __kmp_free(sumMask);
2642 KMP_INTERNAL_FREE(newMasks);
2643}
2644
2645
2646# if OMP_40_ENABLED
2647
2648/*-----------------------------------------------------------------------------
2649
2650Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
2651places. Again, Here is the grammar:
2652
2653place_list := place
2654place_list := place , place_list
2655place := num
2656place := place : num
2657place := place : num : signed
2658place := { subplace_list }
2659place := ! place // (lowest priority)
2660subplace_list := subplace
2661subplace_list := subplace , subplace_list
2662subplace := num
2663subplace := num : num
2664subplace := num : num : signed
2665signed := num
2666signed := + signed
2667signed := - signed
2668
2669-----------------------------------------------------------------------------*/
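//
// For example (hypothetical setting): OMP_PLACES="{0:2}:4:2" parses as the
// subplace {0:2} (procs 0 and 1), replicated 4 times with a stride of 2,
// producing the places {0,1}, {2,3}, {4,5}, and {6,7}.
//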
2670
2671static void
2672__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
2673 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
2674{
2675 const char *next;
2676
2677 for (;;) {
2678 int start, count, stride, i;
2679
2680 //
2681 // Read in the starting proc id
2682 //
2683 SKIP_WS(*scan);
2684 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2685 "bad explicit places list");
2686 next = *scan;
2687 SKIP_DIGITS(next);
2688 start = __kmp_str_to_int(*scan, *next);
2689 KMP_ASSERT(start >= 0);
2690 *scan = next;
2691
2692 //
2693 // valid follow sets are ',' ':' and '}'
2694 //
2695 SKIP_WS(*scan);
2696 if (**scan == '}' || **scan == ',') {
2697 if ((start > maxOsId) ||
2698 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
2699 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2700 && (__kmp_affinity_type != affinity_none))) {
2701 KMP_WARNING(AffIgnoreInvalidProcID, start);
2702 }
2703 }
2704 else {
2705 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
2706 (*setSize)++;
2707 }
2708 if (**scan == '}') {
2709 break;
2710 }
2711 (*scan)++; // skip ','
2712 continue;
2713 }
2714 KMP_ASSERT2(**scan == ':', "bad explicit places list");
2715 (*scan)++; // skip ':'
2716
2717 //
2718 // Read count parameter
2719 //
2720 SKIP_WS(*scan);
2721 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2722 "bad explicit places list");
2723 next = *scan;
2724 SKIP_DIGITS(next);
2725 count = __kmp_str_to_int(*scan, *next);
2726 KMP_ASSERT(count >= 0);
2727 *scan = next;
2728
2729 //
2730 // valid follow sets are ',' ':' and '}'
2731 //
2732 SKIP_WS(*scan);
2733 if (**scan == '}' || **scan == ',') {
2734 for (i = 0; i < count; i++) {
2735 if ((start > maxOsId) ||
2736 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
2737 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2738 && (__kmp_affinity_type != affinity_none))) {
2739 KMP_WARNING(AffIgnoreInvalidProcID, start);
2740 }
2741 break; // don't proliferate warnings for large count
2742 }
2743 else {
2744 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
2745 start++;
2746 (*setSize)++;
2747 }
2748 }
2749 if (**scan == '}') {
2750 break;
2751 }
2752 (*scan)++; // skip ','
2753 continue;
2754 }
2755 KMP_ASSERT2(**scan == ':', "bad explicit places list");
2756 (*scan)++; // skip ':'
2757
2758 //
2759 // Read stride parameter
2760 //
2761 int sign = +1;
2762 for (;;) {
2763 SKIP_WS(*scan);
2764 if (**scan == '+') {
2765 (*scan)++; // skip '+'
2766 continue;
2767 }
2768 if (**scan == '-') {
2769 sign *= -1;
2770 (*scan)++; // skip '-'
2771 continue;
2772 }
2773 break;
2774 }
2775 SKIP_WS(*scan);
2776 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2777 "bad explicit places list");
2778 next = *scan;
2779 SKIP_DIGITS(next);
2780 stride = __kmp_str_to_int(*scan, *next);
2781 KMP_ASSERT(stride >= 0);
2782 *scan = next;
2783 stride *= sign;
2784
2785 //
2786 // valid follow sets are ',' and '}'
2787 //
2788 SKIP_WS(*scan);
2789 if (**scan == '}' || **scan == ',') {
2790 for (i = 0; i < count; i++) {
2791 if ((start > maxOsId) ||
2792 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
2793 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2794 && (__kmp_affinity_type != affinity_none))) {
2795 KMP_WARNING(AffIgnoreInvalidProcID, start);
2796 }
2797 break; // don't proliferate warnings for large count
2798 }
2799 else {
2800 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
2801 start += stride;
2802 (*setSize)++;
2803 }
2804 }
2805 if (**scan == '}') {
2806 break;
2807 }
2808 (*scan)++; // skip ','
2809 continue;
2810 }
2811
2812 KMP_ASSERT2(0, "bad explicit places list");
2813 }
2814}
2815
2816
2817static void
2818__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
2819 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
2820{
2821 const char *next;
2822
2823 //
2824 // valid follow sets are '{' '!' and num
2825 //
2826 SKIP_WS(*scan);
2827 if (**scan == '{') {
2828 (*scan)++; // skip '{'
2829 __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
2830 setSize);
2831 KMP_ASSERT2(**scan == '}', "bad explicit places list");
2832 (*scan)++; // skip '}'
2833 }
2834 else if (**scan == '!') {
Jonathan Peyton6778c732015-10-19 19:43:01 +00002835 (*scan)++; // skip '!'
Jim Cownie5e8470a2013-09-27 10:38:44 +00002836 __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
2837 KMP_CPU_COMPLEMENT(tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002838 }
2839 else if ((**scan >= '0') && (**scan <= '9')) {
2840 next = *scan;
2841 SKIP_DIGITS(next);
2842 int num = __kmp_str_to_int(*scan, *next);
2843 KMP_ASSERT(num >= 0);
2844 if ((num > maxOsId) ||
2845 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2846 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2847 && (__kmp_affinity_type != affinity_none))) {
2848 KMP_WARNING(AffIgnoreInvalidProcID, num);
2849 }
2850 }
2851 else {
2852 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
2853 (*setSize)++;
2854 }
2855 *scan = next; // skip num
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002856 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002857 else {
2858 KMP_ASSERT2(0, "bad explicit places list");
2859 }
2860}
2861
2862
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002863//static void
2864void
Jim Cownie5e8470a2013-09-27 10:38:44 +00002865__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
2866 unsigned int *out_numMasks, const char *placelist,
2867 kmp_affin_mask_t *osId2Mask, int maxOsId)
2868{
2869 const char *scan = placelist;
2870 const char *next = placelist;
2871
2872 numNewMasks = 2;
2873 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
2874 * __kmp_affin_mask_size);
2875 nextNewMask = 0;
2876
2877 kmp_affin_mask_t *tempMask = (kmp_affin_mask_t *)__kmp_allocate(
2878 __kmp_affin_mask_size);
2879 KMP_CPU_ZERO(tempMask);
2880 int setSize = 0;
2881
2882 for (;;) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002883 __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
2884
2885 //
2886 // valid follow sets are ',' ':' and EOL
2887 //
2888 SKIP_WS(scan);
2889 if (*scan == '\0' || *scan == ',') {
2890 if (setSize > 0) {
2891 ADD_MASK(tempMask);
2892 }
2893 KMP_CPU_ZERO(tempMask);
2894 setSize = 0;
2895 if (*scan == '\0') {
2896 break;
2897 }
2898 scan++; // skip ','
2899 continue;
2900 }
2901
2902 KMP_ASSERT2(*scan == ':', "bad explicit places list");
2903 scan++; // skip ':'
2904
2905 //
2906 // Read count parameter
2907 //
2908 SKIP_WS(scan);
2909 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
2910 "bad explicit places list");
2911 next = scan;
2912 SKIP_DIGITS(next);
Jim Cownie181b4bb2013-12-23 17:28:57 +00002913 int count = __kmp_str_to_int(scan, *next);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002914 KMP_ASSERT(count >= 0);
2915 scan = next;
2916
2917 //
2918 // valid follow sets are ',' ':' and EOL
2919 //
2920 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002921 int stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002922 if (*scan == '\0' || *scan == ',') {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002923 stride = +1;
2924 }
2925 else {
2926 KMP_ASSERT2(*scan == ':', "bad explicit places list");
2927 scan++; // skip ':'
Jim Cownie5e8470a2013-09-27 10:38:44 +00002928
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002929 //
2930 // Read stride parameter
2931 //
2932 int sign = +1;
2933 for (;;) {
2934 SKIP_WS(scan);
2935 if (*scan == '+') {
2936 scan++; // skip '+'
2937 continue;
2938 }
2939 if (*scan == '-') {
2940 sign *= -1;
2941 scan++; // skip '-'
2942 continue;
2943 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002944 break;
2945 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002946 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002947 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
2948 "bad explicit places list");
2949 next = scan;
2950 SKIP_DIGITS(next);
2951 stride = __kmp_str_to_int(scan, *next);
2952 KMP_DEBUG_ASSERT(stride >= 0);
2953 scan = next;
2954 stride *= sign;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002955 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002956
2957 if (stride > 0) {
2958 int i;
2959 for (i = 0; i < count; i++) {
2960 int j;
2961 if (setSize == 0) {
2962 break;
2963 }
2964 ADD_MASK(tempMask);
2965 setSize = 0;
2966 for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002967 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
2968 KMP_CPU_CLR(j, tempMask);
2969 }
2970 else if ((j > maxOsId) ||
2971 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
Andrey Churbanov16a14322015-03-10 09:34:38 +00002972 if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
2973 && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002974 KMP_WARNING(AffIgnoreInvalidProcID, j);
2975 }
2976 KMP_CPU_CLR(j, tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002977 }
2978 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002979 KMP_CPU_SET(j, tempMask);
2980 setSize++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002981 }
2982 }
2983 for (; j >= 0; j--) {
2984 KMP_CPU_CLR(j, tempMask);
2985 }
2986 }
2987 }
2988 else {
2989 int i;
2990 for (i = 0; i < count; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002991 int j;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002992 if (setSize == 0) {
2993 break;
2994 }
2995 ADD_MASK(tempMask);
2996 setSize = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002997 for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002998 j++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002999 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3000 KMP_CPU_CLR(j, tempMask);
3001 }
3002 else if ((j > maxOsId) ||
3003 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
Andrey Churbanov16a14322015-03-10 09:34:38 +00003004 if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
3005 && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003006 KMP_WARNING(AffIgnoreInvalidProcID, j);
3007 }
3008 KMP_CPU_CLR(j, tempMask);
3009 }
3010 else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003011 KMP_CPU_SET(j, tempMask);
3012 setSize++;
3013 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003014 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003015 for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003016 KMP_CPU_CLR(j, tempMask);
3017 }
3018 }
3019 }
3020 KMP_CPU_ZERO(tempMask);
3021 setSize = 0;
3022
3023 //
3024 // valid follow sets are ',' and EOL
3025 //
3026 SKIP_WS(scan);
3027 if (*scan == '\0') {
3028 break;
3029 }
3030 if (*scan == ',') {
3031 scan++; // skip ','
3032 continue;
3033 }
3034
3035 KMP_ASSERT2(0, "bad explicit places list");
3036 }
3037
3038 *out_numMasks = nextNewMask;
3039 if (nextNewMask == 0) {
3040 *out_masks = NULL;
3041 KMP_INTERNAL_FREE(newMasks);
3042 return;
3043 }
3044 *out_masks
3045 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003046 KMP_MEMCPY(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003047 __kmp_free(tempMask);
3048 KMP_INTERNAL_FREE(newMasks);
3049}
3050
3051# endif /* OMP_40_ENABLED */
3052
3053#undef ADD_MASK
3054#undef ADD_MASK_OSID
3055
Jim Cownie5e8470a2013-09-27 10:38:44 +00003056static void
3057__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
3058{
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003059 if (__kmp_place_num_sockets == 0 &&
3060 __kmp_place_num_cores == 0 &&
3061 __kmp_place_num_threads_per_core == 0 )
3062 return; // no topology limiting actions requested, exit
3063 if (__kmp_place_num_sockets == 0)
3064 __kmp_place_num_sockets = nPackages; // use all available sockets
3065 if (__kmp_place_num_cores == 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003066 __kmp_place_num_cores = nCoresPerPkg; // use all available cores
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003067 if (__kmp_place_num_threads_per_core == 0 ||
3068 __kmp_place_num_threads_per_core > __kmp_nThreadsPerCore)
3069 __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
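//
// Illustrative request (hypothetical): on a 4-socket machine with 12
// cores per socket and 2 thread contexts per core, limiting the model to
// 2 sockets, 6 cores, and 1 thread per core (e.g. via the
// KMP_PLACE_THREADS-style controls that populate __kmp_place_num_*)
// trims the map from 96 entries down to 2 * 6 * 1 == 12.
//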
3070
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003071 if ( !__kmp_affinity_uniform_topology() ) {
3072 KMP_WARNING( AffThrPlaceNonUniform );
3073 return; // don't support non-uniform topology
3074 }
3075 if ( depth != 3 ) {
3076 KMP_WARNING( AffThrPlaceNonThreeLevel );
3077 return; // don't support not-3-level topology
Jim Cownie5e8470a2013-09-27 10:38:44 +00003078 }
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003079 if (__kmp_place_socket_offset + __kmp_place_num_sockets > nPackages) {
3080 KMP_WARNING(AffThrPlaceManySockets);
3081 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003082 }
Andrey Churbanov12875572015-03-10 09:00:36 +00003083 if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003084 KMP_WARNING( AffThrPlaceManyCores );
3085 return;
3086 }
3087
3088 AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003089 __kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
3090
Jim Cownie5e8470a2013-09-27 10:38:44 +00003091 int i, j, k, n_old = 0, n_new = 0;
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003092 for (i = 0; i < nPackages; ++i)
3093 if (i < __kmp_place_socket_offset ||
3094 i >= __kmp_place_socket_offset + __kmp_place_num_sockets)
3095 n_old += nCoresPerPkg * __kmp_nThreadsPerCore; // skip not-requested socket
3096 else
3097 for (j = 0; j < nCoresPerPkg; ++j) // walk through requested socket
3098 if (j < __kmp_place_core_offset ||
3099 j >= __kmp_place_core_offset + __kmp_place_num_cores)
3100 n_old += __kmp_nThreadsPerCore; // skip not-requested core
3101 else
3102 for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through requested core
3103 if (k < __kmp_place_num_threads_per_core) {
3104 newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
3105 n_new++;
3106 }
3107 n_old++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003108 }
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003109 KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
3110 KMP_DEBUG_ASSERT(n_new == __kmp_place_num_sockets * __kmp_place_num_cores *
3111 __kmp_place_num_threads_per_core);
3112
3113 nPackages = __kmp_place_num_sockets; // correct nPackages
Jim Cownie5e8470a2013-09-27 10:38:44 +00003114 nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
3115 __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
3116 __kmp_avail_proc = n_new; // correct avail_proc
3117 __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
3118
3119 __kmp_free( *pAddr );
3120 *pAddr = newAddr; // replace old topology with new one
3121}
3122
Jim Cownie5e8470a2013-09-27 10:38:44 +00003123
3124static AddrUnsPair *address2os = NULL;
3125static int * procarr = NULL;
3126static int __kmp_aff_depth = 0;
3127
3128static void
3129__kmp_aux_affinity_initialize(void)
3130{
3131 if (__kmp_affinity_masks != NULL) {
3132 KMP_ASSERT(fullMask != NULL);
3133 return;
3134 }
3135
3136 //
3137 // Create the "full" mask - this defines all of the processors that we
3138 // consider to be in the machine model. If respect is set, then it is
3139 // the initialization thread's affinity mask. Otherwise, it is all
3140 // processors that we know about on the machine.
3141 //
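//
// For example (hypothetical): if the process was launched under
// "taskset -c 0-3" and respect is set, fullMask contains only procs 0-3
// and __kmp_avail_proc becomes 4, even on a larger machine.
//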
3142 if (fullMask == NULL) {
3143 fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
3144 }
3145 if (KMP_AFFINITY_CAPABLE()) {
3146 if (__kmp_affinity_respect_mask) {
3147 __kmp_get_system_affinity(fullMask, TRUE);
3148
3149 //
3150 // Count the number of available processors.
3151 //
3152 unsigned i;
3153 __kmp_avail_proc = 0;
3154 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
3155 if (! KMP_CPU_ISSET(i, fullMask)) {
3156 continue;
3157 }
3158 __kmp_avail_proc++;
3159 }
3160 if (__kmp_avail_proc > __kmp_xproc) {
3161 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3162 && (__kmp_affinity_type != affinity_none))) {
3163 KMP_WARNING(ErrorInitializeAffinity);
3164 }
3165 __kmp_affinity_type = affinity_none;
Andrey Churbanov1f037e42015-03-10 09:15:26 +00003166 KMP_AFFINITY_DISABLE();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003167 return;
3168 }
3169 }
3170 else {
3171 __kmp_affinity_entire_machine_mask(fullMask);
3172 __kmp_avail_proc = __kmp_xproc;
3173 }
3174 }
3175
3176 int depth = -1;
3177 kmp_i18n_id_t msg_id = kmp_i18n_null;
3178
3179 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00003180 // For backward compatibility, setting KMP_CPUINFO_FILE =>
Jim Cownie5e8470a2013-09-27 10:38:44 +00003181 // KMP_TOPOLOGY_METHOD=cpuinfo
3182 //
3183 if ((__kmp_cpuinfo_file != NULL) &&
3184 (__kmp_affinity_top_method == affinity_top_method_all)) {
3185 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
3186 }
3187
3188 if (__kmp_affinity_top_method == affinity_top_method_all) {
3189 //
3190 // In the default code path, errors are not fatal - we just try using
3191 // another method. We only emit a warning message if affinity is on,
3192 // or the verbose flag is set, and the nowarnings flag was not set.
3193 //
3194 const char *file_name = NULL;
3195 int line = 0;
3196
3197# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3198
3199 if (__kmp_affinity_verbose) {
3200 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
3201 }
3202
3203 file_name = NULL;
3204 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3205 if (depth == 0) {
3206 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3207 KMP_ASSERT(address2os == NULL);
3208 return;
3209 }
3210
3211 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003212 if (__kmp_affinity_verbose) {
3213 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003214 KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
3215 KMP_I18N_STR(DecodingLegacyAPIC));
3216 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003217 else {
3218 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
3219 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003220 }
3221
3222 file_name = NULL;
3223 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3224 if (depth == 0) {
3225 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3226 KMP_ASSERT(address2os == NULL);
3227 return;
3228 }
3229 }
3230
3231# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3232
3233# if KMP_OS_LINUX
3234
3235 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003236 if (__kmp_affinity_verbose) {
3237 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003238 KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
3239 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003240 else {
3241 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
3242 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003243 }
3244
3245 FILE *f = fopen("/proc/cpuinfo", "r");
3246 if (f == NULL) {
3247 msg_id = kmp_i18n_str_CantOpenCpuinfo;
3248 }
3249 else {
3250 file_name = "/proc/cpuinfo";
3251 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3252 fclose(f);
3253 if (depth == 0) {
3254 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3255 KMP_ASSERT(address2os == NULL);
3256 return;
3257 }
3258 }
3259 }
3260
3261# endif /* KMP_OS_LINUX */
3262
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003263# if KMP_GROUP_AFFINITY
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003264
3265 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
3266 if (__kmp_affinity_verbose) {
3267 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3268 }
3269
3270 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3271 KMP_ASSERT(depth != 0);
3272 }
3273
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003274# endif /* KMP_GROUP_AFFINITY */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003275
Jim Cownie5e8470a2013-09-27 10:38:44 +00003276 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003277 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003278 if (file_name == NULL) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003279 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003280 }
3281 else if (line == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003282 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003283 }
3284 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003285 KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003286 }
3287 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003288 // FIXME - print msg if msg_id = kmp_i18n_null ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00003289
3290 file_name = "";
3291 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3292 if (depth == 0) {
3293 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3294 KMP_ASSERT(address2os == NULL);
3295 return;
3296 }
3297 KMP_ASSERT(depth > 0);
3298 KMP_ASSERT(address2os != NULL);
3299 }
3300 }
3301
3302 //
3303 // If the user has specified that a particular topology discovery method
3304 // is to be used, then we abort if that method fails. The exception is
3305 // group affinity, which might have been implicitly set.
3306 //
3307
3308# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3309
3310 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
3311 if (__kmp_affinity_verbose) {
3312 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3313 KMP_I18N_STR(Decodingx2APIC));
3314 }
3315
3316 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3317 if (depth == 0) {
3318 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3319 KMP_ASSERT(address2os == NULL);
3320 return;
3321 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003322 if (depth < 0) {
3323 KMP_ASSERT(msg_id != kmp_i18n_null);
3324 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3325 }
3326 }
3327 else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
3328 if (__kmp_affinity_verbose) {
3329 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3330 KMP_I18N_STR(DecodingLegacyAPIC));
3331 }
3332
3333 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3334 if (depth == 0) {
3335 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3336 KMP_ASSERT(address2os == NULL);
3337 return;
3338 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003339 if (depth < 0) {
3340 KMP_ASSERT(msg_id != kmp_i18n_null);
3341 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3342 }
3343 }
3344
3345# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3346
3347 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
3348 const char *filename;
3349 if (__kmp_cpuinfo_file != NULL) {
3350 filename = __kmp_cpuinfo_file;
3351 }
3352 else {
3353 filename = "/proc/cpuinfo";
3354 }
3355
3356 if (__kmp_affinity_verbose) {
3357 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
3358 }
3359
3360 FILE *f = fopen(filename, "r");
3361 if (f == NULL) {
3362 int code = errno;
3363 if (__kmp_cpuinfo_file != NULL) {
3364 __kmp_msg(
3365 kmp_ms_fatal,
3366 KMP_MSG(CantOpenFileForReading, filename),
3367 KMP_ERR(code),
3368 KMP_HNT(NameComesFrom_CPUINFO_FILE),
3369 __kmp_msg_null
3370 );
3371 }
3372 else {
3373 __kmp_msg(
3374 kmp_ms_fatal,
3375 KMP_MSG(CantOpenFileForReading, filename),
3376 KMP_ERR(code),
3377 __kmp_msg_null
3378 );
3379 }
3380 }
3381 int line = 0;
3382 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3383 fclose(f);
3384 if (depth < 0) {
3385 KMP_ASSERT(msg_id != kmp_i18n_null);
3386 if (line > 0) {
3387 KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
3388 }
3389 else {
3390 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
3391 }
3392 }
3393 if (__kmp_affinity_type == affinity_none) {
3394 KMP_ASSERT(depth == 0);
3395 KMP_ASSERT(address2os == NULL);
3396 return;
3397 }
3398 }
3399
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003400# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00003401
3402 else if (__kmp_affinity_top_method == affinity_top_method_group) {
3403 if (__kmp_affinity_verbose) {
3404 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3405 }
3406
3407 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3408 KMP_ASSERT(depth != 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003409 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003410 KMP_ASSERT(msg_id != kmp_i18n_null);
3411 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003412 }
3413 }
3414
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003415# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003416
3417 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
3418 if (__kmp_affinity_verbose) {
3419 KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
3420 }
3421
3422 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3423 if (depth == 0) {
3424 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3425 KMP_ASSERT(address2os == NULL);
3426 return;
3427 }
3428 // should not fail
3429 KMP_ASSERT(depth > 0);
3430 KMP_ASSERT(address2os != NULL);
3431 }
3432
3433 if (address2os == NULL) {
3434 if (KMP_AFFINITY_CAPABLE()
3435 && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3436 && (__kmp_affinity_type != affinity_none)))) {
3437 KMP_WARNING(ErrorInitializeAffinity);
3438 }
3439 __kmp_affinity_type = affinity_none;
Andrey Churbanov1f037e42015-03-10 09:15:26 +00003440 KMP_AFFINITY_DISABLE();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003441 return;
3442 }
3443
Jim Cownie5e8470a2013-09-27 10:38:44 +00003444 __kmp_apply_thread_places(&address2os, depth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003445
3446 //
3447 // Create the table of masks, indexed by thread Id.
3448 //
3449 unsigned maxIndex;
3450 unsigned numUnique;
3451 kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
3452 address2os, __kmp_avail_proc);
3453 if (__kmp_affinity_gran_levels == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003454 KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003455 }
3456
3457 //
3458 // Set the childNums vector in all Address objects. This must be done
3459 // before we can sort using __kmp_affinity_cmp_Address_child_num(),
3460 // which takes into account the setting of __kmp_affinity_compact.
3461 //
3462 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
3463
3464 switch (__kmp_affinity_type) {
3465
3466 case affinity_explicit:
3467 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
3468# if OMP_40_ENABLED
3469 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3470# endif
3471 {
3472 __kmp_affinity_process_proclist(&__kmp_affinity_masks,
3473 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3474 maxIndex);
3475 }
3476# if OMP_40_ENABLED
3477 else {
3478 __kmp_affinity_process_placelist(&__kmp_affinity_masks,
3479 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3480 maxIndex);
3481 }
3482# endif
3483 if (__kmp_affinity_num_masks == 0) {
3484 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3485 && (__kmp_affinity_type != affinity_none))) {
3486 KMP_WARNING(AffNoValidProcID);
3487 }
3488 __kmp_affinity_type = affinity_none;
3489 return;
3490 }
3491 break;
3492
3493 //
3494 // The other affinity types rely on sorting the Addresses according
3495 // to some permutation of the machine topology tree. Set
3496 // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
3497 // then jump to a common code fragment to do the sort and create
3498 // the array of affinity masks.
3499 //
3500
3501 case affinity_logical:
3502 __kmp_affinity_compact = 0;
3503 if (__kmp_affinity_offset) {
3504 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3505 % __kmp_avail_proc;
3506 }
3507 goto sortAddresses;
3508
3509 case affinity_physical:
3510 if (__kmp_nThreadsPerCore > 1) {
3511 __kmp_affinity_compact = 1;
3512 if (__kmp_affinity_compact >= depth) {
3513 __kmp_affinity_compact = 0;
3514 }
3515 } else {
3516 __kmp_affinity_compact = 0;
3517 }
3518 if (__kmp_affinity_offset) {
3519 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3520 % __kmp_avail_proc;
3521 }
3522 goto sortAddresses;
3523
3524 case affinity_scatter:
3525 if (__kmp_affinity_compact >= depth) {
3526 __kmp_affinity_compact = 0;
3527 }
3528 else {
3529 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
3530 }
3531 goto sortAddresses;
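        // scatter is in effect the mirror image of compact: inverting the
        // level index (depth - 1 - compact) makes the common sort below
        // spread consecutive thread ids across the outermost levels of the
        // topology first, instead of packing them at the innermost levels.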
3532
3533 case affinity_compact:
3534 if (__kmp_affinity_compact >= depth) {
3535 __kmp_affinity_compact = depth - 1;
3536 }
3537 goto sortAddresses;
3538
Jim Cownie5e8470a2013-09-27 10:38:44 +00003539 case affinity_balanced:
Jonathan Peytoncaf09fe2015-05-27 23:27:33 +00003540 // The balanced affinity type currently works only on single-package machines
Jim Cownie5e8470a2013-09-27 10:38:44 +00003541 if( nPackages > 1 ) {
3542 if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
3543 KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
3544 }
3545 __kmp_affinity_type = affinity_none;
3546 return;
3547 } else if( __kmp_affinity_uniform_topology() ) {
3548 break;
3549 } else { // Non-uniform topology
3550
3551 // Save the depth for later use by __kmp_balanced_affinity()
3552 __kmp_aff_depth = depth;
3553
3554 // Number of hyperthreads per core on an HT machine
3555 int nth_per_core = __kmp_nThreadsPerCore;
3556
3557 int core_level;
3558 if( nth_per_core > 1 ) {
3559 core_level = depth - 2;
3560 } else {
3561 core_level = depth - 1;
3562 }
3563 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
3564 int nproc = nth_per_core * ncores;
3565
3566 procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
3567 for( int i = 0; i < nproc; i++ ) {
3568 procarr[ i ] = -1;
3569 }
3570
3571 for( int i = 0; i < __kmp_avail_proc; i++ ) {
3572 int proc = address2os[ i ].second;
3573 // If depth == 3 then level=0 - package, level=1 - core, level=2 - thread.
3574 // If there is only one thread per core then depth == 2: level 0 - package,
3575 // level 1 - core.
3576 int level = depth - 1;
3577
3578 // Defaults for the one-thread-per-core case (__kmp_nth_per_core == 1)
3579 int thread = 0;
3580 int core = address2os[ i ].first.labels[ level ];
3581 // If the thread level exists, that is, there is more than one thread context per core
3582 if( nth_per_core > 1 ) {
3583 thread = address2os[ i ].first.labels[ level ] % nth_per_core;
3584 core = address2os[ i ].first.labels[ level - 1 ];
3585 }
3586 procarr[ core * nth_per_core + thread ] = proc;
3587 }
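            // procarr is a dense, core-major table of size ncores * nth_per_core:
            // entry [core * nth_per_core + thread] holds the OS proc id of that
            // hardware context, or -1 for a context that does not exist on this
            // non-uniform machine. __kmp_balanced_affinity() consumes it later.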
3588
3589 break;
3590 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003591
3592 sortAddresses:
3593 //
3594 // Allocate the gtid->affinity mask table.
3595 //
3596 if (__kmp_affinity_dups) {
3597 __kmp_affinity_num_masks = __kmp_avail_proc;
3598 }
3599 else {
3600 __kmp_affinity_num_masks = numUnique;
3601 }
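    // When duplicate places are allowed (__kmp_affinity_dups), every
    // available proc gets its own entry even if several procs share a mask
    // at the chosen granularity; otherwise the table collapses to the
    // numUnique leader masks found by __kmp_create_masks().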
3602
3603# if OMP_40_ENABLED
3604 if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
3605 && ( __kmp_affinity_num_places > 0 )
3606 && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
3607 __kmp_affinity_num_masks = __kmp_affinity_num_places;
3608 }
3609# endif
3610
3611 __kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
3612 __kmp_affinity_num_masks * __kmp_affin_mask_size);
3613
3614 //
3615 // Sort the address2os table according to the current setting of
3616 // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
3617 //
3618 qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
3619 __kmp_affinity_cmp_Address_child_num);
3620 {
3621 int i;
3622 unsigned j;
3623 for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
3624 if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
3625 continue;
3626 }
3627 unsigned osId = address2os[i].second;
3628 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
3629 kmp_affin_mask_t *dest
3630 = KMP_CPU_INDEX(__kmp_affinity_masks, j);
3631 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
3632 KMP_CPU_COPY(dest, src);
3633 if (++j >= __kmp_affinity_num_masks) {
3634 break;
3635 }
3636 }
3637 KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
3638 }
3639 break;
3640
3641 default:
3642 KMP_ASSERT2(0, "Unexpected affinity setting");
3643 }
3644
3645 __kmp_free(osId2Mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003646 machine_hierarchy.init(address2os, __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003647}
3648
3649
3650void
3651__kmp_affinity_initialize(void)
3652{
3653 //
3654 // Much of the code above was written assuming that if a machine was not
3655 // affinity capable, then __kmp_affinity_type == affinity_none. We now
3656 // explicitly represent this as __kmp_affinity_type == affinity_disabled.
3657 //
3658 // There are too many checks for __kmp_affinity_type == affinity_none
3659 // in this code. Instead of trying to change them all, check if
3660 // __kmp_affinity_type == affinity_disabled, and if so, slam it with
3661 // affinity_none, call the real initialization routine, then restore
3662 // __kmp_affinity_type to affinity_disabled.
3663 //
3664 int disabled = (__kmp_affinity_type == affinity_disabled);
3665 if (! KMP_AFFINITY_CAPABLE()) {
3666 KMP_ASSERT(disabled);
3667 }
3668 if (disabled) {
3669 __kmp_affinity_type = affinity_none;
3670 }
3671 __kmp_aux_affinity_initialize();
3672 if (disabled) {
3673 __kmp_affinity_type = affinity_disabled;
3674 }
3675}
3676
3677
3678void
3679__kmp_affinity_uninitialize(void)
3680{
3681 if (__kmp_affinity_masks != NULL) {
3682 __kmp_free(__kmp_affinity_masks);
3683 __kmp_affinity_masks = NULL;
3684 }
3685 if (fullMask != NULL) {
3686 KMP_CPU_FREE(fullMask);
3687 fullMask = NULL;
3688 }
3689 __kmp_affinity_num_masks = 0;
3690# if OMP_40_ENABLED
3691 __kmp_affinity_num_places = 0;
3692# endif
3693 if (__kmp_affinity_proclist != NULL) {
3694 __kmp_free(__kmp_affinity_proclist);
3695 __kmp_affinity_proclist = NULL;
3696 }
3697 if( address2os != NULL ) {
3698 __kmp_free( address2os );
3699 address2os = NULL;
3700 }
3701 if( procarr != NULL ) {
3702 __kmp_free( procarr );
3703 procarr = NULL;
3704 }
3705}
3706
3707
3708void
3709__kmp_affinity_set_init_mask(int gtid, int isa_root)
3710{
3711 if (! KMP_AFFINITY_CAPABLE()) {
3712 return;
3713 }
3714
3715 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
3716 if (th->th.th_affin_mask == NULL) {
3717 KMP_CPU_ALLOC(th->th.th_affin_mask);
3718 }
3719 else {
3720 KMP_CPU_ZERO(th->th.th_affin_mask);
3721 }
3722
3723 //
3724 // Copy the thread mask to the kmp_info_t structure.
3725 // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
3726 // that has all of the OS proc ids set. If __kmp_affinity_respect_mask
3727 // is set, the full mask is instead the mask of the initialization
3728 // thread.
3729 //
3730 kmp_affin_mask_t *mask;
3731 int i;
3732
3733# if OMP_40_ENABLED
3734 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3735# endif
3736 {
Andrey Churbanovf28f6132015-01-13 14:54:00 +00003737 if ((__kmp_affinity_type == affinity_none) || (__kmp_affinity_type == affinity_balanced)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003738 ) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003739# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00003740 if (__kmp_num_proc_groups > 1) {
3741 return;
3742 }
3743# endif
3744 KMP_ASSERT(fullMask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003745 i = KMP_PLACE_ALL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003746 mask = fullMask;
3747 }
3748 else {
3749 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
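            // Round-robin gtids over the affinity mask table; e.g. with
            // __kmp_affinity_num_masks == 4 and offset 1, gtids 0,1,2,3,4
            // map to masks 1,2,3,0,1.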
3750 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
3751 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
3752 }
3753 }
3754# if OMP_40_ENABLED
3755 else {
3756 if ((! isa_root)
3757 || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003758# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00003759 if (__kmp_num_proc_groups > 1) {
3760 return;
3761 }
3762# endif
3763 KMP_ASSERT(fullMask != NULL);
3764 i = KMP_PLACE_ALL;
3765 mask = fullMask;
3766 }
3767 else {
3768 //
3769 // Choose the place index: ideally some hash function or a counter
3770 // that doesn't always start at 0; just use gtid for now.
3771 //
3772 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
3773 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
3774 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
3775 }
3776 }
3777# endif
3778
3779# if OMP_40_ENABLED
3780 th->th.th_current_place = i;
3781 if (isa_root) {
3782 th->th.th_new_place = i;
3783 th->th.th_first_place = 0;
3784 th->th.th_last_place = __kmp_affinity_num_masks - 1;
3785 }
3786
3787 if (i == KMP_PLACE_ALL) {
3788 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
3789 gtid));
3790 }
3791 else {
3792 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
3793 gtid, i));
3794 }
3795# else
3796 if (i == -1) {
3797 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
3798 gtid));
3799 }
3800 else {
3801 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
3802 gtid, i));
3803 }
3804# endif /* OMP_40_ENABLED */
3805
3806 KMP_CPU_COPY(th->th.th_affin_mask, mask);
3807
3808 if (__kmp_affinity_verbose) {
3809 char buf[KMP_AFFIN_MASK_PRINT_LEN];
3810 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
3811 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003812 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
3813 buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003814 }
3815
3816# if KMP_OS_WINDOWS
3817 //
3818 // On Windows* OS, the process affinity mask might have changed.
3819 // If the user didn't request affinity and this call fails,
3820 // just continue silently. See CQ171393.
3821 //
3822 if ( __kmp_affinity_type == affinity_none ) {
3823 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
3824 }
3825 else
3826# endif
3827 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
3828}
3829
3830
3831# if OMP_40_ENABLED
3832
3833void
3834__kmp_affinity_set_place(int gtid)
3835{
3838 if (! KMP_AFFINITY_CAPABLE()) {
3839 return;
3840 }
3841
3842 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
3843
3844 KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
3845 gtid, th->th.th_new_place, th->th.th_current_place));
3846
3847 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00003848 // Check that the new place is within this thread's partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003849 //
3850 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003851 KMP_ASSERT(th->th.th_new_place >= 0);
3852 KMP_ASSERT((unsigned)th->th.th_new_place < __kmp_affinity_num_masks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003853 if (th->th.th_first_place <= th->th.th_last_place) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003854 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003855 && (th->th.th_new_place <= th->th.th_last_place));
3856 }
3857 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003858 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003859 || (th->th.th_new_place >= th->th.th_last_place));
3860 }
3861
3862 //
3863 // Copy the thread mask to the kmp_info_t structure,
3864 // and set this thread's affinity.
3865 //
3866 kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
3867 th->th.th_new_place);
3868 KMP_CPU_COPY(th->th.th_affin_mask, mask);
3869 th->th.th_current_place = th->th.th_new_place;
3870
3871 if (__kmp_affinity_verbose) {
3872 char buf[KMP_AFFIN_MASK_PRINT_LEN];
3873 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
3874 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003875 KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
3876 gtid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003877 }
3878 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
3879}
3880
3881# endif /* OMP_40_ENABLED */
3882
3883
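//
// Backing implementation for the user-level kmp_set_affinity() entry point
// declared in omp.h. A typical call sequence (a sketch, assuming the
// standard kmp_affinity_mask_t wrappers) looks roughly like:
//
//     kmp_affinity_mask_t m;
//     kmp_create_affinity_mask(&m);
//     kmp_set_affinity_mask_proc(3, &m);  // run on OS proc 3 only
//     if (kmp_set_affinity(&m) != 0) { /* not capable or invalid mask */ }
//     kmp_destroy_affinity_mask(&m);
//
// Returns -1 if the runtime is not affinity capable, otherwise the result
// of the underlying system call (0 on success).
//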
3884int
3885__kmp_aux_set_affinity(void **mask)
3886{
3887 int gtid;
3888 kmp_info_t *th;
3889 int retval;
3890
3891 if (! KMP_AFFINITY_CAPABLE()) {
3892 return -1;
3893 }
3894
3895 gtid = __kmp_entry_gtid();
3896 KA_TRACE(1000, ;{
3897 char buf[KMP_AFFIN_MASK_PRINT_LEN];
3898 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
3899 (kmp_affin_mask_t *)(*mask));
3900 __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
3901 gtid, buf);
3902 });
3903
3904 if (__kmp_env_consistency_check) {
3905 if ((mask == NULL) || (*mask == NULL)) {
3906 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
3907 }
3908 else {
3909 unsigned proc;
3910 int num_procs = 0;
3911
3912 for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
3913 if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
3914 continue;
3915 }
3916 num_procs++;
3917 if (! KMP_CPU_ISSET(proc, fullMask)) {
3918 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
3919 break;
3920 }
3921 }
3922 if (num_procs == 0) {
3923 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
3924 }
3925
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003926# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00003927 if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
3928 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
3929 }
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003930# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003931
3932 }
3933 }
3934
3935 th = __kmp_threads[gtid];
3936 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
3937 retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
3938 if (retval == 0) {
3939 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
3940 }
3941
3942# if OMP_40_ENABLED
3943 th->th.th_current_place = KMP_PLACE_UNDEFINED;
3944 th->th.th_new_place = KMP_PLACE_UNDEFINED;
3945 th->th.th_first_place = 0;
3946 th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003947
3948 //
3949 // Turn off 4.0 affinity for the current thread at this parallel level.
3950 //
3951 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003952# endif
3953
3954 return retval;
3955}
3956
3957
3958int
3959__kmp_aux_get_affinity(void **mask)
3960{
3961 int gtid;
3962 int retval;
3963 kmp_info_t *th;
3964
3965 if (! KMP_AFFINITY_CAPABLE()) {
3966 return -1;
3967 }
3968
3969 gtid = __kmp_entry_gtid();
3970 th = __kmp_threads[gtid];
3971 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
3972
3973 KA_TRACE(1000, ;{
3974 char buf[KMP_AFFIN_MASK_PRINT_LEN];
3975 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
3976 th->th.th_affin_mask);
3977 __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
3978 });
3979
3980 if (__kmp_env_consistency_check) {
3981 if ((mask == NULL) || (*mask == NULL)) {
3982 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
3983 }
3984 }
3985
3986# if !KMP_OS_WINDOWS
3987
3988 retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
3989 KA_TRACE(1000, ;{
3990 char buf[KMP_AFFIN_MASK_PRINT_LEN];
3991 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
3992 (kmp_affin_mask_t *)(*mask));
3993 __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
3994 });
3995 return retval;
3996
3997# else
3998
3999 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
4000 return 0;
4001
4002# endif /* KMP_OS_WINDOWS */
4003
4004}
4005
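//
// The three helpers below back the kmp_{set,unset,get}_affinity_mask_proc()
// entry points. All of them return -1 when the runtime is not affinity
// capable. The set/unset variants also return -1 for an out-of-range proc
// and -2 for a proc missing from the process' fullMask; the get variant
// simply reports 0 in those cases, else the membership bit for proc.
//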
Jim Cownie5e8470a2013-09-27 10:38:44 +00004006int
4007__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
4008{
4011 if (! KMP_AFFINITY_CAPABLE()) {
4012 return -1;
4013 }
4014
4015 KA_TRACE(1000, ;{
4016 int gtid = __kmp_entry_gtid();
4017 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4018 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4019 (kmp_affin_mask_t *)(*mask));
4020 __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
4021 proc, gtid, buf);
4022 });
4023
4024 if (__kmp_env_consistency_check) {
4025 if ((mask == NULL) || (*mask == NULL)) {
4026 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
4027 }
4028 }
4029
4030 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4031 return -1;
4032 }
4033 if (! KMP_CPU_ISSET(proc, fullMask)) {
4034 return -2;
4035 }
4036
4037 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
4038 return 0;
4039}
4040
4041
4042int
4043__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
4044{
4047 if (! KMP_AFFINITY_CAPABLE()) {
4048 return -1;
4049 }
4050
4051 KA_TRACE(1000, ;{
4052 int gtid = __kmp_entry_gtid();
4053 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4054 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4055 (kmp_affin_mask_t *)(*mask));
4056 __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
4057 proc, gtid, buf);
4058 });
4059
4060 if (__kmp_env_consistency_check) {
4061 if ((mask == NULL) || (*mask == NULL)) {
4062 KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
4063 }
4064 }
4065
4066 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4067 return -1;
4068 }
4069 if (! KMP_CPU_ISSET(proc, fullMask)) {
4070 return -2;
4071 }
4072
4073 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
4074 return 0;
4075}
4076
4077
4078int
4079__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
4080{
4083 if (! KMP_AFFINITY_CAPABLE()) {
4084 return -1;
4085 }
4086
4087 KA_TRACE(1000, ;{
4088 int gtid = __kmp_entry_gtid();
4089 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4090 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4091 (kmp_affin_mask_t *)(*mask));
4092 __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
4093 proc, gtid, buf);
4094 });
4095
4096 if (__kmp_env_consistency_check) {
4097 if ((mask == NULL) || (*mask == NULL)) {
Andrey Churbanov4b2f17a2015-01-29 15:49:22 +00004098 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
Jim Cownie5e8470a2013-09-27 10:38:44 +00004099 }
4100 }
4101
4102 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4103 return 0;
4104 }
4105 if (! KMP_CPU_ISSET(proc, fullMask)) {
4106 return 0;
4107 }
4108
4109 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
4110}
4111
Jim Cownie5e8470a2013-09-27 10:38:44 +00004112
4113// Dynamic affinity settings - Affinity balanced
4114void __kmp_balanced_affinity( int tid, int nthreads )
4115{
4116 if( __kmp_affinity_uniform_topology() ) {
4117 int coreID;
4118 int threadID;
4119 // Number of hyperthreads per core on an HT machine
4120 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
4121 // Number of cores
4122 int ncores = __kmp_ncores;
4123 // How many threads will be bound to each core
4124 int chunk = nthreads / ncores;
4125 // How many cores will have an additional thread bound to them - the "big cores"
4126 int big_cores = nthreads % ncores;
4127 // Number of threads on the big cores
4128 int big_nth = ( chunk + 1 ) * big_cores;
4129 if( tid < big_nth ) {
4130 coreID = tid / (chunk + 1 );
4131 threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
4132 } else { //tid >= big_nth
4133 coreID = ( tid - big_cores ) / chunk;
4134 threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
4135 }
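        // Worked example (a sketch): 4 cores with 2 contexts each and
        // nthreads == 6 gives chunk = 1, big_cores = 2, big_nth = 4, so
        // tids 0,1 share core 0, tids 2,3 share core 1 (the "big" cores),
        // and tids 4,5 land alone on cores 2 and 3.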
4136
4137 KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
4138 "Illegal set affinity operation when not capable");
4139
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00004140 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004141 KMP_CPU_ZERO(mask);
4142
4143 // Granularity == thread
4144 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4145 int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
4146 KMP_CPU_SET( osID, mask);
4147 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4148 for( int i = 0; i < __kmp_nth_per_core; i++ ) {
4149 int osID;
4150 osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
4151 KMP_CPU_SET( osID, mask);
4152 }
4153 }
4154 if (__kmp_affinity_verbose) {
4155 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4156 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004157 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4158 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004159 }
4160 __kmp_set_system_affinity( mask, TRUE );
4161 } else { // Non-uniform topology
4162
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00004163 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004164 KMP_CPU_ZERO(mask);
4165
4166 // Number of hyperthreads per core on an HT machine
4167 int nth_per_core = __kmp_nThreadsPerCore;
4168 int core_level;
4169 if( nth_per_core > 1 ) {
4170 core_level = __kmp_aff_depth - 2;
4171 } else {
4172 core_level = __kmp_aff_depth - 1;
4173 }
4174
4175 // Number of cores - an upper bound; it does not count trailing cores with 0 processors
4176 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
4177
4178 // As a performance optimization, handle the special case nthreads == __kmp_avail_proc separately
4179 if( nthreads == __kmp_avail_proc ) {
4180 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4181 int osID = address2os[ tid ].second;
4182 KMP_CPU_SET( osID, mask);
4183 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4184 int coreID = address2os[ tid ].first.labels[ core_level ];
4185 // Count the osIDs found for the current core; there can be no more than
4186 // nth_per_core of them, and since address2os is sorted we can break when cnt == nth_per_core
4187 int cnt = 0;
4188 for( int i = 0; i < __kmp_avail_proc; i++ ) {
4189 int osID = address2os[ i ].second;
4190 int core = address2os[ i ].first.labels[ core_level ];
4191 if( core == coreID ) {
4192 KMP_CPU_SET( osID, mask);
4193 cnt++;
4194 if( cnt == nth_per_core ) {
4195 break;
4196 }
4197 }
4198 }
4199 }
4200 } else if( nthreads <= __kmp_ncores ) {
4201
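            // More cores than threads: walk the cores that have at least one
            // context recorded in procarr and give the tid-th such core to
            // this thread - one context at granularity thread, all of the
            // core's contexts at granularity core.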
4202 int core = 0;
4203 for( int i = 0; i < ncores; i++ ) {
4204 // Check if this core from procarr[] is in the mask
4205 int in_mask = 0;
4206 for( int j = 0; j < nth_per_core; j++ ) {
4207 if( procarr[ i * nth_per_core + j ] != -1 ) {
4208 in_mask = 1;
4209 break;
4210 }
4211 }
4212 if( in_mask ) {
4213 if( tid == core ) {
4214 for( int j = 0; j < nth_per_core; j++ ) {
4215 int osID = procarr[ i * nth_per_core + j ];
4216 if( osID != -1 ) {
4217 KMP_CPU_SET( osID, mask );
4218 // For granularity=thread it is enough to set the first available osID for this core
4219 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4220 break;
4221 }
4222 }
4223 }
4224 break;
4225 } else {
4226 core++;
4227 }
4228 }
4229 }
4230
4231 } else { // nthreads > __kmp_ncores
4232
4233 // Array to save the number of processors at each core
Jonathan Peyton7be075332015-06-22 15:53:50 +00004234 int* nproc_at_core = (int*)KMP_ALLOCA(sizeof(int)*ncores);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004235 // Array to save the number of cores with exactly "x" available processors
Jonathan Peyton7be075332015-06-22 15:53:50 +00004236 int* ncores_with_x_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004237 // Array to save the number of cores with between x and nth_per_core available procs (cumulative over x)
Jonathan Peyton7be075332015-06-22 15:53:50 +00004238 int* ncores_with_x_to_max_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004239
4240 for( int i = 0; i <= nth_per_core; i++ ) {
4241 ncores_with_x_procs[ i ] = 0;
4242 ncores_with_x_to_max_procs[ i ] = 0;
4243 }
4244
4245 for( int i = 0; i < ncores; i++ ) {
4246 int cnt = 0;
4247 for( int j = 0; j < nth_per_core; j++ ) {
4248 if( procarr[ i * nth_per_core + j ] != -1 ) {
4249 cnt++;
4250 }
4251 }
4252 nproc_at_core[ i ] = cnt;
4253 ncores_with_x_procs[ cnt ]++;
4254 }
4255
4256 for( int i = 0; i <= nth_per_core; i++ ) {
4257 for( int j = i; j <= nth_per_core; j++ ) {
4258 ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
4259 }
4260 }
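            // E.g. (a sketch) for cores exposing {2, 2, 1} contexts with
            // nth_per_core = 2: nproc_at_core = {2,2,1},
            // ncores_with_x_procs = {0,1,2} and
            // ncores_with_x_to_max_procs = {3,3,2}.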
4261
4262 // Max number of processors
4263 int nproc = nth_per_core * ncores;
4264 // An array to keep the number of threads assigned to each hardware context
4265 int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
4266 for( int i = 0; i < nproc; i++ ) {
4267 newarr[ i ] = 0;
4268 }
4269
4270 int nth = nthreads;
4271 int flag = 0;
4272 while( nth > 0 ) {
4273 for( int j = 1; j <= nth_per_core; j++ ) {
4274 int cnt = ncores_with_x_to_max_procs[ j ];
4275 for( int i = 0; i < ncores; i++ ) {
4276 // Skip the core with 0 processors
4277 if( nproc_at_core[ i ] == 0 ) {
4278 continue;
4279 }
4280 for( int k = 0; k < nth_per_core; k++ ) {
4281 if( procarr[ i * nth_per_core + k ] != -1 ) {
4282 if( newarr[ i * nth_per_core + k ] == 0 ) {
4283 newarr[ i * nth_per_core + k ] = 1;
4284 cnt--;
4285 nth--;
4286 break;
4287 } else {
4288 if( flag != 0 ) {
4289 newarr[ i * nth_per_core + k ] ++;
4290 cnt--;
4291 nth--;
4292 break;
4293 }
4294 }
4295 }
4296 }
4297 if( cnt == 0 || nth == 0 ) {
4298 break;
4299 }
4300 }
4301 if( nth == 0 ) {
4302 break;
4303 }
4304 }
4305 flag = 1;
4306 }
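            // newarr[ctx] now holds how many of the nthreads threads were
            // assigned to each hardware context: the first sweep (flag == 0)
            // puts at most one thread on each existing context, and later
            // sweeps stack extras round-robin. Walk the prefix sums until
            // sum > tid to locate this thread's context.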
4307 int sum = 0;
4308 for( int i = 0; i < nproc; i++ ) {
4309 sum += newarr[ i ];
4310 if( sum > tid ) {
4311 // Granularity == thread
4312 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4313 int osID = procarr[ i ];
4314 KMP_CPU_SET( osID, mask);
4315 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4316 int coreID = i / nth_per_core;
4317 for( int ii = 0; ii < nth_per_core; ii++ ) {
4318 int osID = procarr[ coreID * nth_per_core + ii ];
4319 if( osID != -1 ) {
4320 KMP_CPU_SET( osID, mask);
4321 }
4322 }
4323 }
4324 break;
4325 }
4326 }
4327 __kmp_free( newarr );
4328 }
4329
4330 if (__kmp_affinity_verbose) {
4331 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4332 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004333 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4334 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004335 }
4336 __kmp_set_system_affinity( mask, TRUE );
4337 }
4338}
4339
Alp Toker763b9392014-02-28 09:42:41 +00004340#endif // KMP_AFFINITY_SUPPORTED