/*
 * kmp_affinity.cpp -- affinity management
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_str.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_affinity.h"

// Store the real or imagined machine hierarchy here
static hierarchy_info machine_hierarchy;

void __kmp_cleanup_hierarchy() {
    machine_hierarchy.fini();
}

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
    kmp_uint32 depth;
    // The test below is true if affinity is available, but set to "none".
    // Need to init on first use of hierarchical barrier.
    if (TCR_1(machine_hierarchy.uninitialized))
        machine_hierarchy.init(NULL, nproc);

    // Adjust the hierarchy in case num threads exceeds original
    if (nproc > machine_hierarchy.base_num_threads)
        machine_hierarchy.resize(nproc);

    depth = machine_hierarchy.depth;
    KMP_DEBUG_ASSERT(depth > 0);

    thr_bar->depth = depth;
    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}
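
// Illustrative example (values assumed, not taken from a real machine):
// for a hierarchy with numPerLevel[0] == 4 and nproc == 8, the code above
// fills in thr_bar->depth from machine_hierarchy.depth, sets
// thr_bar->base_leaf_kids == 4 - 1 == 3, and points
// thr_bar->skip_per_level at the shared skipPerLevel array used to walk
// the hierarchical barrier tree.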

#if KMP_AFFINITY_SUPPORTED

//
// Print the affinity mask to the character array in a pretty format.
//
#if KMP_USE_HWLOC
char *
__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
{
    int num_chars_to_write, num_chars_written;
    char* scan;
    KMP_ASSERT(buf_len >= 40);

    // bufsize of 0 just retrieves the needed buffer size.
    num_chars_to_write = hwloc_bitmap_list_snprintf(buf, 0, (hwloc_bitmap_t)mask);

    // need '{', "xxxxxxxx...xx", '}', '\0' = num_chars_to_write + 3 bytes
    // * num_chars_to_write returned by hwloc_bitmap_list_snprintf does not
    //   take into account the '\0' character.
    if(hwloc_bitmap_iszero((hwloc_bitmap_t)mask)) {
        KMP_SNPRINTF(buf, buf_len, "{<empty>}");
    } else if(num_chars_to_write < buf_len - 3) {
        // no problem fitting the mask into buf_len number of characters
        buf[0] = '{';
        // use buf_len-3 because we have the three characters: '{' '}' '\0' to add to the buffer
        num_chars_written = hwloc_bitmap_list_snprintf(buf+1, buf_len-3, (hwloc_bitmap_t)mask);
        buf[num_chars_written+1] = '}';
        buf[num_chars_written+2] = '\0';
    } else {
        // Need to truncate the affinity mask string and add an ellipsis.
        // First write '{' followed by str(mask); then start at the 7th-to-last
        // character and scan backwards until we are NOT on a digit, and write
        // "...}\0" there. This keeps the ellipsis from splitting an OS proc
        // number: we avoid output like "{ 45, 67, 8...}" and produce
        // "{ 45, 67,...}" instead. Check scan >= buf before dereferencing so
        // we never read before the start of the buffer.
        buf[0] = '{';
        hwloc_bitmap_list_snprintf(buf+1, buf_len-1, (hwloc_bitmap_t)mask);
        scan = buf + buf_len - 7;
        while(scan >= buf && *scan >= '0' && *scan <= '9')
            scan--;
        *(scan+1) = '.';
        *(scan+2) = '.';
        *(scan+3) = '.';
        *(scan+4) = '}';
        *(scan+5) = '\0';
    }
    return buf;
}
#else
char *
__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
{
    KMP_ASSERT(buf_len >= 40);
    char *scan = buf;
    char *end = buf + buf_len - 1;

    //
    // Find first element / check for empty set.
    //
    size_t i;
    for (i = 0; i < KMP_CPU_SETSIZE; i++) {
        if (KMP_CPU_ISSET(i, mask)) {
            break;
        }
    }
    if (i == KMP_CPU_SETSIZE) {
        KMP_SNPRINTF(scan, end-scan+1, "{<empty>}");
        while (*scan != '\0') scan++;
        KMP_ASSERT(scan <= end);
        return buf;
    }

    KMP_SNPRINTF(scan, end-scan+1, "{%ld", (long)i);
    while (*scan != '\0') scan++;
    i++;
    for (; i < KMP_CPU_SETSIZE; i++) {
        if (! KMP_CPU_ISSET(i, mask)) {
            continue;
        }

        //
        // Check for buffer overflow. A string of the form ",<n>" will have
        // at most 10 characters, plus we want to leave room to print ",...}"
        // if the set is too large to print for a total of 15 characters.
        // We already left room for '\0' in setting end.
        //
        if (end - scan < 15) {
            break;
        }
        KMP_SNPRINTF(scan, end-scan+1, ",%-ld", (long)i);
        while (*scan != '\0') scan++;
    }
    if (i < KMP_CPU_SETSIZE) {
        KMP_SNPRINTF(scan, end-scan+1, ",...");
        while (*scan != '\0') scan++;
    }
    KMP_SNPRINTF(scan, end-scan+1, "}");
    while (*scan != '\0') scan++;
    KMP_ASSERT(scan <= end);
    return buf;
}
#endif // KMP_USE_HWLOC
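
// Example of the output format (assumed, based on the code above): a mask
// containing OS procs 0, 1, 2 and 5 prints as "{0,1,2,5}", an empty mask
// prints as "{<empty>}", and a set too large for buf_len is truncated so
// that the string ends in ",...}".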


void
__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
{
    KMP_CPU_ZERO(mask);

# if KMP_GROUP_AFFINITY

    if (__kmp_num_proc_groups > 1) {
        int group;
        KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
        for (group = 0; group < __kmp_num_proc_groups; group++) {
            int i;
            int num = __kmp_GetActiveProcessorCount(group);
            for (i = 0; i < num; i++) {
                KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
            }
        }
    }
    else

# endif /* KMP_GROUP_AFFINITY */

    {
        int proc;
        for (proc = 0; proc < __kmp_xproc; proc++) {
            KMP_CPU_SET(proc, mask);
        }
    }
}

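// Example (Windows* OS with processor groups, 64-bit assumed): CHAR_BIT *
// sizeof(DWORD_PTR) == 64, so proc i of group g occupies bit g * 64 + i in
// the mask; e.g. proc 3 of group 1 sets bit 67.
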
//
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the child_num
// vector of the address object. This is done in case the labels used for
// the children at one node of the hierarchy differ from those used for
// another node at the same level. Example: suppose the machine has 2 nodes
// with 2 packages each. The first node contains packages 601 and 602, and
// second node contains packages 603 and 604. If we try to sort the table
// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
// because we are paying attention to the labels themselves, not the ordinal
// child numbers. By using the child numbers in the sort, the result is
// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
//
static void
__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
  int numAddrs)
{
    KMP_DEBUG_ASSERT(numAddrs > 0);
    int depth = address2os->first.depth;
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
      * sizeof(unsigned));
    int labCt;
    for (labCt = 0; labCt < depth; labCt++) {
        address2os[0].first.childNums[labCt] = counts[labCt] = 0;
        lastLabel[labCt] = address2os[0].first.labels[labCt];
    }
    int i;
    for (i = 1; i < numAddrs; i++) {
        for (labCt = 0; labCt < depth; labCt++) {
            if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
                int labCt2;
                for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
                    counts[labCt2] = 0;
                    lastLabel[labCt2] = address2os[i].first.labels[labCt2];
                }
                counts[labCt]++;
                lastLabel[labCt] = address2os[i].first.labels[labCt];
                break;
            }
        }
        for (labCt = 0; labCt < depth; labCt++) {
            address2os[i].first.childNums[labCt] = counts[labCt];
        }
        for (; labCt < (int)Address::maxDepth; labCt++) {
            address2os[i].first.childNums[labCt] = 0;
        }
    }
    // Free the scratch arrays.
    __kmp_free(lastLabel);
    __kmp_free(counts);
}


//
// All of the __kmp_affinity_create_*_map() routines should set
// __kmp_affinity_masks to a vector of affinity mask objects of length
// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
// return the number of levels in the machine topology tree (zero if
// __kmp_affinity_type == affinity_none).
//
// All of the __kmp_affinity_create_*_map() routines should set
// *__kmp_affin_fullMask to the affinity mask for the initialization thread.
// They need to save and restore the mask, and it could be needed later, so
// saving it is just an optimization to avoid calling kmp_get_system_affinity()
// again.
//
kmp_affin_mask_t *__kmp_affin_fullMask = NULL;

static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif

//
// __kmp_affinity_uniform_topology() doesn't work when called from
// places which support arbitrarily many levels in the machine topology
// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
// and __kmp_affinity_create_x2apicid_map().
//
inline static bool
__kmp_affinity_uniform_topology()
{
    return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}

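// Example (assumed counts): a machine modeled as 2 packages x 8 cores x
// 2 threads has a product of 32, so the topology is reported as uniform
// only when all 32 hardware threads are available (__kmp_avail_proc == 32).
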
//
// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc.
//
static void
__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
  int pkgLevel, int coreLevel, int threadLevel)
{
    int proc;

    KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
    for (proc = 0; proc < len; proc++) {
        int level;
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);
        for (level = 0; level < depth; level++) {
            if (level == threadLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
            }
            else if (level == coreLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
            }
            else if (level == pkgLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
            }
            else if (level > pkgLevel) {
                __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                  level - pkgLevel - 1);
            }
            else {
                __kmp_str_buf_print(&buf, "L%d ", level);
            }
            __kmp_str_buf_print(&buf, "%d ",
              address2os[proc].first.labels[level]);
        }
        KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
          buf.str);
        __kmp_str_buf_free(&buf);
    }
}

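// Sketch of the resulting output (message text approximate): for a depth-3
// map with levels {package, core, thread}, each OS proc gets a line like
//
//    OMP: Info: KMP_AFFINITY: OS proc 5 maps to Package 0 Core 2 Thread 1
//
// where the number after each level name is that proc's label at the level.
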
#if KMP_USE_HWLOC

// This function removes the topology levels that are radix 1 and don't offer
// further information about the topology. The most common example is when
// there is one thread context per core; the extra thread-context level adds
// no unique labels, so it is removed.
// return value: the new depth of address2os
static int
__kmp_affinity_remove_radix_one_levels(AddrUnsPair *address2os, int nActiveThreads, int depth, int* pkgLevel, int* coreLevel, int* threadLevel) {
    int level;
    int i;
    int radix1_detected;

    for (level = depth-1; level >= 0; --level) {
        // Always keep the package level
        if (level == *pkgLevel)
            continue;
        // Detect if this level is radix 1
        radix1_detected = 1;
        for (i = 1; i < nActiveThreads; ++i) {
            if (address2os[0].first.labels[level] != address2os[i].first.labels[level]) {
                // There are differing label values for this level so it stays
                radix1_detected = 0;
                break;
            }
        }
        if (!radix1_detected)
            continue;
        // Radix 1 was detected
        if (level == *threadLevel) {
            // If only one thread per core, then just decrement
            // the depth which removes the threadlevel from address2os
            for (i = 0; i < nActiveThreads; ++i) {
                address2os[i].first.depth--;
            }
            *threadLevel = -1;
        } else if (level == *coreLevel) {
            // For core level, we move the thread labels over if they are still
            // valid (*threadLevel != -1), and also reduce the depth another level
            for (i = 0; i < nActiveThreads; ++i) {
                if (*threadLevel != -1) {
                    address2os[i].first.labels[*coreLevel] = address2os[i].first.labels[*threadLevel];
                }
                address2os[i].first.depth--;
            }
            *coreLevel = -1;
        }
    }
    return address2os[0].first.depth;
}

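// Worked example (labels assumed): with depth == 3 and addresses of the
// form {pkg, core, thread}, if every active thread has thread label 0
// (one thread context per core), the thread level is radix 1: the loop
// above decrements each address's depth, sets *threadLevel to -1, and
// returns 2, leaving {pkg, core} addresses.
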
// Returns the number of objects of type 'type' below 'obj' within the topology tree structure.
// e.g., if obj is a HWLOC_OBJ_SOCKET object, and type is HWLOC_OBJ_PU, then
// this will return the number of PU's under the SOCKET object.
static int
__kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj, hwloc_obj_type_t type) {
    int retval = 0;
    hwloc_obj_t first;
    for(first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type, obj->logical_index, type, 0);
        first != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) == obj;
        first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type, first))
    {
        ++retval;
    }
    return retval;
}

static int
__kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    __kmp_get_system_affinity(oldMask, TRUE);

    int depth = 3;
    int pkgLevel = 0;
    int coreLevel = 1;
    int threadLevel = 2;
    nPackages = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_root_obj(__kmp_hwloc_topology), HWLOC_OBJ_SOCKET);
    nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0), HWLOC_OBJ_CORE);
    __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);

    if (! KMP_AFFINITY_CAPABLE())
    {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // Allocate the data structure to be returned.
    //
    AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);

    hwloc_obj_t pu;
    hwloc_obj_t core;
    hwloc_obj_t socket;
    int nActiveThreads = 0;
    int socket_identifier = 0;
    for(socket = hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0);
        socket != NULL;
        socket = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, socket),
        socket_identifier++)
    {
        int core_identifier = 0;
        for(core = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, socket->type, socket->logical_index, HWLOC_OBJ_CORE, 0);
            core != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, socket->type, core) == socket;
            core = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, core),
            core_identifier++)
        {
            int pu_identifier = 0;
            for(pu = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, core->type, core->logical_index, HWLOC_OBJ_PU, 0);
                pu != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, core->type, pu) == core;
                pu = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU, pu),
                pu_identifier++)
            {
                Address addr(3);
                if(! KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
                    continue;
                KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
                  socket->os_index, socket->logical_index, core->os_index, core->logical_index, pu->os_index, pu->logical_index));
                addr.labels[0] = socket_identifier; // package
                addr.labels[1] = core_identifier;   // core
                addr.labels[2] = pu_identifier;     // pu
                retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
                nActiveThreads++;
            }
        }
    }

    //
    // If there's only one thread context to bind to, return now.
    //
    KMP_ASSERT(nActiveThreads > 0);
    if (nActiveThreads == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(retval);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        //
        // Form an Address object which only includes the package level.
        //
        Address addr(1);
        addr.labels[0] = retval[0].first.labels[pkgLevel];
        retval[0].first = addr;

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
        }

        *address2os = retval;
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the table by physical Id.
    //
    qsort(retval, nActiveThreads, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages. Make sure all these vars are set
    // correctly, and return if affinity is not enabled.
    //
    __kmp_ncores = hwloc_get_nbobjs_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE);

    //
    // Check to see if the machine topology is uniform
    //
    unsigned npackages = hwloc_get_nbobjs_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET);
    unsigned ncores = __kmp_ncores;
    unsigned nthreads = hwloc_get_nbobjs_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU);
    unsigned uniform = (npackages * nCoresPerPkg * __kmp_nThreadsPerCore == nthreads);

    //
    // Print the machine topology summary.
    //
    if (__kmp_affinity_verbose) {
        char mask[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (uniform) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }

        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);

        __kmp_str_buf_print(&buf, "%d", npackages);
        //for (level = 1; level <= pkgLevel; level++) {
        //    __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
        //}
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);

        __kmp_str_buf_free(&buf);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(retval);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Find any levels with radix 1, and remove them from the map
    // (except for the package level).
    //
    depth = __kmp_affinity_remove_radix_one_levels(retval, nActiveThreads, depth, &pkgLevel, &coreLevel, &threadLevel);

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(retval, nActiveThreads, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    KMP_CPU_FREE(oldMask);
    *address2os = retval;
    return depth;
}
#endif // KMP_USE_HWLOC

//
// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread id's <-> processor id's.
//
static int
__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Even if __kmp_affinity_type == affinity_none, this routine might still
    // be called to set __kmp_ncores, as well as
    // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(__kmp_affinity_type == affinity_none);
        __kmp_ncores = nPackages = __kmp_xproc;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages. Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nPackages = __kmp_avail_proc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, __kmp_affin_fullMask);

        KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }
    if (__kmp_affinity_type == affinity_none) {
        return 0;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    unsigned int i;
    KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
            continue;
        }

        Address addr(1);
        addr.labels[0] = i;
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
    }
    if (__kmp_affinity_verbose) {
        KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Only the package level is modeled in the machine topology map,
        // so the #levels of granularity is either 0 or 1.
        //
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels = 1;
        }
        else {
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 1;
}

# if KMP_GROUP_AFFINITY

//
// If multiple Windows* OS processor groups exist, we can create a 2-level
// topology map with the groups at level 0 and the individual procs at
// level 1.
//
// This facilitates letting the threads float among all procs in a group,
// if granularity=group (the default when there are multiple groups).
//
static int
__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // If we don't have multiple processor groups, return now.
    // The flat mapping will be used.
    //
    if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(__kmp_affin_fullMask) >= 0)) {
        // FIXME set *msg_id
        return -1;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    int i;
    KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
            continue;
        }

        Address addr(2);
        addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
        addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
              addr.labels[1]);
        }
    }

    if (__kmp_affinity_gran_levels < 0) {
        if (__kmp_affinity_gran == affinity_gran_group) {
            __kmp_affinity_gran_levels = 1;
        }
        else if ((__kmp_affinity_gran == affinity_gran_fine)
          || (__kmp_affinity_gran == affinity_gran_thread)) {
            __kmp_affinity_gran_levels = 0;
        }
        else {
            const char *gran_str = NULL;
            if (__kmp_affinity_gran == affinity_gran_core) {
                gran_str = "core";
            }
            else if (__kmp_affinity_gran == affinity_gran_package) {
                gran_str = "package";
            }
            else if (__kmp_affinity_gran == affinity_gran_node) {
                gran_str = "node";
            }
            else {
                KMP_ASSERT(0);
            }

            // Warning: can't use affinity granularity \"gran\" with group topology method, using "thread"
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 2;
}

# endif /* KMP_GROUP_AFFINITY */

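// Worked example for the group map above (64-bit Windows* OS assumed):
// CHAR_BIT * sizeof(DWORD_PTR) == 64, so OS proc 75 gets labels
// {75 / 64, 75 % 64} == {1, 11}: processor group 1, bit 11 within the group.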

# if KMP_ARCH_X86 || KMP_ARCH_X86_64

static int
__kmp_cpuid_mask_width(int count) {
    int r = 0;

    while((1<<r) < count)
        ++r;
    return r;
}

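// Worked example: __kmp_cpuid_mask_width(6) returns 3, since 1<<2 == 4 < 6
// while 1<<3 == 8 >= 6; i.e. r is the smallest value with (1<<r) >= count.
// For an exact power of two such as 4, it returns log2(count) == 2.
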

class apicThreadInfo {
public:
    unsigned osId;              // param to __kmp_affinity_bind_thread
    unsigned apicId;            // from cpuid after binding
    unsigned maxCoresPerPkg;    //      ""
    unsigned maxThreadsPerPkg;  //      ""
    unsigned pkgId;             // inferred from above values
    unsigned coreId;            //      ""
    unsigned threadId;          //      ""
};


static int
__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->osId < bb->osId) return -1;
    if (aa->osId > bb->osId) return 1;
    return 0;
}


static int
__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->pkgId < bb->pkgId) return -1;
    if (aa->pkgId > bb->pkgId) return 1;
    if (aa->coreId < bb->coreId) return -1;
    if (aa->coreId > bb->coreId) return 1;
    if (aa->threadId < bb->threadId) return -1;
    if (aa->threadId > bb->threadId) return 1;
    return 0;
}

//
// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
// an algorithm which cycles through the available os threads, setting
// the current thread's affinity mask to each of them in turn, and
// retrieving the Apic Id for each thread context using the cpuid
// instruction.
//
static int
__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;
    int rc;
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check if cpuid leaf 4 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 4) {
        *msg_id = kmp_i18n_str_NoLeaf4Support;
        return -1;
    }

    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
    // the defaults that we calculated from issuing cpuid without binding
    // to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        //
        // Get an upper bound on the number of threads per package using
        // cpuid(1).
        //
        // On some OS/chip combinations where HT is supported by the chip
        // but is disabled, this value will be 2 on a single core chip.
        // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
        //
        __kmp_x86_cpuid(1, 0, &buf);
        int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (maxThreadsPerPkg == 0) {
            maxThreadsPerPkg = 1;
        }

        //
        // The num cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // The author of cpu_count.cpp treated this as only an upper bound
        // on the number of cores, but I haven't seen any cases where it
        // was greater than the actual number of cores, so we will treat
        // it as exact in this block of code.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            nCoresPerPkg = 1;
        }

        //
        // There is no way to reliably tell if HT is enabled without issuing
        // the cpuid instruction from every thread, and correlating the cpuid
        // info, so if the machine is not affinity capable, we assume that HT
        // is off. We have seen quite a few machines where maxThreadsPerPkg
        // is 2, yet the machine does not support HT.
        //
        // - Older OSes are usually found on machines with older chips, which
        //   do not support HT.
        //
        // - The performance penalty for mistakenly identifying a machine as
        //   HT when it isn't (which results in blocktime being incorrectly set
        //   to 0) is greater than the penalty for mistakenly identifying
        //   a machine as being 1 thread/core when it is really HT enabled
        //   (which results in blocktime being incorrectly set to a positive
        //   value).
        //
        __kmp_ncores = __kmp_xproc;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        __kmp_nThreadsPerCore = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    KMP_ASSERT(oldMask != NULL);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    // The relevant information is:
    //
    // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
    //    has a unique Apic Id, which is of the form pkg# : core# : thread#.
    //
    // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The
    //    value of this field determines the width of the core# + thread#
    //    fields in the Apic Id. It is also an upper bound on the number
    //    of threads per package, but it has been verified that situations
    //    happen where it is not exact. In particular, on certain OS/chip
    //    combinations where Intel(R) Hyper-Threading Technology is supported
    //    by the chip but has been disabled, the value of this field will be 2
    //    (for a single core chip). On other OS/chip combinations supporting
    //    Intel(R) Hyper-Threading Technology, the value of this field will be
    //    1 when Intel(R) Hyper-Threading Technology is disabled and 2 when
    //    it is enabled.
    //
    // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The
    //    value of this field (+1) determines the width of the core# field in
    //    the Apic Id. The comments in "cpucount.cpp" say that this value is
    //    an upper bound, but the IA-32 architecture manual says that it is
    //    exactly the number of cores per package, and I haven't seen any
    //    case where it wasn't.
    //
    // From this information, deduce the package Id, core Id, and thread Id,
    // and set the corresponding fields in the apicThreadInfo struct.
    //
    unsigned i;
    apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
    unsigned nApics = 0;
    KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(i);
        threadInfo[nApics].osId = i;

        //
        // The apic id and max threads per pkg come from cpuid(1).
        //
        __kmp_x86_cpuid(1, 0, &buf);
        // Check the APIC feature bit (cpuid(1) edx bit 9).
        if (!((buf.edx >> 9) & 1)) {
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_ApicNotPresent;
            return -1;
        }
        threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
        threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (threadInfo[nApics].maxThreadsPerPkg == 0) {
            threadInfo[nApics].maxThreadsPerPkg = 1;
        }

        //
        // Max cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            threadInfo[nApics].maxCoresPerPkg = 1;
        }

        //
        // Infer the pkgId / coreId / threadId using only the info
        // obtained locally.
        //
        int widthCT = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxThreadsPerPkg);
        threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

        int widthC = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxCoresPerPkg);
        int widthT = widthCT - widthC;
        if (widthT < 0) {
            //
            // I've never seen this one happen, but I suppose it could, if
            // the cpuid instruction on a chip was really screwed up.
            // Make sure to restore the affinity mask before the tail call.
            //
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }

        int maskC = (1 << widthC) - 1;
        threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
          & maskC;

        int maskT = (1 << widthT) - 1;
        threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

        nApics++;
    }

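    // Worked example (cpuid values assumed): suppose apicId == 0x2D
    // (binary 101101), maxThreadsPerPkg == 16 and maxCoresPerPkg == 8.
    // Then widthCT == 4, widthC == 3, widthT == 1, and the fields decode as
    //    pkgId    = 0x2D >> 4       == 2
    //    coreId   = (0x2D >> 1) & 7 == 6
    //    threadId = 0x2D & 1        == 1
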
    //
    // We've collected all the info we need.
    // Restore the old affinity mask for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, form an Address object
    // with depth 1 and return immediately (or, if affinity is off, set
    // address2os to NULL and return).
    //
    // If it is configured to omit the package level when there is only a
    // single package, the logic at the end of this routine won't work if
    // there is only a single thread - it would try to form an Address
    // object with depth 0.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
        Address addr(1);
        addr.labels[0] = threadInfo[0].pkgId;
        (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
        }

        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the threadInfo table by physical Id.
    //
    qsort(threadInfo, nApics, sizeof(*threadInfo),
      __kmp_affinity_cmp_apicThreadInfo_phys_id);

    //
    // The table is now sorted by pkgId / coreId / threadId, but we really
    // don't know the radix of any of the fields. pkgId's may be sparsely
    // assigned among the chips on a system. Although coreId's are usually
    // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
    // [0..threadsPerCore-1], we don't want to make any such assumptions.
    //
    // For that matter, we don't know what coresPerPkg and threadsPerCore
    // (or the total # packages) are at this point - we want to determine
    // that now. We only have an upper bound on the first two figures.
    //
    // We also perform a consistency check at this point: the values returned
    // by the cpuid instruction for any thread bound to a given package had
    // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
    //
    nPackages = 1;
    nCoresPerPkg = 1;
    __kmp_nThreadsPerCore = 1;
    unsigned nCores = 1;

    unsigned pkgCt = 1;                         // to determine radii
    unsigned lastPkgId = threadInfo[0].pkgId;
    unsigned coreCt = 1;
    unsigned lastCoreId = threadInfo[0].coreId;
    unsigned threadCt = 1;
    unsigned lastThreadId = threadInfo[0].threadId;

    // intra-pkg consistency checks
    unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
    unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

    for (i = 1; i < nApics; i++) {
        if (threadInfo[i].pkgId != lastPkgId) {
            nCores++;
            pkgCt++;
            lastPkgId = threadInfo[i].pkgId;
            if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
            coreCt = 1;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;

            //
            // This is a different package, so go on to the next iteration
            // without doing any consistency checks. Reset the consistency
            // check vars, though.
            //
            prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
            prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
            continue;
        }

        if (threadInfo[i].coreId != lastCoreId) {
            nCores++;
            coreCt++;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;
        }
        else if (threadInfo[i].threadId != lastThreadId) {
            threadCt++;
            lastThreadId = threadInfo[i].threadId;
        }
        else {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
            return -1;
        }

        //
        // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
        // fields agree between all the threads bound to a given package.
        //
        if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
          || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }
    }
    nPackages = pkgCt;
    if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
    if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages. Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nCores;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (__kmp_affinity_uniform_topology()) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Now that we've determined the number of packages, the number of cores
    // per package, and the number of threads per core, we can construct the
    // data structure that is to be returned.
    //
    int pkgLevel = 0;
    int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
    int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
    unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

    KMP_ASSERT(depth > 0);
    *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

    for (i = 0; i < nApics; ++i) {
        Address addr(depth);
        unsigned os = threadInfo[i].osId;
        int d = 0;

        if (pkgLevel >= 0) {
            addr.labels[d++] = threadInfo[i].pkgId;
        }
        if (coreLevel >= 0) {
            addr.labels[d++] = threadInfo[i].coreId;
        }
        if (threadLevel >= 0) {
            addr.labels[d++] = threadInfo[i].threadId;
        }
        (*address2os)[i] = AddrUnsPair(addr, os);
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0)
          && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return depth;
}

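// Example of the granularity logic above (settings assumed): with a
// depth-3 map {package, core, thread} and KMP_AFFINITY granularity=core,
// __kmp_affinity_gran_levels becomes 1 - only the thread level is below
// the granularity, so threads float among the thread contexts of a core.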

//
// Intel(R) microarchitecture code name Nehalem, Dunnington and later
// architectures support a newer interface for specifying the x2APIC Ids,
// based on cpuid leaf 11.
//
static int
__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;

    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check to see if cpuid leaf 11 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 11) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }
    __kmp_x86_cpuid(11, 0, &buf);
    if (buf.ebx == 0) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }

    //
    // Find the number of levels in the machine topology. While we're at it,
    // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will
    // try to get more accurate values later by explicitly counting them,
    // but get reasonable defaults now, in case we return early.
    //
    int level;
    int threadLevel = -1;
    int coreLevel = -1;
    int pkgLevel = -1;
    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

    for (level = 0;; level++) {
        if (level > 31) {
            //
            // FIXME: Hack for DPD200163180
            //
            // If level is big then something went wrong -> exiting
            //
            // There could actually be 32 valid levels in the machine topology,
            // but so far, the only machine we have seen which does not exit
            // this loop before iteration 32 has fubar x2APIC settings.
            //
            // For now, just reject this case based upon loop trip count.
            //
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }
        __kmp_x86_cpuid(11, level, &buf);
        if (buf.ebx == 0) {
            if (pkgLevel < 0) {
                //
                // Will infer nPackages from __kmp_xproc
                //
                pkgLevel = level;
                level++;
            }
            break;
        }
        int kind = (buf.ecx >> 8) & 0xff;
        if (kind == 1) {
            //
            // SMT level
            //
            threadLevel = level;
            coreLevel = -1;
            pkgLevel = -1;
            __kmp_nThreadsPerCore = buf.ebx & 0xff;
            if (__kmp_nThreadsPerCore == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else if (kind == 2) {
            //
            // core level
            //
            coreLevel = level;
            pkgLevel = -1;
            nCoresPerPkg = buf.ebx & 0xff;
            if (nCoresPerPkg == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else {
            if (level <= 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
            if (pkgLevel >= 0) {
                continue;
            }
            pkgLevel = level;
            nPackages = buf.ebx & 0xff;
            if (nPackages == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
    }
    int depth = level;

    //
    // In the above loop, "level" was counted from the finest level (usually
    // thread) to the coarsest. The caller expects that we will place the
    // labels in (*address2os)[].first.labels[] in the inverse order, so
    // we need to invert the vars saying which level means what.
    //
    if (threadLevel >= 0) {
        threadLevel = depth - threadLevel - 1;
    }
    if (coreLevel >= 0) {
        coreLevel = depth - coreLevel - 1;
    }
    KMP_DEBUG_ASSERT(pkgLevel >= 0);
    pkgLevel = depth - pkgLevel - 1;

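    // Worked example: if cpuid leaf 11 reported the SMT level at 0, the
    // core level at 1, and the package level at 2 (depth == 3), the
    // inversion above yields pkgLevel == 0, coreLevel == 1 and
    // threadLevel == 2, matching the coarse-to-fine order of labels[].
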
    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
    // the defaults that we calculated from issuing cpuid without binding
    // to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE())
    {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Allocate the data structure to be returned.
    //
    AddrUnsPair *retval = (AddrUnsPair *)
      __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    unsigned int proc;
    int nApics = 0;
    KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(proc);

        //
        // Extract the labels for each level in the machine topology map
        // from the Apic ID.
        //
        Address addr(depth);
        int prev_shift = 0;

        for (level = 0; level < depth; level++) {
            __kmp_x86_cpuid(11, level, &buf);
            unsigned apicId = buf.edx;
            if (buf.ebx == 0) {
                if (level != depth - 1) {
                    __kmp_free(retval);
                    KMP_CPU_FREE(oldMask);
                    *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
                    return -1;
                }
                addr.labels[depth - level - 1] = apicId >> prev_shift;
                level++;
                break;
            }
            int shift = buf.eax & 0x1f;
            int mask = (1 << shift) - 1;
            addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
            prev_shift = shift;
        }
        if (level != depth) {
            __kmp_free(retval);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }

        retval[nApics] = AddrUnsPair(addr, proc);
        nApics++;
    }

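    // Worked example (shift values assumed): if cpuid(11, 0) reports
    // shift 1 (SMT level) and cpuid(11, 1) reports shift 5 (core level) on
    // a depth-3 topology, then for apicId 0x2D the loop above assigns
    //    labels[2] = 0x2D & 0x1         == 1   (thread)
    //    labels[1] = (0x2D & 0x1F) >> 1 == 6   (core)
    //    labels[0] = 0x2D >> 5          == 1   (package)
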
    //
    // We've collected all the info we need.
    // Restore the old affinity mask for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, return now.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(retval);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        //
        // Form an Address object which only includes the package level.
        //
        Address addr(1);
        addr.labels[0] = retval[0].first.labels[pkgLevel];
        retval[0].first = addr;

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
        }

        *address2os = retval;
        KMP_CPU_FREE(oldMask);
        return 1;
    }

1607 //
1608 // Sort the table by physical Id.
1609 //
1610 qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
1611
1612 //
1613 // Find the radix at each of the levels.
1614 //
1615 unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1616 unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1617 unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1618 unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1619 for (level = 0; level < depth; level++) {
1620 totals[level] = 1;
1621 maxCt[level] = 1;
1622 counts[level] = 1;
1623 last[level] = retval[0].first.labels[level];
1624 }
1625
1626 //
1627 // From here on, the iteration variable "level" runs from the finest
1628 // level to the coarsest, i.e. we iterate forward through
1629 // (*address2os)[].first.labels[] - in the previous loops, we iterated
1630 // backwards.
1631 //
1632 for (proc = 1; (int)proc < nApics; proc++) {
1633 int level;
1634 for (level = 0; level < depth; level++) {
1635 if (retval[proc].first.labels[level] != last[level]) {
1636 int j;
1637 for (j = level + 1; j < depth; j++) {
1638 totals[j]++;
1639 counts[j] = 1;
1640                // The line below would cause incorrect topology information to be
1641                // printed when the maximum value for some level (maxCt[level]) is
1642                // encountered earlier in the array than a smaller value.
1643                // For example, suppose pkg0 has 4 cores and pkg1 has 2 cores. Then
1644                // maxCt[1] would end up as 2, whereas it must be 4.
1645 // TODO!!! Check if it can be commented safely
1646 //maxCt[j] = 1;
1647 last[j] = retval[proc].first.labels[j];
1648 }
1649 totals[level]++;
1650 counts[level]++;
1651 if (counts[level] > maxCt[level]) {
1652 maxCt[level] = counts[level];
1653 }
1654 last[level] = retval[proc].first.labels[level];
1655 break;
1656 }
1657 else if (level == depth - 1) {
1658 __kmp_free(last);
1659 __kmp_free(maxCt);
1660 __kmp_free(counts);
1661 __kmp_free(totals);
1662 __kmp_free(retval);
1663 KMP_CPU_FREE(oldMask);
1664 *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
1665 return -1;
1666 }
1667 }
1668 }
1669
1670 //
1671 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00001672 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001673 // nCoresPerPkg, & nPackages. Make sure all these vars are set
1674 // correctly, and return if affinity is not enabled.
1675 //
1676 if (threadLevel >= 0) {
1677 __kmp_nThreadsPerCore = maxCt[threadLevel];
1678 }
1679 else {
1680 __kmp_nThreadsPerCore = 1;
1681 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001682 nPackages = totals[pkgLevel];
1683
1684 if (coreLevel >= 0) {
1685 __kmp_ncores = totals[coreLevel];
1686 nCoresPerPkg = maxCt[coreLevel];
1687 }
1688 else {
1689 __kmp_ncores = nPackages;
1690 nCoresPerPkg = 1;
1691 }
1692
1693 //
1694 // Check to see if the machine topology is uniform
1695 //
1696 unsigned prod = maxCt[0];
1697 for (level = 1; level < depth; level++) {
1698 prod *= maxCt[level];
1699 }
1700 bool uniform = (prod == totals[level - 1]);
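
    //
    // Worked example (hypothetical machine): with 2 packages x 4 cores x
    // 2 threads, maxCt = {2, 4, 2}, so prod = 16, which equals
    // totals[depth - 1] (the total number of thread contexts): uniform.
    // If one package had only 2 cores, totals[depth - 1] would be
    // 12 != 16, and the topology would be reported as non-uniform.
    //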
1701
1702 //
1703 // Print the machine topology summary.
1704 //
1705 if (__kmp_affinity_verbose) {
1706 char mask[KMP_AFFIN_MASK_PRINT_LEN];
1707 __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1708
1709 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1710 if (__kmp_affinity_respect_mask) {
1711 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
1712 } else {
1713 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
1714 }
1715 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1716 if (uniform) {
1717 KMP_INFORM(Uniform, "KMP_AFFINITY");
1718 } else {
1719 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1720 }
1721
1722 kmp_str_buf_t buf;
1723 __kmp_str_buf_init(&buf);
1724
1725 __kmp_str_buf_print(&buf, "%d", totals[0]);
1726 for (level = 1; level <= pkgLevel; level++) {
1727 __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
1728 }
1729 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
1730 __kmp_nThreadsPerCore, __kmp_ncores);
1731
1732 __kmp_str_buf_free(&buf);
1733 }
1734
1735 if (__kmp_affinity_type == affinity_none) {
1736 __kmp_free(last);
1737 __kmp_free(maxCt);
1738 __kmp_free(counts);
1739 __kmp_free(totals);
1740 __kmp_free(retval);
1741 KMP_CPU_FREE(oldMask);
1742 return 0;
1743 }
1744
1745 //
1746    // Find any levels with radix 1, and remove them from the map
1747 // (except for the package level).
1748 //
1749 int new_depth = 0;
1750 for (level = 0; level < depth; level++) {
1751 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1752 continue;
1753 }
1754 new_depth++;
1755 }
1756
1757 //
1758 // If we are removing any levels, allocate a new vector to return,
1759 // and copy the relevant information to it.
1760 //
1761 if (new_depth != depth) {
1762 AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
1763 sizeof(AddrUnsPair) * nApics);
1764 for (proc = 0; (int)proc < nApics; proc++) {
1765 Address addr(new_depth);
1766 new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
1767 }
1768 int new_level = 0;
Jonathan Peyton62f38402015-08-25 18:44:41 +00001769 int newPkgLevel = -1;
1770 int newCoreLevel = -1;
1771 int newThreadLevel = -1;
1772 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001773 for (level = 0; level < depth; level++) {
Jonathan Peyton62f38402015-08-25 18:44:41 +00001774 if ((maxCt[level] == 1)
1775 && (level != pkgLevel)) {
1776 //
1777 // Remove this level. Never remove the package level
1778 //
1779 continue;
1780 }
1781 if (level == pkgLevel) {
1782                newPkgLevel = new_level;    // remap to the index in the compressed map
1783            }
1784            if (level == coreLevel) {
1785                newCoreLevel = new_level;
1786            }
1787            if (level == threadLevel) {
1788                newThreadLevel = new_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001789 }
1790 for (proc = 0; (int)proc < nApics; proc++) {
1791 new_retval[proc].first.labels[new_level]
1792 = retval[proc].first.labels[level];
1793 }
1794 new_level++;
1795 }
1796
1797 __kmp_free(retval);
1798 retval = new_retval;
1799 depth = new_depth;
Jonathan Peyton62f38402015-08-25 18:44:41 +00001800 pkgLevel = newPkgLevel;
1801 coreLevel = newCoreLevel;
1802 threadLevel = newThreadLevel;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001803 }
1804
1805 if (__kmp_affinity_gran_levels < 0) {
1806 //
1807 // Set the granularity level based on what levels are modeled
1808 // in the machine topology map.
1809 //
1810 __kmp_affinity_gran_levels = 0;
1811 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
1812 __kmp_affinity_gran_levels++;
1813 }
1814 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1815 __kmp_affinity_gran_levels++;
1816 }
1817 if (__kmp_affinity_gran > affinity_gran_package) {
1818 __kmp_affinity_gran_levels++;
1819 }
1820 }
1821
1822 if (__kmp_affinity_verbose) {
1823 __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
1824 coreLevel, threadLevel);
1825 }
1826
1827 __kmp_free(last);
1828 __kmp_free(maxCt);
1829 __kmp_free(counts);
1830 __kmp_free(totals);
1831 KMP_CPU_FREE(oldMask);
1832 *address2os = retval;
1833 return depth;
1834}
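
//
// Illustrative sketch only (not used by the library): how the shift widths
// that cpuid leaf 11 reports in EAX[4:0] decompose an x2APIC ID into
// per-level labels, mirroring the extraction loop above. The widths and
// the helper name are hypothetical; a real machine supplies its own.
//
static inline void
example_decode_x2apicid(unsigned apicId, int smtShift, int coreShift,
    unsigned *threadLabel, unsigned *coreLabel, unsigned *pkgLabel)
{
    *threadLabel = apicId & ((1u << smtShift) - 1);              // finest level
    *coreLabel = (apicId & ((1u << coreShift) - 1)) >> smtShift; // middle level
    *pkgLabel = apicId >> coreShift;                             // remaining high bits
    // e.g. with smtShift=1 and coreShift=5, apicId 0x2b decodes to
    // package 1, core 5, thread 1.
}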
1835
1836
1837# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1838
1839
1840#define osIdIndex 0
1841#define threadIdIndex 1
1842#define coreIdIndex 2
1843#define pkgIdIndex 3
1844#define nodeIdIndex 4
1845
1846typedef unsigned *ProcCpuInfo;
1847static unsigned maxIndex = pkgIdIndex;
1848
1849
1850static int
1851__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
1852{
1853 const unsigned *aa = (const unsigned *)a;
1854 const unsigned *bb = (const unsigned *)b;
1855 if (aa[osIdIndex] < bb[osIdIndex]) return -1;
1856 if (aa[osIdIndex] > bb[osIdIndex]) return 1;
1857 return 0;
1858 }
1859
1860
1861static int
1862__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
1863{
1864 unsigned i;
1865 const unsigned *aa = *((const unsigned **)a);
1866 const unsigned *bb = *((const unsigned **)b);
1867 for (i = maxIndex; ; i--) {
1868 if (aa[i] < bb[i]) return -1;
1869 if (aa[i] > bb[i]) return 1;
1870 if (i == osIdIndex) break;
1871 }
1872 return 0;
1873}
1874
1875
1876//
1877// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
1878// affinity map.
1879//
1880static int
1881__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
1882 kmp_i18n_id_t *const msg_id, FILE *f)
1883{
1884 *address2os = NULL;
1885 *msg_id = kmp_i18n_null;
1886
1887 //
1888    // Scan the file, count the number of "processor" (osId) fields,
Alp Toker8f2d3f02014-02-24 10:40:15 +00001889 // and find the highest value of <n> for a node_<n> field.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001890 //
1891 char buf[256];
1892 unsigned num_records = 0;
1893 while (! feof(f)) {
1894 buf[sizeof(buf) - 1] = 1;
1895 if (! fgets(buf, sizeof(buf), f)) {
1896 //
1897 // Read errors presumably because of EOF
1898 //
1899 break;
1900 }
1901
1902 char s1[] = "processor";
1903 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1904 num_records++;
1905 continue;
1906 }
1907
1908 //
1909 // FIXME - this will match "node_<n> <garbage>"
1910 //
1911 unsigned level;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001912        if (KMP_SSCANF(buf, "node_%u id", &level) == 1) { // %u: level is unsigned
Jim Cownie5e8470a2013-09-27 10:38:44 +00001913 if (nodeIdIndex + level >= maxIndex) {
1914 maxIndex = nodeIdIndex + level;
1915 }
1916 continue;
1917 }
1918 }
1919
1920 //
1921 // Check for empty file / no valid processor records, or too many.
1922 // The number of records can't exceed the number of valid bits in the
1923 // affinity mask.
1924 //
1925 if (num_records == 0) {
1926 *line = 0;
1927 *msg_id = kmp_i18n_str_NoProcRecords;
1928 return -1;
1929 }
1930 if (num_records > (unsigned)__kmp_xproc) {
1931 *line = 0;
1932 *msg_id = kmp_i18n_str_TooManyProcRecords;
1933 return -1;
1934 }
1935
1936 //
1937    // Set the file pointer back to the beginning, so that we can scan the
1938    // file again, this time performing a full parse of the data.
1939    // Allocate a vector of ProcCpuInfo objects, where we will place the data.
1940 // Adding an extra element at the end allows us to remove a lot of extra
1941 // checks for termination conditions.
1942 //
1943 if (fseek(f, 0, SEEK_SET) != 0) {
1944 *line = 0;
1945 *msg_id = kmp_i18n_str_CantRewindCpuinfo;
1946 return -1;
1947 }
1948
1949 //
1950 // Allocate the array of records to store the proc info in. The dummy
1951 // element at the end makes the logic in filling them out easier to code.
1952 //
1953 unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
1954 * sizeof(unsigned *));
1955 unsigned i;
1956 for (i = 0; i <= num_records; i++) {
1957 threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
1958 * sizeof(unsigned));
1959 }
1960
1961#define CLEANUP_THREAD_INFO \
1962 for (i = 0; i <= num_records; i++) { \
1963 __kmp_free(threadInfo[i]); \
1964 } \
1965 __kmp_free(threadInfo);
1966
1967 //
1968 // A value of UINT_MAX means that we didn't find the field
1969 //
1970 unsigned __index;
1971
1972#define INIT_PROC_INFO(p) \
1973 for (__index = 0; __index <= maxIndex; __index++) { \
1974 (p)[__index] = UINT_MAX; \
1975 }
1976
1977 for (i = 0; i <= num_records; i++) {
1978 INIT_PROC_INFO(threadInfo[i]);
1979 }
1980
1981 unsigned num_avail = 0;
1982 *line = 0;
1983 while (! feof(f)) {
1984 //
1985 // Create an inner scoping level, so that all the goto targets at the
1986 // end of the loop appear in an outer scoping level. This avoids
1987 // warnings about jumping past an initialization to a target in the
1988 // same block.
1989 //
1990 {
1991 buf[sizeof(buf) - 1] = 1;
1992 bool long_line = false;
1993 if (! fgets(buf, sizeof(buf), f)) {
1994 //
1995 // Read errors presumably because of EOF
1996 //
1997 // If there is valid data in threadInfo[num_avail], then fake
1998                // a blank line to ensure that the last address gets parsed.
1999 //
2000 bool valid = false;
2001 for (i = 0; i <= maxIndex; i++) {
2002 if (threadInfo[num_avail][i] != UINT_MAX) {
2003 valid = true;
2004 }
2005 }
2006 if (! valid) {
2007 break;
2008 }
2009 buf[0] = 0;
2010 } else if (!buf[sizeof(buf) - 1]) {
2011 //
2012 // The line is longer than the buffer. Set a flag and don't
2013 // emit an error if we were going to ignore the line, anyway.
2014 //
2015 long_line = true;
2016
2017#define CHECK_LINE \
2018 if (long_line) { \
2019 CLEANUP_THREAD_INFO; \
2020 *msg_id = kmp_i18n_str_LongLineCpuinfo; \
2021 return -1; \
2022 }
2023 }
2024 (*line)++;
2025
2026 char s1[] = "processor";
2027 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
2028 CHECK_LINE;
2029 char *p = strchr(buf + sizeof(s1) - 1, ':');
2030 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002031 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002032 if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
2033 threadInfo[num_avail][osIdIndex] = val;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002034#if KMP_OS_LINUX && USE_SYSFS_INFO
2035 char path[256];
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002036 KMP_SNPRINTF(path, sizeof(path),
Jim Cownie181b4bb2013-12-23 17:28:57 +00002037 "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
2038 threadInfo[num_avail][osIdIndex]);
2039 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
2040
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002041 KMP_SNPRINTF(path, sizeof(path),
Jim Cownie181b4bb2013-12-23 17:28:57 +00002042 "/sys/devices/system/cpu/cpu%u/topology/core_id",
2043 threadInfo[num_avail][osIdIndex]);
2044 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002045 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002046#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002047 }
2048 char s2[] = "physical id";
2049 if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
2050 CHECK_LINE;
2051 char *p = strchr(buf + sizeof(s2) - 1, ':');
2052 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002053 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002054 if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
2055 threadInfo[num_avail][pkgIdIndex] = val;
2056 continue;
2057 }
2058 char s3[] = "core id";
2059 if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
2060 CHECK_LINE;
2061 char *p = strchr(buf + sizeof(s3) - 1, ':');
2062 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002063 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002064 if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
2065 threadInfo[num_avail][coreIdIndex] = val;
2066 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002067#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jim Cownie5e8470a2013-09-27 10:38:44 +00002068 }
2069 char s4[] = "thread id";
2070 if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
2071 CHECK_LINE;
2072 char *p = strchr(buf + sizeof(s4) - 1, ':');
2073 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002074 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002075 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
2076 threadInfo[num_avail][threadIdIndex] = val;
2077 continue;
2078 }
2079 unsigned level;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002080            if (KMP_SSCANF(buf, "node_%u id", &level) == 1) { // %u: level is unsigned
Jim Cownie5e8470a2013-09-27 10:38:44 +00002081 CHECK_LINE;
2082 char *p = strchr(buf + sizeof(s4) - 1, ':');
2083 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002084 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002085 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
2086 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
2087 threadInfo[num_avail][nodeIdIndex + level] = val;
2088 continue;
2089 }
2090
2091 //
2092 // We didn't recognize the leading token on the line.
2093 // There are lots of leading tokens that we don't recognize -
2094 // if the line isn't empty, go on to the next line.
2095 //
2096 if ((*buf != 0) && (*buf != '\n')) {
2097 //
2098 // If the line is longer than the buffer, read characters
2099 // until we find a newline.
2100 //
2101 if (long_line) {
2102 int ch;
2103 while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
2104 }
2105 continue;
2106 }
2107
2108 //
2109 // A newline has signalled the end of the processor record.
2110 // Check that there aren't too many procs specified.
2111 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002112 if ((int)num_avail == __kmp_xproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002113 CLEANUP_THREAD_INFO;
2114 *msg_id = kmp_i18n_str_TooManyEntries;
2115 return -1;
2116 }
2117
2118 //
2119 // Check for missing fields. The osId field must be there, and we
2120            // currently require that the physical id field is specified as well.
2121 //
2122 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
2123 CLEANUP_THREAD_INFO;
2124 *msg_id = kmp_i18n_str_MissingProcField;
2125 return -1;
2126 }
2127 if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
2128 CLEANUP_THREAD_INFO;
2129 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
2130 return -1;
2131 }
2132
2133 //
2134 // Skip this proc if it is not included in the machine model.
2135 //
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00002136 if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], __kmp_affin_fullMask)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002137 INIT_PROC_INFO(threadInfo[num_avail]);
2138 continue;
2139 }
2140
2141 //
2142 // We have a successful parse of this proc's info.
2143 // Increment the counter, and prepare for the next proc.
2144 //
2145 num_avail++;
2146 KMP_ASSERT(num_avail <= num_records);
2147 INIT_PROC_INFO(threadInfo[num_avail]);
2148 }
2149 continue;
2150
2151 no_val:
2152 CLEANUP_THREAD_INFO;
2153 *msg_id = kmp_i18n_str_MissingValCpuinfo;
2154 return -1;
2155
2156 dup_field:
2157 CLEANUP_THREAD_INFO;
2158 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
2159 return -1;
2160 }
2161 *line = 0;
2162
2163# if KMP_MIC && REDUCE_TEAM_SIZE
2164 unsigned teamSize = 0;
2165# endif // KMP_MIC && REDUCE_TEAM_SIZE
2166
2167 // check for num_records == __kmp_xproc ???
2168
2169 //
2170 // If there's only one thread context to bind to, form an Address object
2171 // with depth 1 and return immediately (or, if affinity is off, set
2172 // address2os to NULL and return).
2173 //
2174 // If it is configured to omit the package level when there is only a
2175 // single package, the logic at the end of this routine won't work if
2176 // there is only a single thread - it would try to form an Address
2177 // object with depth 0.
2178 //
2179 KMP_ASSERT(num_avail > 0);
2180 KMP_ASSERT(num_avail <= num_records);
2181 if (num_avail == 1) {
2182 __kmp_ncores = 1;
2183 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002184 if (__kmp_affinity_verbose) {
2185 if (! KMP_AFFINITY_CAPABLE()) {
2186 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2187 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2188 KMP_INFORM(Uniform, "KMP_AFFINITY");
2189 }
2190 else {
2191 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2192 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00002193 __kmp_affin_fullMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002194 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2195 if (__kmp_affinity_respect_mask) {
2196 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2197 } else {
2198 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2199 }
2200 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2201 KMP_INFORM(Uniform, "KMP_AFFINITY");
2202 }
2203 int index;
2204 kmp_str_buf_t buf;
2205 __kmp_str_buf_init(&buf);
2206 __kmp_str_buf_print(&buf, "1");
2207 for (index = maxIndex - 1; index > pkgIdIndex; index--) {
2208 __kmp_str_buf_print(&buf, " x 1");
2209 }
2210 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
2211 __kmp_str_buf_free(&buf);
2212 }
2213
2214 if (__kmp_affinity_type == affinity_none) {
2215 CLEANUP_THREAD_INFO;
2216 return 0;
2217 }
2218
2219 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
2220 Address addr(1);
2221 addr.labels[0] = threadInfo[0][pkgIdIndex];
2222 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
2223
2224 if (__kmp_affinity_gran_levels < 0) {
2225 __kmp_affinity_gran_levels = 0;
2226 }
2227
2228 if (__kmp_affinity_verbose) {
2229 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
2230 }
2231
2232 CLEANUP_THREAD_INFO;
2233 return 1;
2234 }
2235
2236 //
2237 // Sort the threadInfo table by physical Id.
2238 //
2239 qsort(threadInfo, num_avail, sizeof(*threadInfo),
2240 __kmp_affinity_cmp_ProcCpuInfo_phys_id);
2241
2242 //
2243 // The table is now sorted by pkgId / coreId / threadId, but we really
2244 // don't know the radix of any of the fields. pkgId's may be sparsely
2245 // assigned among the chips on a system. Although coreId's are usually
2246 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
2247 // [0..threadsPerCore-1], we don't want to make any such assumptions.
2248 //
2249 // For that matter, we don't know what coresPerPkg and threadsPerCore
2250 // (or the total # packages) are at this point - we want to determine
2251 // that now. We only have an upper bound on the first two figures.
2252 //
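    //
    // Worked example (hypothetical ids): after the sort the table might be
    //
    //     pkgId  coreId  threadId  osId
    //       0      0        0       0
    //       0      0        1       4
    //       0      1        0       1
    //       0      1        1       5
    //
    // from which the counting pass below derives maxCt[threadIdIndex] = 2
    // threads per core, maxCt[coreIdIndex] = 2 cores per package, and
    // totals[pkgIdIndex] = 1 package.
    //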
2253 unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
2254 * sizeof(unsigned));
2255 unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
2256 * sizeof(unsigned));
2257 unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
2258 * sizeof(unsigned));
2259 unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
2260 * sizeof(unsigned));
2261
2262 bool assign_thread_ids = false;
2263 unsigned threadIdCt;
2264 unsigned index;
2265
2266 restart_radix_check:
2267 threadIdCt = 0;
2268
2269 //
2270 // Initialize the counter arrays with data from threadInfo[0].
2271 //
2272 if (assign_thread_ids) {
2273 if (threadInfo[0][threadIdIndex] == UINT_MAX) {
2274 threadInfo[0][threadIdIndex] = threadIdCt++;
2275 }
2276 else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
2277 threadIdCt = threadInfo[0][threadIdIndex] + 1;
2278 }
2279 }
2280 for (index = 0; index <= maxIndex; index++) {
2281 counts[index] = 1;
2282 maxCt[index] = 1;
2283 totals[index] = 1;
2284        lastId[index] = threadInfo[0][index];
2285 }
2286
2287 //
2288 // Run through the rest of the OS procs.
2289 //
2290 for (i = 1; i < num_avail; i++) {
2291 //
2292 // Find the most significant index whose id differs
2293 // from the id for the previous OS proc.
2294 //
2295 for (index = maxIndex; index >= threadIdIndex; index--) {
2296 if (assign_thread_ids && (index == threadIdIndex)) {
2297 //
2298 // Auto-assign the thread id field if it wasn't specified.
2299 //
2300 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2301 threadInfo[i][threadIdIndex] = threadIdCt++;
2302 }
2303
2304 //
2305                // Apparently the thread id field was specified for some
2306 // entries and not others. Start the thread id counter
2307 // off at the next higher thread id.
2308 //
2309 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2310 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2311 }
2312 }
2313 if (threadInfo[i][index] != lastId[index]) {
2314 //
2315 // Run through all indices which are less significant,
2316 // and reset the counts to 1.
2317 //
2318 // At all levels up to and including index, we need to
2319 // increment the totals and record the last id.
2320 //
2321 unsigned index2;
2322 for (index2 = threadIdIndex; index2 < index; index2++) {
2323 totals[index2]++;
2324 if (counts[index2] > maxCt[index2]) {
2325 maxCt[index2] = counts[index2];
2326 }
2327 counts[index2] = 1;
2328 lastId[index2] = threadInfo[i][index2];
2329 }
2330 counts[index]++;
2331 totals[index]++;
2332 lastId[index] = threadInfo[i][index];
2333
2334 if (assign_thread_ids && (index > threadIdIndex)) {
2335
2336# if KMP_MIC && REDUCE_TEAM_SIZE
2337 //
2338 // The default team size is the total #threads in the machine
2339 // minus 1 thread for every core that has 3 or more threads.
2340 //
2341 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2342# endif // KMP_MIC && REDUCE_TEAM_SIZE
2343
2344 //
2345 // Restart the thread counter, as we are on a new core.
2346 //
2347 threadIdCt = 0;
2348
2349 //
2350 // Auto-assign the thread id field if it wasn't specified.
2351 //
2352 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2353 threadInfo[i][threadIdIndex] = threadIdCt++;
2354 }
2355
2356 //
2357                // Apparently the thread id field was specified for some
2358 // entries and not others. Start the thread id counter
2359 // off at the next higher thread id.
2360 //
2361 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2362 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2363 }
2364 }
2365 break;
2366 }
2367 }
2368 if (index < threadIdIndex) {
2369 //
2370 // If thread ids were specified, it is an error if they are not
2371            // unique. Also, check that we haven't already restarted the
2372 // loop (to be safe - shouldn't need to).
2373 //
2374 if ((threadInfo[i][threadIdIndex] != UINT_MAX)
2375 || assign_thread_ids) {
2376 __kmp_free(lastId);
2377 __kmp_free(totals);
2378 __kmp_free(maxCt);
2379 __kmp_free(counts);
2380 CLEANUP_THREAD_INFO;
2381 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2382 return -1;
2383 }
2384
2385 //
2386            // If the thread ids were not specified and we see duplicate
2387            // entries, start the loop over and
2388 // assign the thread ids manually.
2389 //
2390 assign_thread_ids = true;
2391 goto restart_radix_check;
2392 }
2393 }
2394
2395# if KMP_MIC && REDUCE_TEAM_SIZE
2396 //
2397 // The default team size is the total #threads in the machine
2398 // minus 1 thread for every core that has 3 or more threads.
2399 //
2400 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2401# endif // KMP_MIC && REDUCE_TEAM_SIZE
2402
2403 for (index = threadIdIndex; index <= maxIndex; index++) {
2404 if (counts[index] > maxCt[index]) {
2405 maxCt[index] = counts[index];
2406 }
2407 }
2408
2409 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2410 nCoresPerPkg = maxCt[coreIdIndex];
2411 nPackages = totals[pkgIdIndex];
2412
2413 //
2414 // Check to see if the machine topology is uniform
2415 //
2416 unsigned prod = totals[maxIndex];
2417 for (index = threadIdIndex; index < maxIndex; index++) {
2418 prod *= maxCt[index];
2419 }
2420 bool uniform = (prod == totals[threadIdIndex]);
2421
2422 //
2423 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00002424 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002425 // nCoresPerPkg, & nPackages. Make sure all these vars are set
2426 // correctly, and return now if affinity is not enabled.
2427 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00002428 __kmp_ncores = totals[coreIdIndex];
2429
2430 if (__kmp_affinity_verbose) {
2431 if (! KMP_AFFINITY_CAPABLE()) {
2432 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2433 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2434 if (uniform) {
2435 KMP_INFORM(Uniform, "KMP_AFFINITY");
2436 } else {
2437 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2438 }
2439 }
2440 else {
2441 char buf[KMP_AFFIN_MASK_PRINT_LEN];
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00002442 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, __kmp_affin_fullMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002443 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2444 if (__kmp_affinity_respect_mask) {
2445 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2446 } else {
2447 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2448 }
2449 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2450 if (uniform) {
2451 KMP_INFORM(Uniform, "KMP_AFFINITY");
2452 } else {
2453 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2454 }
2455 }
2456 kmp_str_buf_t buf;
2457 __kmp_str_buf_init(&buf);
2458
2459 __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
2460 for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2461 __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
2462 }
2463 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
2464 maxCt[threadIdIndex], __kmp_ncores);
2465
2466 __kmp_str_buf_free(&buf);
2467 }
2468
2469# if KMP_MIC && REDUCE_TEAM_SIZE
2470 //
2471 // Set the default team size.
2472 //
2473 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2474 __kmp_dflt_team_nth = teamSize;
2475 KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
2476 __kmp_dflt_team_nth));
2477 }
2478# endif // KMP_MIC && REDUCE_TEAM_SIZE
2479
2480 if (__kmp_affinity_type == affinity_none) {
2481 __kmp_free(lastId);
2482 __kmp_free(totals);
2483 __kmp_free(maxCt);
2484 __kmp_free(counts);
2485 CLEANUP_THREAD_INFO;
2486 return 0;
2487 }
2488
2489 //
2490 // Count the number of levels which have more nodes at that level than
2491 // at the parent's level (with there being an implicit root node of
2492 // the top level). This is equivalent to saying that there is at least
2493 // one node at this level which has a sibling. These levels are in the
2494 // map, and the package level is always in the map.
2495 //
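    //
    // Worked example (hypothetical totals): with totals of 16 at the
    // thread level, 8 at the core level, 2 at the package level, and 1 at
    // the node level, inMap is true for the thread level (16 > 8), the
    // core level (8 > 2), and always for the package level, but false for
    // the node level (totals[maxIndex] == 1), giving a depth of 3.
    //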
2496 bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
2497 int level = 0;
2498 for (index = threadIdIndex; index < maxIndex; index++) {
2499 KMP_ASSERT(totals[index] >= totals[index + 1]);
2500 inMap[index] = (totals[index] > totals[index + 1]);
2501 }
2502 inMap[maxIndex] = (totals[maxIndex] > 1);
2503 inMap[pkgIdIndex] = true;
2504
2505 int depth = 0;
2506 for (index = threadIdIndex; index <= maxIndex; index++) {
2507 if (inMap[index]) {
2508 depth++;
2509 }
2510 }
2511 KMP_ASSERT(depth > 0);
2512
2513 //
2514 // Construct the data structure that is to be returned.
2515 //
2516 *address2os = (AddrUnsPair*)
2517 __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
2518 int pkgLevel = -1;
2519 int coreLevel = -1;
2520 int threadLevel = -1;
2521
2522 for (i = 0; i < num_avail; ++i) {
2523 Address addr(depth);
2524 unsigned os = threadInfo[i][osIdIndex];
2525 int src_index;
2526 int dst_index = 0;
2527
2528 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
2529 if (! inMap[src_index]) {
2530 continue;
2531 }
2532 addr.labels[dst_index] = threadInfo[i][src_index];
2533 if (src_index == pkgIdIndex) {
2534 pkgLevel = dst_index;
2535 }
2536 else if (src_index == coreIdIndex) {
2537 coreLevel = dst_index;
2538 }
2539 else if (src_index == threadIdIndex) {
2540 threadLevel = dst_index;
2541 }
2542 dst_index++;
2543 }
2544 (*address2os)[i] = AddrUnsPair(addr, os);
2545 }
2546
2547 if (__kmp_affinity_gran_levels < 0) {
2548 //
2549 // Set the granularity level based on what levels are modeled
2550 // in the machine topology map.
2551 //
2552 unsigned src_index;
2553 __kmp_affinity_gran_levels = 0;
2554 for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2555 if (! inMap[src_index]) {
2556 continue;
2557 }
2558 switch (src_index) {
2559 case threadIdIndex:
2560 if (__kmp_affinity_gran > affinity_gran_thread) {
2561 __kmp_affinity_gran_levels++;
2562 }
2563
2564 break;
2565 case coreIdIndex:
2566 if (__kmp_affinity_gran > affinity_gran_core) {
2567 __kmp_affinity_gran_levels++;
2568 }
2569 break;
2570
2571 case pkgIdIndex:
2572 if (__kmp_affinity_gran > affinity_gran_package) {
2573 __kmp_affinity_gran_levels++;
2574 }
2575 break;
2576 }
2577 }
2578 }
2579
2580 if (__kmp_affinity_verbose) {
2581 __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2582 coreLevel, threadLevel);
2583 }
2584
2585 __kmp_free(inMap);
2586 __kmp_free(lastId);
2587 __kmp_free(totals);
2588 __kmp_free(maxCt);
2589 __kmp_free(counts);
2590 CLEANUP_THREAD_INFO;
2591 return depth;
2592}
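
//
// For reference, a minimal /proc/cpuinfo fragment (hypothetical values)
// that the parser above accepts; a blank line terminates each record:
//
//     processor       : 0
//     physical id     : 0
//     core id         : 0
//
//     processor       : 1
//     physical id     : 0
//     core id         : 1
//
// This yields threadInfo rows { osId 0, coreId 0, pkgId 0 } and
// { osId 1, coreId 1, pkgId 0 }; thread ids are auto-assigned by the
// radix check above when the "thread id" field is absent.
//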
2593
2594
2595//
2596// Create and return a table of affinity masks, indexed by OS thread ID.
2597// This routine handles OR'ing together all the affinity masks of threads
2598// that are sufficiently close, if granularity > fine.
2599//
2600static kmp_affin_mask_t *
2601__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
2602 AddrUnsPair *address2os, unsigned numAddrs)
2603{
2604 //
2605 // First form a table of affinity masks in order of OS thread id.
2606 //
2607 unsigned depth;
2608 unsigned maxOsId;
2609 unsigned i;
2610
2611 KMP_ASSERT(numAddrs > 0);
2612 depth = address2os[0].first.depth;
2613
2614 maxOsId = 0;
2615 for (i = 0; i < numAddrs; i++) {
2616 unsigned osId = address2os[i].second;
2617 if (osId > maxOsId) {
2618 maxOsId = osId;
2619 }
2620 }
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002621 kmp_affin_mask_t *osId2Mask;
2622 KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId+1));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002623
2624 //
2625 // Sort the address2os table according to physical order. Doing so
2626 // will put all threads on the same core/package/node in consecutive
2627 // locations.
2628 //
2629 qsort(address2os, numAddrs, sizeof(*address2os),
2630 __kmp_affinity_cmp_Address_labels);
2631
2632 KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2633 if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
2634 KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
2635 }
2636 if (__kmp_affinity_gran_levels >= (int)depth) {
2637 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2638 && (__kmp_affinity_type != affinity_none))) {
2639 KMP_WARNING(AffThreadsMayMigrate);
2640 }
2641 }
2642
2643 //
2644 // Run through the table, forming the masks for all threads on each
2645 // core. Threads on the same core will have identical "Address"
2646 // objects, not considering the last level, which must be the thread
2647 // id. All threads on a core will appear consecutively.
2648 //
2649 unsigned unique = 0;
2650 unsigned j = 0; // index of 1st thread on core
2651 unsigned leader = 0;
2652 Address *leaderAddr = &(address2os[0].first);
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002653 kmp_affin_mask_t *sum;
2654 KMP_CPU_ALLOC_ON_STACK(sum);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002655 KMP_CPU_ZERO(sum);
2656 KMP_CPU_SET(address2os[0].second, sum);
2657 for (i = 1; i < numAddrs; i++) {
2658 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002659 // If this thread is sufficiently close to the leader (within the
Jim Cownie5e8470a2013-09-27 10:38:44 +00002660 // granularity setting), then set the bit for this os thread in the
2661 // affinity mask for this group, and go on to the next thread.
2662 //
2663 if (leaderAddr->isClose(address2os[i].first,
2664 __kmp_affinity_gran_levels)) {
2665 KMP_CPU_SET(address2os[i].second, sum);
2666 continue;
2667 }
2668
2669 //
2670 // For every thread in this group, copy the mask to the thread's
2671 // entry in the osId2Mask table. Mark the first address as a
2672 // leader.
2673 //
2674 for (; j < i; j++) {
2675 unsigned osId = address2os[j].second;
2676 KMP_DEBUG_ASSERT(osId <= maxOsId);
2677 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2678 KMP_CPU_COPY(mask, sum);
2679 address2os[j].first.leader = (j == leader);
2680 }
2681 unique++;
2682
2683 //
2684 // Start a new mask.
2685 //
2686 leader = i;
2687 leaderAddr = &(address2os[i].first);
2688 KMP_CPU_ZERO(sum);
2689 KMP_CPU_SET(address2os[i].second, sum);
2690 }
2691
2692 //
2693 // For every thread in last group, copy the mask to the thread's
2694 // entry in the osId2Mask table.
2695 //
2696 for (; j < i; j++) {
2697 unsigned osId = address2os[j].second;
2698 KMP_DEBUG_ASSERT(osId <= maxOsId);
2699 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2700 KMP_CPU_COPY(mask, sum);
2701 address2os[j].first.leader = (j == leader);
2702 }
2703 unique++;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002704 KMP_CPU_FREE_FROM_STACK(sum);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002705
2706 *maxIndex = maxOsId;
2707 *numUnique = unique;
2708 return osId2Mask;
2709}
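
//
// For example (hypothetical mapping): with granularity=core
// (__kmp_affinity_gran_levels == 1) and two hw threads per core, OS procs
// 0 and 4 sharing core 0 are "sufficiently close", so both of their
// osId2Mask entries become {0,4}, and a thread bound to either one may
// float between the two contexts.
//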
2710
2711
2712//
2713// Stuff for the affinity proclist parsers. It's easier to declare these vars
2714// as file-static than to try and pass them through the calling sequence of
2715// the recursive-descent OMP_PLACES parser.
2716//
2717static kmp_affin_mask_t *newMasks;
2718static int numNewMasks;
2719static int nextNewMask;
2720
2721#define ADD_MASK(_mask) \
2722 { \
2723 if (nextNewMask >= numNewMasks) { \
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002724 int i; \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002725 numNewMasks *= 2; \
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002726 kmp_affin_mask_t* temp; \
2727 KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks); \
2728 for(i=0;i<numNewMasks/2;i++) { \
2729 kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i); \
2730 kmp_affin_mask_t* dest = KMP_CPU_INDEX(temp, i); \
2731 KMP_CPU_COPY(dest, src); \
2732 } \
2733 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks/2); \
2734 newMasks = temp; \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002735 } \
2736 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
2737 nextNewMask++; \
2738 }
2739
2740#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
2741 { \
2742 if (((_osId) > _maxOsId) || \
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002743 (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002744 if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
2745 && (__kmp_affinity_type != affinity_none))) { \
2746 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
2747 } \
2748 } \
2749 else { \
2750 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
2751 } \
2752 }
2753
2754
2755//
2756// Re-parse the proclist (for the explicit affinity type), and form the list
2757// of affinity newMasks indexed by gtid.
2758//
2759static void
2760__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
2761 unsigned int *out_numMasks, const char *proclist,
2762 kmp_affin_mask_t *osId2Mask, int maxOsId)
2763{
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002764 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002765 const char *scan = proclist;
2766 const char *next = proclist;
2767
2768 //
2769    // Use the internal mask allocator for the temporary mask vector, so
2770    // that the ADD_MASK macro can grow it (by doubling) as needed.
2771 //
2772 numNewMasks = 2;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002773 KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002774 nextNewMask = 0;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002775 kmp_affin_mask_t *sumMask;
2776 KMP_CPU_ALLOC(sumMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002777 int setSize = 0;
2778
2779 for (;;) {
2780 int start, end, stride;
2781
2782 SKIP_WS(scan);
2783 next = scan;
2784 if (*next == '\0') {
2785 break;
2786 }
2787
2788 if (*next == '{') {
2789 int num;
2790 setSize = 0;
2791 next++; // skip '{'
2792 SKIP_WS(next);
2793 scan = next;
2794
2795 //
2796 // Read the first integer in the set.
2797 //
2798 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2799 "bad proclist");
2800 SKIP_DIGITS(next);
2801 num = __kmp_str_to_int(scan, *next);
2802 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2803
2804 //
2805 // Copy the mask for that osId to the sum (union) mask.
2806 //
2807 if ((num > maxOsId) ||
2808 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2809 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2810 && (__kmp_affinity_type != affinity_none))) {
2811 KMP_WARNING(AffIgnoreInvalidProcID, num);
2812 }
2813 KMP_CPU_ZERO(sumMask);
2814 }
2815 else {
2816 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2817 setSize = 1;
2818 }
2819
2820 for (;;) {
2821 //
2822 // Check for end of set.
2823 //
2824 SKIP_WS(next);
2825 if (*next == '}') {
2826 next++; // skip '}'
2827 break;
2828 }
2829
2830 //
2831 // Skip optional comma.
2832 //
2833 if (*next == ',') {
2834 next++;
2835 }
2836 SKIP_WS(next);
2837
2838 //
2839 // Read the next integer in the set.
2840 //
2841 scan = next;
2842 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2843 "bad explicit proc list");
2844
2845 SKIP_DIGITS(next);
2846 num = __kmp_str_to_int(scan, *next);
2847 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2848
2849 //
2850 // Add the mask for that osId to the sum mask.
2851 //
2852 if ((num > maxOsId) ||
2853 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2854 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2855 && (__kmp_affinity_type != affinity_none))) {
2856 KMP_WARNING(AffIgnoreInvalidProcID, num);
2857 }
2858 }
2859 else {
2860 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2861 setSize++;
2862 }
2863 }
2864 if (setSize > 0) {
2865 ADD_MASK(sumMask);
2866 }
2867
2868 SKIP_WS(next);
2869 if (*next == ',') {
2870 next++;
2871 }
2872 scan = next;
2873 continue;
2874 }
2875
2876 //
2877 // Read the first integer.
2878 //
2879 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2880 SKIP_DIGITS(next);
2881 start = __kmp_str_to_int(scan, *next);
2882 KMP_ASSERT2(start >= 0, "bad explicit proc list");
2883 SKIP_WS(next);
2884
2885 //
2886 // If this isn't a range, then add a mask to the list and go on.
2887 //
2888 if (*next != '-') {
2889 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2890
2891 //
2892 // Skip optional comma.
2893 //
2894 if (*next == ',') {
2895 next++;
2896 }
2897 scan = next;
2898 continue;
2899 }
2900
2901 //
2902 // This is a range. Skip over the '-' and read in the 2nd int.
2903 //
2904 next++; // skip '-'
2905 SKIP_WS(next);
2906 scan = next;
2907 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2908 SKIP_DIGITS(next);
2909 end = __kmp_str_to_int(scan, *next);
2910 KMP_ASSERT2(end >= 0, "bad explicit proc list");
2911
2912 //
2913 // Check for a stride parameter
2914 //
2915 stride = 1;
2916 SKIP_WS(next);
2917 if (*next == ':') {
2918 //
2919 // A stride is specified. Skip over the ':" and read the 3rd int.
2920 //
2921 int sign = +1;
2922 next++; // skip ':'
2923 SKIP_WS(next);
2924 scan = next;
2925 if (*next == '-') {
2926 sign = -1;
2927 next++;
2928 SKIP_WS(next);
2929 scan = next;
2930 }
2931 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2932 "bad explicit proc list");
2933 SKIP_DIGITS(next);
2934 stride = __kmp_str_to_int(scan, *next);
2935 KMP_ASSERT2(stride >= 0, "bad explicit proc list");
2936 stride *= sign;
2937 }
2938
2939 //
2940 // Do some range checks.
2941 //
2942 KMP_ASSERT2(stride != 0, "bad explicit proc list");
2943 if (stride > 0) {
2944 KMP_ASSERT2(start <= end, "bad explicit proc list");
2945 }
2946 else {
2947 KMP_ASSERT2(start >= end, "bad explicit proc list");
2948 }
2949 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
2950
2951 //
2952 // Add the mask for each OS proc # to the list.
2953 //
2954 if (stride > 0) {
2955 do {
2956 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2957 start += stride;
2958 } while (start <= end);
2959 }
2960 else {
2961 do {
2962 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2963 start += stride;
2964 } while (start >= end);
2965 }
2966
2967 //
2968 // Skip optional comma.
2969 //
2970 SKIP_WS(next);
2971 if (*next == ',') {
2972 next++;
2973 }
2974 scan = next;
2975 }
2976
2977 *out_numMasks = nextNewMask;
2978 if (nextNewMask == 0) {
2979 *out_masks = NULL;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002980 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002981 return;
2982 }
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002983 KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
2984 for(i = 0; i < nextNewMask; i++) {
2985 kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i);
2986 kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i);
2987 KMP_CPU_COPY(dest, src);
2988 }
2989 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
2990 KMP_CPU_FREE(sumMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002991}
2992
2993
2994# if OMP_40_ENABLED
2995
2996/*-----------------------------------------------------------------------------
2997
2998Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
2999 places. Again, here is the grammar:
3000
3001place_list := place
3002place_list := place , place_list
3003place := num
3004place := place : num
3005place := place : num : signed
3006place := { subplacelist }
3007place := ! place // (lowest priority)
3008subplace_list := subplace
3009subplace_list := subplace , subplace_list
3010subplace := num
3011subplace := num : num
3012subplace := num : num : signed
3013signed := num
3014signed := + signed
3015signed := - signed
3016
3017-----------------------------------------------------------------------------*/
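
//
// Illustrative place lists (hypothetical OS proc ids) accepted by this
// grammar:
//
//     "{0,1},{2,3}"   two places of two procs each
//     "{0:4}"         one place containing procs 0-3
//     "{0:2:4}"       one place containing procs 0 and 4
//     "!{0,1}"        the complement of procs 0 and 1
//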
3018
3019static void
3020__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
3021 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3022{
3023 const char *next;
3024
3025 for (;;) {
3026 int start, count, stride, i;
3027
3028 //
3029 // Read in the starting proc id
3030 //
3031 SKIP_WS(*scan);
3032 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3033 "bad explicit places list");
3034 next = *scan;
3035 SKIP_DIGITS(next);
3036 start = __kmp_str_to_int(*scan, *next);
3037 KMP_ASSERT(start >= 0);
3038 *scan = next;
3039
3040 //
3041 // valid follow sets are ',' ':' and '}'
3042 //
3043 SKIP_WS(*scan);
3044 if (**scan == '}' || **scan == ',') {
3045 if ((start > maxOsId) ||
3046 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3047 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3048 && (__kmp_affinity_type != affinity_none))) {
3049 KMP_WARNING(AffIgnoreInvalidProcID, start);
3050 }
3051 }
3052 else {
3053 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3054 (*setSize)++;
3055 }
3056 if (**scan == '}') {
3057 break;
3058 }
3059 (*scan)++; // skip ','
3060 continue;
3061 }
3062 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3063 (*scan)++; // skip ':'
3064
3065 //
3066 // Read count parameter
3067 //
3068 SKIP_WS(*scan);
3069 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3070 "bad explicit places list");
3071 next = *scan;
3072 SKIP_DIGITS(next);
3073 count = __kmp_str_to_int(*scan, *next);
3074 KMP_ASSERT(count >= 0);
3075 *scan = next;
3076
3077 //
3078 // valid follow sets are ',' ':' and '}'
3079 //
3080 SKIP_WS(*scan);
3081 if (**scan == '}' || **scan == ',') {
3082 for (i = 0; i < count; i++) {
3083 if ((start > maxOsId) ||
3084 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3085 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3086 && (__kmp_affinity_type != affinity_none))) {
3087 KMP_WARNING(AffIgnoreInvalidProcID, start);
3088 }
3089 break; // don't proliferate warnings for large count
3090 }
3091 else {
3092 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3093 start++;
3094 (*setSize)++;
3095 }
3096 }
3097 if (**scan == '}') {
3098 break;
3099 }
3100 (*scan)++; // skip ','
3101 continue;
3102 }
3103 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3104 (*scan)++; // skip ':'
3105
3106 //
3107 // Read stride parameter
3108 //
3109 int sign = +1;
3110 for (;;) {
3111 SKIP_WS(*scan);
3112 if (**scan == '+') {
3113 (*scan)++; // skip '+'
3114 continue;
3115 }
3116 if (**scan == '-') {
3117 sign *= -1;
3118 (*scan)++; // skip '-'
3119 continue;
3120 }
3121 break;
3122 }
3123 SKIP_WS(*scan);
3124 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3125 "bad explicit places list");
3126 next = *scan;
3127 SKIP_DIGITS(next);
3128 stride = __kmp_str_to_int(*scan, *next);
3129 KMP_ASSERT(stride >= 0);
3130 *scan = next;
3131 stride *= sign;
3132
3133 //
3134 // valid follow sets are ',' and '}'
3135 //
3136 SKIP_WS(*scan);
3137 if (**scan == '}' || **scan == ',') {
3138 for (i = 0; i < count; i++) {
3139 if ((start > maxOsId) ||
3140 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3141 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3142 && (__kmp_affinity_type != affinity_none))) {
3143 KMP_WARNING(AffIgnoreInvalidProcID, start);
3144 }
3145 break; // don't proliferate warnings for large count
3146 }
3147 else {
3148 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3149 start += stride;
3150 (*setSize)++;
3151 }
3152 }
3153 if (**scan == '}') {
3154 break;
3155 }
3156 (*scan)++; // skip ','
3157 continue;
3158 }
3159
3160 KMP_ASSERT2(0, "bad explicit places list");
3161 }
3162}
3163
3164
3165static void
3166__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
3167 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3168{
3169 const char *next;
3170
3171 //
3172 // valid follow sets are '{' '!' and num
3173 //
3174 SKIP_WS(*scan);
3175 if (**scan == '{') {
3176 (*scan)++; // skip '{'
3177 __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
3178 setSize);
3179 KMP_ASSERT2(**scan == '}', "bad explicit places list");
3180 (*scan)++; // skip '}'
3181 }
3182 else if (**scan == '!') {
Jonathan Peyton6778c732015-10-19 19:43:01 +00003183 (*scan)++; // skip '!'
Jim Cownie5e8470a2013-09-27 10:38:44 +00003184 __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003185 KMP_CPU_COMPLEMENT(maxOsId, tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003186 }
3187 else if ((**scan >= '0') && (**scan <= '9')) {
3188 next = *scan;
3189 SKIP_DIGITS(next);
3190 int num = __kmp_str_to_int(*scan, *next);
3191 KMP_ASSERT(num >= 0);
3192 if ((num > maxOsId) ||
3193 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3194 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3195 && (__kmp_affinity_type != affinity_none))) {
3196 KMP_WARNING(AffIgnoreInvalidProcID, num);
3197 }
3198 }
3199 else {
3200 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
3201 (*setSize)++;
3202 }
3203 *scan = next; // skip num
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003204 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003205 else {
3206 KMP_ASSERT2(0, "bad explicit places list");
3207 }
3208}
3209
3210
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003211//static void
3212void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003213__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
3214 unsigned int *out_numMasks, const char *placelist,
3215 kmp_affin_mask_t *osId2Mask, int maxOsId)
3216{
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003217 int i,j,count,stride,sign;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003218 const char *scan = placelist;
3219 const char *next = placelist;
3220
3221 numNewMasks = 2;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003222 KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003223 nextNewMask = 0;
3224
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003225 // tempMask is modified based on the previous or initial
3226 // place to form the current place
3227 // previousMask contains the previous place
3228 kmp_affin_mask_t *tempMask;
3229 kmp_affin_mask_t *previousMask;
3230 KMP_CPU_ALLOC(tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003231 KMP_CPU_ZERO(tempMask);
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003232 KMP_CPU_ALLOC(previousMask);
3233 KMP_CPU_ZERO(previousMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003234 int setSize = 0;
3235
3236 for (;;) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003237 __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
3238
3239 //
3240 // valid follow sets are ',' ':' and EOL
3241 //
3242 SKIP_WS(scan);
3243 if (*scan == '\0' || *scan == ',') {
3244 if (setSize > 0) {
3245 ADD_MASK(tempMask);
3246 }
3247 KMP_CPU_ZERO(tempMask);
3248 setSize = 0;
3249 if (*scan == '\0') {
3250 break;
3251 }
3252 scan++; // skip ','
3253 continue;
3254 }
3255
3256 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3257 scan++; // skip ':'
3258
3259 //
3260 // Read count parameter
3261 //
3262 SKIP_WS(scan);
3263 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3264 "bad explicit places list");
3265 next = scan;
3266 SKIP_DIGITS(next);
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003267 count = __kmp_str_to_int(scan, *next);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003268 KMP_ASSERT(count >= 0);
3269 scan = next;
3270
3271 //
3272 // valid follow sets are ',' ':' and EOL
3273 //
3274 SKIP_WS(scan);
3275 if (*scan == '\0' || *scan == ',') {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003276 stride = +1;
3277 }
3278 else {
3279 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3280 scan++; // skip ':'
Jim Cownie5e8470a2013-09-27 10:38:44 +00003281
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003282 //
3283 // Read stride parameter
3284 //
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003285 sign = +1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003286 for (;;) {
3287 SKIP_WS(scan);
3288 if (*scan == '+') {
3289 scan++; // skip '+'
3290 continue;
3291 }
3292 if (*scan == '-') {
3293 sign *= -1;
3294 scan++; // skip '-'
3295 continue;
3296 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003297 break;
3298 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003299 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003300 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3301 "bad explicit places list");
3302 next = scan;
3303 SKIP_DIGITS(next);
3304 stride = __kmp_str_to_int(scan, *next);
3305 KMP_DEBUG_ASSERT(stride >= 0);
3306 scan = next;
3307 stride *= sign;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003308 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003309
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003310 // Add places determined by initial_place : count : stride
        for (i = 0; i < count; i++) {
            if (setSize == 0) {
                break;
            }
            // Add the current place, then build the next place (tempMask) from that
            KMP_CPU_COPY(previousMask, tempMask);
            ADD_MASK(previousMask);
            KMP_CPU_ZERO(tempMask);
            setSize = 0;
            KMP_CPU_SET_ITERATE(j, previousMask) {
                if (! KMP_CPU_ISSET(j, previousMask)) {
                    continue;
                }
                if ((j+stride > maxOsId) || (j+stride < 0) ||
                  (! KMP_CPU_ISSET(j, __kmp_affin_fullMask)) ||
                  (! KMP_CPU_ISSET(j+stride, KMP_CPU_INDEX(osId2Mask, j+stride)))) {
                    if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
                      && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
                        KMP_WARNING(AffIgnoreInvalidProcID, j+stride);
                    }
                    continue;
                }
                KMP_CPU_SET(j+stride, tempMask);
                setSize++;
            }
        }
        KMP_CPU_ZERO(tempMask);
        setSize = 0;

        //
        // valid follow sets are ',' and EOL
        //
        SKIP_WS(scan);
        if (*scan == '\0') {
            break;
        }
        if (*scan == ',') {
            scan++;     // skip ','
            continue;
        }

        KMP_ASSERT2(0, "bad explicit places list");
    }

    *out_numMasks = nextNewMask;
    if (nextNewMask == 0) {
        *out_masks = NULL;
        KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
        return;
    }
    KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
    KMP_CPU_FREE(tempMask);
    KMP_CPU_FREE(previousMask);
    for(i = 0; i < nextNewMask; i++) {
        kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i);
        kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i);
        KMP_CPU_COPY(dest, src);
    }
    KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
}

# endif /* OMP_40_ENABLED */

#undef ADD_MASK
#undef ADD_MASK_OSID

static void
__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
{
    if (__kmp_place_num_sockets == 0 &&
      __kmp_place_num_cores == 0 &&
      __kmp_place_num_threads_per_core == 0 )
        return;   // no topology limiting actions requested, exit
    if (__kmp_place_num_sockets == 0)
        __kmp_place_num_sockets = nPackages;    // use all available sockets
    if (__kmp_place_num_cores == 0)
        __kmp_place_num_cores = nCoresPerPkg;   // use all available cores
    if (__kmp_place_num_threads_per_core == 0 ||
      __kmp_place_num_threads_per_core > __kmp_nThreadsPerCore)
        __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore;  // use all HW contexts

    if ( !__kmp_affinity_uniform_topology() ) {
        KMP_WARNING( AffThrPlaceNonUniform );
        return; // don't support non-uniform topology
    }
    if ( depth != 3 ) {
        KMP_WARNING( AffThrPlaceNonThreeLevel );
        return; // don't support non-three-level topology
    }
    if (__kmp_place_socket_offset + __kmp_place_num_sockets > nPackages) {
        KMP_WARNING(AffThrPlaceManySockets);
        return;
    }
    if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
        KMP_WARNING( AffThrPlaceManyCores );
        return;
    }

    AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
      __kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core);

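    // Worked example (assumed numbers, for illustration only): with
    // nPackages = 2, nCoresPerPkg = 8, __kmp_nThreadsPerCore = 2 and a request
    // of num_sockets = 1 (socket_offset 1), num_cores = 4 (core_offset 2),
    // num_threads_per_core = 1, the walk below skips socket 0 entirely
    // (n_old += 16), then inside socket 1 skips cores 0-1 and 6-7 and keeps
    // only HW context 0 of cores 2-5, so n_new ends up as 1 * 4 * 1 = 4.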
    int i, j, k, n_old = 0, n_new = 0;
    for (i = 0; i < nPackages; ++i)
        if (i < __kmp_place_socket_offset ||
          i >= __kmp_place_socket_offset + __kmp_place_num_sockets)
            n_old += nCoresPerPkg * __kmp_nThreadsPerCore;  // skip not-requested socket
        else
            for (j = 0; j < nCoresPerPkg; ++j)              // walk through requested socket
                if (j < __kmp_place_core_offset ||
                  j >= __kmp_place_core_offset + __kmp_place_num_cores)
                    n_old += __kmp_nThreadsPerCore;         // skip not-requested core
                else
                    for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through requested core
                        if (k < __kmp_place_num_threads_per_core) {
                            newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
                            n_new++;
                        }
                        n_old++;
                    }
    KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
    KMP_DEBUG_ASSERT(n_new == __kmp_place_num_sockets * __kmp_place_num_cores *
      __kmp_place_num_threads_per_core);

    nPackages = __kmp_place_num_sockets;                      // correct nPackages
    nCoresPerPkg = __kmp_place_num_cores;                     // correct nCoresPerPkg
    __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
    __kmp_avail_proc = n_new;                                 // correct avail_proc
    __kmp_ncores = nPackages * __kmp_place_num_cores;         // correct ncores

    __kmp_free( *pAddr );
    *pAddr = newAddr;       // replace old topology with new one
}


static AddrUnsPair *address2os = NULL;
static int * procarr = NULL;
static int __kmp_aff_depth = 0;

static void
__kmp_aux_affinity_initialize(void)
{
    if (__kmp_affinity_masks != NULL) {
        KMP_ASSERT(__kmp_affin_fullMask != NULL);
        return;
    }

    //
    // Create the "full" mask - this defines all of the processors that we
    // consider to be in the machine model. If respect is set, then it is
    // the initialization thread's affinity mask. Otherwise, it is all
    // processors that we know about on the machine.
    //
    if (__kmp_affin_fullMask == NULL) {
        KMP_CPU_ALLOC(__kmp_affin_fullMask);
    }
    if (KMP_AFFINITY_CAPABLE()) {
        if (__kmp_affinity_respect_mask) {
            __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);

            //
            // Count the number of available processors.
            //
            unsigned i;
            __kmp_avail_proc = 0;
            KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
                if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
                    continue;
                }
                __kmp_avail_proc++;
            }
            if (__kmp_avail_proc > __kmp_xproc) {
                if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                  && (__kmp_affinity_type != affinity_none))) {
                    KMP_WARNING(ErrorInitializeAffinity);
                }
                __kmp_affinity_type = affinity_none;
                KMP_AFFINITY_DISABLE();
                return;
            }
        }
        else {
            __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
            __kmp_avail_proc = __kmp_xproc;
        }
    }
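    // For example (illustrative): if the process was launched under
    // "taskset -c 0-3" on a 16-proc machine and respect-mask is in effect,
    // __kmp_affin_fullMask now holds {0,1,2,3} and __kmp_avail_proc == 4;
    // without respect-mask it would hold all 16 OS procs.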

    int depth = -1;
    kmp_i18n_id_t msg_id = kmp_i18n_null;

    //
    // For backward compatibility, setting KMP_CPUINFO_FILE =>
    // KMP_TOPOLOGY_METHOD=cpuinfo
    //
    if ((__kmp_cpuinfo_file != NULL) &&
      (__kmp_affinity_top_method == affinity_top_method_all)) {
        __kmp_affinity_top_method = affinity_top_method_cpuinfo;
    }
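    // For example (illustrative): running with KMP_CPUINFO_FILE=/tmp/cpuinfo
    // set in the environment behaves as if KMP_TOPOLOGY_METHOD=cpuinfo had
    // been requested, with that file used as the topology source.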

    if (__kmp_affinity_top_method == affinity_top_method_all) {
        //
        // In the default code path, errors are not fatal - we just try using
        // another method. We only emit a warning message if affinity is on,
        // or the verbose flag is set, and the nowarnings flag was not set.
        //
        const char *file_name = NULL;
        int line = 0;
# if KMP_USE_HWLOC
        if (depth < 0) {
            if (__kmp_affinity_verbose) {
                KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
            }
            if(!__kmp_hwloc_error) {
                depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
                if (depth == 0) {
                    KMP_ASSERT(__kmp_affinity_type == affinity_none);
                    KMP_ASSERT(address2os == NULL);
                    return;
                } else if(depth < 0 && __kmp_affinity_verbose) {
                    KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
                }
            } else if(__kmp_affinity_verbose) {
                KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
            }
        }
# endif

# if KMP_ARCH_X86 || KMP_ARCH_X86_64

        if (depth < 0) {
            if (__kmp_affinity_verbose) {
                KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
            }

            file_name = NULL;
            depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
            if (depth == 0) {
                KMP_ASSERT(__kmp_affinity_type == affinity_none);
                KMP_ASSERT(address2os == NULL);
                return;
            }

            if (depth < 0) {
                if (__kmp_affinity_verbose) {
                    if (msg_id != kmp_i18n_null) {
                        KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
                          KMP_I18N_STR(DecodingLegacyAPIC));
                    }
                    else {
                        KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
                    }
                }

                file_name = NULL;
                depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
                if (depth == 0) {
                    KMP_ASSERT(__kmp_affinity_type == affinity_none);
                    KMP_ASSERT(address2os == NULL);
                    return;
                }
            }
        }

# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

# if KMP_OS_LINUX

        if (depth < 0) {
            if (__kmp_affinity_verbose) {
                if (msg_id != kmp_i18n_null) {
                    KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
                }
                else {
                    KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
                }
            }

            FILE *f = fopen("/proc/cpuinfo", "r");
            if (f == NULL) {
                msg_id = kmp_i18n_str_CantOpenCpuinfo;
            }
            else {
                file_name = "/proc/cpuinfo";
                depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
                fclose(f);
                if (depth == 0) {
                    KMP_ASSERT(__kmp_affinity_type == affinity_none);
                    KMP_ASSERT(address2os == NULL);
                    return;
                }
            }
        }

# endif /* KMP_OS_LINUX */

# if KMP_GROUP_AFFINITY

        if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
            if (__kmp_affinity_verbose) {
                KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
            }

            depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
            KMP_ASSERT(depth != 0);
        }

# endif /* KMP_GROUP_AFFINITY */

        if (depth < 0) {
            if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
                if (file_name == NULL) {
                    KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
                }
                else if (line == 0) {
                    KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
                }
                else {
                    KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
                }
            }
            // FIXME - print msg if msg_id = kmp_i18n_null ???

            file_name = "";
            depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
            if (depth == 0) {
                KMP_ASSERT(__kmp_affinity_type == affinity_none);
                KMP_ASSERT(address2os == NULL);
                return;
            }
            KMP_ASSERT(depth > 0);
            KMP_ASSERT(address2os != NULL);
        }
    }

    //
    // If the user has specified that a particular topology discovery method
    // is to be used, then we abort if that method fails. The exception is
    // group affinity, which might have been implicitly set.
    //

# if KMP_ARCH_X86 || KMP_ARCH_X86_64

    else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
              KMP_I18N_STR(Decodingx2APIC));
        }

        depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
        if (depth == 0) {
            KMP_ASSERT(__kmp_affinity_type == affinity_none);
            KMP_ASSERT(address2os == NULL);
            return;
        }
        if (depth < 0) {
            KMP_ASSERT(msg_id != kmp_i18n_null);
            KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
        }
    }
    else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
              KMP_I18N_STR(DecodingLegacyAPIC));
        }

        depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
        if (depth == 0) {
            KMP_ASSERT(__kmp_affinity_type == affinity_none);
            KMP_ASSERT(address2os == NULL);
            return;
        }
        if (depth < 0) {
            KMP_ASSERT(msg_id != kmp_i18n_null);
            KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
        }
    }

# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
        const char *filename;
        if (__kmp_cpuinfo_file != NULL) {
            filename = __kmp_cpuinfo_file;
        }
        else {
            filename = "/proc/cpuinfo";
        }

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
        }

        FILE *f = fopen(filename, "r");
        if (f == NULL) {
            int code = errno;
            if (__kmp_cpuinfo_file != NULL) {
                __kmp_msg(
                    kmp_ms_fatal,
                    KMP_MSG(CantOpenFileForReading, filename),
                    KMP_ERR(code),
                    KMP_HNT(NameComesFrom_CPUINFO_FILE),
                    __kmp_msg_null
                );
            }
            else {
                __kmp_msg(
                    kmp_ms_fatal,
                    KMP_MSG(CantOpenFileForReading, filename),
                    KMP_ERR(code),
                    __kmp_msg_null
                );
            }
        }
        int line = 0;
        depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
        fclose(f);
        if (depth < 0) {
            KMP_ASSERT(msg_id != kmp_i18n_null);
            if (line > 0) {
                KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
            }
            else {
                KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
            }
        }
        if (__kmp_affinity_type == affinity_none) {
            KMP_ASSERT(depth == 0);
            KMP_ASSERT(address2os == NULL);
            return;
        }
    }

# if KMP_GROUP_AFFINITY

    else if (__kmp_affinity_top_method == affinity_top_method_group) {
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
        }

        depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
        KMP_ASSERT(depth != 0);
        if (depth < 0) {
            KMP_ASSERT(msg_id != kmp_i18n_null);
            KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
        }
    }

# endif /* KMP_GROUP_AFFINITY */

    else if (__kmp_affinity_top_method == affinity_top_method_flat) {
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
        }

        depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
        if (depth == 0) {
            KMP_ASSERT(__kmp_affinity_type == affinity_none);
            KMP_ASSERT(address2os == NULL);
            return;
        }
        // should not fail
        KMP_ASSERT(depth > 0);
        KMP_ASSERT(address2os != NULL);
    }

# if KMP_USE_HWLOC
    else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
        }
        depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
        if (depth == 0) {
            KMP_ASSERT(__kmp_affinity_type == affinity_none);
            KMP_ASSERT(address2os == NULL);
            return;
        }
    }
# endif // KMP_USE_HWLOC

    if (address2os == NULL) {
        if (KMP_AFFINITY_CAPABLE()
          && (__kmp_affinity_verbose || (__kmp_affinity_warnings
          && (__kmp_affinity_type != affinity_none)))) {
            KMP_WARNING(ErrorInitializeAffinity);
        }
        __kmp_affinity_type = affinity_none;
        KMP_AFFINITY_DISABLE();
        return;
    }

    __kmp_apply_thread_places(&address2os, depth);

    //
    // Create the table of masks, indexed by thread Id.
    //
    unsigned maxIndex;
    unsigned numUnique;
    kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
      address2os, __kmp_avail_proc);
    if (__kmp_affinity_gran_levels == 0) {
        KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
    }

    //
    // Set the childNums vector in all Address objects. This must be done
    // before we can sort using __kmp_affinity_cmp_Address_child_num(),
    // which takes into account the setting of __kmp_affinity_compact.
    //
    __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);

    switch (__kmp_affinity_type) {

    case affinity_explicit:
        KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
# if OMP_40_ENABLED
        if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
# endif
        {
            __kmp_affinity_process_proclist(&__kmp_affinity_masks,
              &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
              maxIndex);
        }
# if OMP_40_ENABLED
        else {
            __kmp_affinity_process_placelist(&__kmp_affinity_masks,
              &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
              maxIndex);
        }
# endif
        if (__kmp_affinity_num_masks == 0) {
            if (__kmp_affinity_verbose || (__kmp_affinity_warnings
              && (__kmp_affinity_type != affinity_none))) {
                KMP_WARNING(AffNoValidProcID);
            }
            __kmp_affinity_type = affinity_none;
            return;
        }
        break;

    //
    // The other affinity types rely on sorting the Addresses according
    // to some permutation of the machine topology tree. Set
    // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
    // then jump to a common code fragment to do the sort and create
    // the array of affinity masks.
    //

    case affinity_logical:
        __kmp_affinity_compact = 0;
        if (__kmp_affinity_offset) {
            __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
              % __kmp_avail_proc;
        }
        goto sortAddresses;
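        // Offset arithmetic, for illustration (assumed numbers): with
        // __kmp_nThreadsPerCore == 2, __kmp_avail_proc == 8 and
        // __kmp_affinity_offset == 3, the statement above computes
        // (2 * 3) % 8 == 6, so the mask assignment starts 6 OS procs in.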

    case affinity_physical:
        if (__kmp_nThreadsPerCore > 1) {
            __kmp_affinity_compact = 1;
            if (__kmp_affinity_compact >= depth) {
                __kmp_affinity_compact = 0;
            }
        } else {
            __kmp_affinity_compact = 0;
        }
        if (__kmp_affinity_offset) {
            __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
              % __kmp_avail_proc;
        }
        goto sortAddresses;

    case affinity_scatter:
        if (__kmp_affinity_compact >= depth) {
            __kmp_affinity_compact = 0;
        }
        else {
            __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
        }
        goto sortAddresses;

    case affinity_compact:
        if (__kmp_affinity_compact >= depth) {
            __kmp_affinity_compact = depth - 1;
        }
        goto sortAddresses;
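
        // For illustration (assumed depth == 3: package/core/thread):
        // affinity_compact keeps the requested __kmp_affinity_compact
        // (clamped to depth - 1 == 2), while affinity_scatter inverts it,
        // e.g. a requested value of 0 becomes depth - 1 - 0 == 2, so the
        // sort below permutes the outermost level first and consecutive
        // threads land on different packages.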

    case affinity_balanced:
        // Balanced works only for the case of a single package
        if( nPackages > 1 ) {
            if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
                KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
            }
            __kmp_affinity_type = affinity_none;
            return;
        } else if( __kmp_affinity_uniform_topology() ) {
            break;
        } else { // Non-uniform topology

            // Save the depth for further usage
            __kmp_aff_depth = depth;

            // Number of hyper threads per core in HT machine
            int nth_per_core = __kmp_nThreadsPerCore;

            int core_level;
            if( nth_per_core > 1 ) {
                core_level = depth - 2;
            } else {
                core_level = depth - 1;
            }
            int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
            int nproc = nth_per_core * ncores;

            procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
            for( int i = 0; i < nproc; i++ ) {
                procarr[ i ] = -1;
            }

            for( int i = 0; i < __kmp_avail_proc; i++ ) {
                int proc = address2os[ i ].second;
                // If depth == 3 then level=0 - package, level=1 - core, level=2 - thread.
                // If there is only one thread per core then depth == 2: level 0 - package,
                // level 1 - core.
                int level = depth - 1;

                // __kmp_nth_per_core == 1
                int thread = 0;
                int core = address2os[ i ].first.labels[ level ];
                // If the thread level exists, that is, we have more than one thread context per core
                if( nth_per_core > 1 ) {
                    thread = address2os[ i ].first.labels[ level ] % nth_per_core;
                    core = address2os[ i ].first.labels[ level - 1 ];
                }
                procarr[ core * nth_per_core + thread ] = proc;
            }
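
            // procarr layout, for illustration (assumed numbers): with
            // nth_per_core == 2 and 3 cores where core 1 has only one usable
            // context, procarr could end up as { p0, p1, p2, -1, p3, p4 }:
            // entry [core * nth_per_core + thread] holds the OS proc id,
            // or -1 for a missing context.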

            break;
        }

    sortAddresses:
        //
        // Allocate the gtid->affinity mask table.
        //
        if (__kmp_affinity_dups) {
            __kmp_affinity_num_masks = __kmp_avail_proc;
        }
        else {
            __kmp_affinity_num_masks = numUnique;
        }

# if OMP_40_ENABLED
        if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
          && ( __kmp_affinity_num_places > 0 )
          && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
            __kmp_affinity_num_masks = __kmp_affinity_num_places;
        }
# endif

        KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);

        //
        // Sort the address2os table according to the current setting of
        // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
        //
        qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
          __kmp_affinity_cmp_Address_child_num);
        {
            int i;
            unsigned j;
            for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
                if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
                    continue;
                }
                unsigned osId = address2os[i].second;
                kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
                kmp_affin_mask_t *dest
                  = KMP_CPU_INDEX(__kmp_affinity_masks, j);
                KMP_ASSERT(KMP_CPU_ISSET(osId, src));
                KMP_CPU_COPY(dest, src);
                if (++j >= __kmp_affinity_num_masks) {
                    break;
                }
            }
            KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
        }
        break;

    default:
        KMP_ASSERT2(0, "Unexpected affinity setting");
    }

    __kmp_free(osId2Mask);
    machine_hierarchy.init(address2os, __kmp_avail_proc);
}


void
__kmp_affinity_initialize(void)
{
    //
    // Much of the code above was written assuming that if a machine was not
    // affinity capable, then __kmp_affinity_type == affinity_none. We now
    // explicitly represent this as __kmp_affinity_type == affinity_disabled.
    //
    // There are too many checks for __kmp_affinity_type == affinity_none
    // in this code. Instead of trying to change them all, check if
    // __kmp_affinity_type == affinity_disabled, and if so, slam it with
    // affinity_none, call the real initialization routine, then restore
    // __kmp_affinity_type to affinity_disabled.
    //
    int disabled = (__kmp_affinity_type == affinity_disabled);
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(disabled);
    }
    if (disabled) {
        __kmp_affinity_type = affinity_none;
    }
    __kmp_aux_affinity_initialize();
    if (disabled) {
        __kmp_affinity_type = affinity_disabled;
    }
}


void
__kmp_affinity_uninitialize(void)
{
    if (__kmp_affinity_masks != NULL) {
        KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
        __kmp_affinity_masks = NULL;
    }
    if (__kmp_affin_fullMask != NULL) {
        KMP_CPU_FREE(__kmp_affin_fullMask);
        __kmp_affin_fullMask = NULL;
    }
    __kmp_affinity_num_masks = 0;
# if OMP_40_ENABLED
    __kmp_affinity_num_places = 0;
# endif
    if (__kmp_affinity_proclist != NULL) {
        __kmp_free(__kmp_affinity_proclist);
        __kmp_affinity_proclist = NULL;
    }
    if( address2os != NULL ) {
        __kmp_free( address2os );
        address2os = NULL;
    }
    if( procarr != NULL ) {
        __kmp_free( procarr );
        procarr = NULL;
    }
# if KMP_USE_HWLOC
    if (__kmp_hwloc_topology != NULL) {
        hwloc_topology_destroy(__kmp_hwloc_topology);
        __kmp_hwloc_topology = NULL;
    }
# endif
}

void
__kmp_affinity_set_init_mask(int gtid, int isa_root)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return;
    }

    kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
    if (th->th.th_affin_mask == NULL) {
        KMP_CPU_ALLOC(th->th.th_affin_mask);
    }
    else {
        KMP_CPU_ZERO(th->th.th_affin_mask);
    }

    //
    // Copy the thread mask to the kmp_info_t structure.
    // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
    // that has all of the OS proc ids set, or if __kmp_affinity_respect_mask
    // is set, then the full mask is the same as the mask of the initialization
    // thread.
    //
    kmp_affin_mask_t *mask;
    int i;

# if OMP_40_ENABLED
    if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
# endif
    {
        if ((__kmp_affinity_type == affinity_none) ||
          (__kmp_affinity_type == affinity_balanced)) {
# if KMP_GROUP_AFFINITY
            if (__kmp_num_proc_groups > 1) {
                return;
            }
# endif
            KMP_ASSERT(__kmp_affin_fullMask != NULL);
            i = KMP_PLACE_ALL;
            mask = __kmp_affin_fullMask;
        }
        else {
            KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
            i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
            mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
        }
    }
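    // For example (illustrative): with __kmp_affinity_num_masks == 4 and
    // __kmp_affinity_offset == 1, gtids 0, 1, 2, 3 pick masks 1, 2, 3, 0:
    // a simple round-robin over the place list starting at the offset.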
# if OMP_40_ENABLED
    else {
        if ((! isa_root)
          || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
# if KMP_GROUP_AFFINITY
            if (__kmp_num_proc_groups > 1) {
                return;
            }
# endif
            KMP_ASSERT(__kmp_affin_fullMask != NULL);
            i = KMP_PLACE_ALL;
            mask = __kmp_affin_fullMask;
        }
        else {
            //
            // int i = some hash function or just a counter that doesn't
            // always start at 0. Use gtid for now.
            //
            KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
            i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
            mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
        }
    }
# endif

# if OMP_40_ENABLED
    th->th.th_current_place = i;
    if (isa_root) {
        th->th.th_new_place = i;
        th->th.th_first_place = 0;
        th->th.th_last_place = __kmp_affinity_num_masks - 1;
    }

    if (i == KMP_PLACE_ALL) {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
          gtid));
    }
    else {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
          gtid, i));
    }
# else
    if (i == -1) {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n",
          gtid));
    }
    else {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
          gtid, i));
    }
# endif /* OMP_40_ENABLED */

    KMP_CPU_COPY(th->th.th_affin_mask, mask);

    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          th->th.th_affin_mask);
        KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
          buf);
    }

# if KMP_OS_WINDOWS
    //
    // On Windows* OS, the process affinity mask might have changed.
    // If the user didn't request affinity and this call fails,
    // just continue silently. See CQ171393.
    //
    if ( __kmp_affinity_type == affinity_none ) {
        __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
    }
    else
# endif
        __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}


# if OMP_40_ENABLED

void
__kmp_affinity_set_place(int gtid)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return;
    }

    kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);

    KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
      gtid, th->th.th_new_place, th->th.th_current_place));

    //
    // Check that the new place is within this thread's partition.
    //
    KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
    KMP_ASSERT(th->th.th_new_place >= 0);
    KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
    if (th->th.th_first_place <= th->th.th_last_place) {
        KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
          && (th->th.th_new_place <= th->th.th_last_place));
    }
    else {
        KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
          || (th->th.th_new_place >= th->th.th_last_place));
    }

    //
    // Copy the thread mask to the kmp_info_t structure,
    // and set this thread's affinity.
    //
    kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
      th->th.th_new_place);
    KMP_CPU_COPY(th->th.th_affin_mask, mask);
    th->th.th_current_place = th->th.th_new_place;

    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          th->th.th_affin_mask);
        KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
          gtid, buf);
    }
    __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}

# endif /* OMP_40_ENABLED */


int
__kmp_aux_set_affinity(void **mask)
{
    int gtid;
    kmp_info_t *th;
    int retval;

    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    gtid = __kmp_entry_gtid();
    KA_TRACE(1000, ;{
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
          gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
        }
        else {
            unsigned proc;
            int num_procs = 0;

            KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t*)(*mask))) {
                if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
                    KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
                }
                if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
                    continue;
                }
                num_procs++;
            }
            if (num_procs == 0) {
                KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
            }

# if KMP_GROUP_AFFINITY
            if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
                KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
            }
# endif /* KMP_GROUP_AFFINITY */

        }
    }

    th = __kmp_threads[gtid];
    KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
    retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
    if (retval == 0) {
        KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
    }

# if OMP_40_ENABLED
    th->th.th_current_place = KMP_PLACE_UNDEFINED;
    th->th.th_new_place = KMP_PLACE_UNDEFINED;
    th->th.th_first_place = 0;
    th->th.th_last_place = __kmp_affinity_num_masks - 1;

    //
    // Turn off 4.0 affinity for the current thread at this parallel level.
    //
    th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
# endif

    return retval;
}


int
__kmp_aux_get_affinity(void **mask)
{
    int gtid;
    int retval;
    kmp_info_t *th;

    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    gtid = __kmp_entry_gtid();
    th = __kmp_threads[gtid];
    KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);

    KA_TRACE(1000, ;{
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          th->th.th_affin_mask);
        __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
        }
    }

# if !KMP_OS_WINDOWS

    retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
    KA_TRACE(1000, ;{
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
    });
    return retval;

# else

    KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
    return 0;

# endif /* KMP_OS_WINDOWS */

}

int
__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    KA_TRACE(1000, ;{
        int gtid = __kmp_entry_gtid();
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
          proc, gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
        }
    }

    if ((proc < 0)
# if !KMP_USE_HWLOC
      || ((unsigned)proc >= KMP_CPU_SETSIZE)
# endif
      ) {
        return -1;
    }
    if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
        return -2;
    }

    KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
    return 0;
}


int
__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    KA_TRACE(1000, ;{
        int gtid = __kmp_entry_gtid();
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
          proc, gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
        }
    }

    if ((proc < 0)
# if !KMP_USE_HWLOC
      || ((unsigned)proc >= KMP_CPU_SETSIZE)
# endif
      ) {
        return -1;
    }
    if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
        return -2;
    }

    KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
    return 0;
}


int
__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    KA_TRACE(1000, ;{
        int gtid = __kmp_entry_gtid();
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
          proc, gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
        }
    }

    if ((proc < 0)
# if !KMP_USE_HWLOC
      || ((unsigned)proc >= KMP_CPU_SETSIZE)
# endif
      ) {
        return -1;
    }
    if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
        return 0;
    }

    return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
}


// Dynamic affinity settings - Affinity balanced
void __kmp_balanced_affinity( int tid, int nthreads )
{
    if( __kmp_affinity_uniform_topology() ) {
        int coreID;
        int threadID;
        // Number of hyper threads per core in HT machine
        int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
        // Number of cores
        int ncores = __kmp_ncores;
        // How many threads will be bound to each core
        int chunk = nthreads / ncores;
        // How many cores will have an additional thread bound to them - "big cores"
        int big_cores = nthreads % ncores;
        // Number of threads on the big cores
        int big_nth = ( chunk + 1 ) * big_cores;
        if( tid < big_nth ) {
            coreID = tid / (chunk + 1 );
            threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
        } else { //tid >= big_nth
            coreID = ( tid - big_cores ) / chunk;
            threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
        }
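
        // Worked example (assumed numbers): nthreads == 5, ncores == 2,
        // __kmp_nth_per_core == 2 gives chunk == 2, big_cores == 1,
        // big_nth == 3; tids 0-2 land on core 0 (threadIDs 0, 1, 0) and
        // tids 3-4 on core 1 (threadIDs 0, 1).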

        KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
          "Illegal set affinity operation when not capable");

        kmp_affin_mask_t *mask;
        KMP_CPU_ALLOC_ON_STACK(mask);
        KMP_CPU_ZERO(mask);

        // Granularity == thread
        if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
            int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
            KMP_CPU_SET( osID, mask);
        } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
            for( int i = 0; i < __kmp_nth_per_core; i++ ) {
                int osID;
                osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
                KMP_CPU_SET( osID, mask);
            }
        }
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
            KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
              tid, buf);
        }
        __kmp_set_system_affinity( mask, TRUE );
        KMP_CPU_FREE_FROM_STACK(mask);
    } else { // Non-uniform topology

        kmp_affin_mask_t *mask;
        KMP_CPU_ALLOC_ON_STACK(mask);
        KMP_CPU_ZERO(mask);

        // Number of hyper threads per core in HT machine
        int nth_per_core = __kmp_nThreadsPerCore;
        int core_level;
        if( nth_per_core > 1 ) {
            core_level = __kmp_aff_depth - 2;
        } else {
            core_level = __kmp_aff_depth - 1;
        }

        // Number of cores - maximum value; it does not count trailing cores with 0 processors
        int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;

        // For performance gain consider the special case nthreads == __kmp_avail_proc
        if( nthreads == __kmp_avail_proc ) {
            if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
                int osID = address2os[ tid ].second;
                KMP_CPU_SET( osID, mask);
            } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
                int coreID = address2os[ tid ].first.labels[ core_level ];
                // We'll count the osIDs found for the current core; there can be at most
                // nth_per_core of them; since address2os is sorted we can break when
                // cnt == nth_per_core
                int cnt = 0;
                for( int i = 0; i < __kmp_avail_proc; i++ ) {
                    int osID = address2os[ i ].second;
                    int core = address2os[ i ].first.labels[ core_level ];
                    if( core == coreID ) {
                        KMP_CPU_SET( osID, mask);
                        cnt++;
                        if( cnt == nth_per_core ) {
                            break;
                        }
                    }
                }
            }
        } else if( nthreads <= __kmp_ncores ) {

            int core = 0;
            for( int i = 0; i < ncores; i++ ) {
                // Check if this core from procarr[] is in the mask
                int in_mask = 0;
                for( int j = 0; j < nth_per_core; j++ ) {
                    if( procarr[ i * nth_per_core + j ] != -1 ) {
                        in_mask = 1;
                        break;
                    }
                }
                if( in_mask ) {
                    if( tid == core ) {
                        for( int j = 0; j < nth_per_core; j++ ) {
                            int osID = procarr[ i * nth_per_core + j ];
                            if( osID != -1 ) {
                                KMP_CPU_SET( osID, mask );
                                // For granularity=thread it is enough to set the first available osID for this core
                                if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
                                    break;
                                }
                            }
                        }
                        break;
                    } else {
                        core++;
                    }
                }
            }

        } else { // nthreads > __kmp_ncores

            // Array to save the number of processors at each core
            int* nproc_at_core = (int*)KMP_ALLOCA(sizeof(int)*ncores);
            // Array to save the number of cores with "x" available processors;
            int* ncores_with_x_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
            // Array to save the number of cores with # procs from x to nth_per_core
            int* ncores_with_x_to_max_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));

            for( int i = 0; i <= nth_per_core; i++ ) {
                ncores_with_x_procs[ i ] = 0;
                ncores_with_x_to_max_procs[ i ] = 0;
            }

            for( int i = 0; i < ncores; i++ ) {
                int cnt = 0;
                for( int j = 0; j < nth_per_core; j++ ) {
                    if( procarr[ i * nth_per_core + j ] != -1 ) {
                        cnt++;
                    }
                }
                nproc_at_core[ i ] = cnt;
                ncores_with_x_procs[ cnt ]++;
            }

            for( int i = 0; i <= nth_per_core; i++ ) {
                for( int j = i; j <= nth_per_core; j++ ) {
                    ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
                }
            }
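
            // For illustration (assumed numbers): with nth_per_core == 2 and
            // three cores holding 2, 1 and 2 usable contexts,
            // nproc_at_core == {2,1,2}, ncores_with_x_procs == {0,1,2} and
            // ncores_with_x_to_max_procs == {3,3,2} (cores with at least
            // 0, 1 and 2 contexts, respectively).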

            // Max number of processors
            int nproc = nth_per_core * ncores;
            // An array to keep the number of threads per context
            int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
            for( int i = 0; i < nproc; i++ ) {
                newarr[ i ] = 0;
            }

            int nth = nthreads;
            int flag = 0;
            while( nth > 0 ) {
                for( int j = 1; j <= nth_per_core; j++ ) {
                    int cnt = ncores_with_x_to_max_procs[ j ];
                    for( int i = 0; i < ncores; i++ ) {
                        // Skip cores with 0 processors
                        if( nproc_at_core[ i ] == 0 ) {
                            continue;
                        }
                        for( int k = 0; k < nth_per_core; k++ ) {
                            if( procarr[ i * nth_per_core + k ] != -1 ) {
                                if( newarr[ i * nth_per_core + k ] == 0 ) {
                                    newarr[ i * nth_per_core + k ] = 1;
                                    cnt--;
                                    nth--;
                                    break;
                                } else {
                                    if( flag != 0 ) {
                                        newarr[ i * nth_per_core + k ] ++;
                                        cnt--;
                                        nth--;
                                        break;
                                    }
                                }
                            }
                        }
                        if( cnt == 0 || nth == 0 ) {
                            break;
                        }
                    }
                    if( nth == 0 ) {
                        break;
                    }
                }
                flag = 1;
            }
            int sum = 0;
            for( int i = 0; i < nproc; i++ ) {
                sum += newarr[ i ];
                if( sum > tid ) {
                    // Granularity == thread
                    if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
                        int osID = procarr[ i ];
                        KMP_CPU_SET( osID, mask);
                    } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
                        int coreID = i / nth_per_core;
                        for( int ii = 0; ii < nth_per_core; ii++ ) {
                            int osID = procarr[ coreID * nth_per_core + ii ];
                            if( osID != -1 ) {
                                KMP_CPU_SET( osID, mask);
                            }
                        }
                    }
                    break;
                }
            }
            __kmp_free( newarr );
        }

        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
            KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
              tid, buf);
        }
        __kmp_set_system_affinity( mask, TRUE );
        KMP_CPU_FREE_FROM_STACK(mask);
    }
}

#if KMP_OS_LINUX
// We don't need this entry for Windows because
// there is the GetProcessAffinityMask() API.
//
// The intended usage is indicated by these steps:
// 1) The user gets the current affinity mask
// 2) Then sets the affinity by calling this function
// 3) Error check the return value
// 4) Use non-OpenMP parallelization
// 5) Reset the affinity to what was stored in step 1)
#ifdef __cplusplus
extern "C"
#endif
int
kmp_set_thread_affinity_mask_initial()
// the function returns 0 on success,
//   -1 if we cannot bind the thread,
//   >0 (errno) if an error happened during binding
{
    int gtid = __kmp_get_gtid();
    if (gtid < 0) {
        // Do not touch non-OpenMP threads
        KA_TRACE(30, ( "kmp_set_thread_affinity_mask_initial: "
          "non-omp thread, returning\n"));
        return -1;
    }
    if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) {
        KA_TRACE(30, ( "kmp_set_thread_affinity_mask_initial: "
          "affinity not initialized, returning\n"));
        return -1;
    }
    KA_TRACE(30, ( "kmp_set_thread_affinity_mask_initial: "
      "set full mask for thread %d\n", gtid));
    KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
    return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
}
#endif

#endif // KMP_AFFINITY_SUPPORTED