/*
 * kmp_affinity.cpp -- affinity management
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_str.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_affinity.h"

// Store the real or imagined machine hierarchy here
static hierarchy_info machine_hierarchy;

void __kmp_cleanup_hierarchy() {
    machine_hierarchy.fini();
}

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
    kmp_uint32 depth;
    // The test below is true if affinity is available, but set to "none".
    // Need to init on first use of hierarchical barrier.
    if (TCR_1(machine_hierarchy.uninitialized))
        machine_hierarchy.init(NULL, nproc);

    // Adjust the hierarchy in case num threads exceeds original
    if (nproc > machine_hierarchy.base_num_threads)
        machine_hierarchy.resize(nproc);

    depth = machine_hierarchy.depth;
    KMP_DEBUG_ASSERT(depth > 0);

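    // Copy what the barrier code needs out of the hierarchy: its depth, the
    // number of leaf-level kids a parent waits for (the leaf branching factor
    // numPerLevel[0], minus one for the parent itself), and the per-level
    // subtree spans (skipPerLevel) used to locate parents and children.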
    thr_bar->depth = depth;
    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}

#if KMP_AFFINITY_SUPPORTED

//
// Print the affinity mask to the character array in a pretty format.
//
#if KMP_USE_HWLOC
char *
__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
{
    int num_chars_to_write, num_chars_written;
    char* scan;
    KMP_ASSERT(buf_len >= 40);

    // bufsize of 0 just retrieves the needed buffer size.
    num_chars_to_write = hwloc_bitmap_list_snprintf(buf, 0, (hwloc_bitmap_t)mask);

    // need '{', "xxxxxxxx...xx", '}', '\0' = num_chars_to_write + 3 bytes
    // * num_chars_to_write returned by hwloc_bitmap_list_snprintf does not
    //   take into account the '\0' character.
    if(hwloc_bitmap_iszero((hwloc_bitmap_t)mask)) {
        KMP_SNPRINTF(buf, buf_len, "{<empty>}");
    } else if(num_chars_to_write < buf_len - 3) {
        // no problem fitting the mask into buf_len number of characters
        buf[0] = '{';
        // use buf_len-3 because we have the three characters: '{' '}' '\0' to add to the buffer
        num_chars_written = hwloc_bitmap_list_snprintf(buf+1, buf_len-3, (hwloc_bitmap_t)mask);
        buf[num_chars_written+1] = '}';
        buf[num_chars_written+2] = '\0';
    } else {
        // Need to truncate the affinity mask string and add ellipsis.
        // To do this, we first write out the '{' + str(mask)
        buf[0] = '{';
        hwloc_bitmap_list_snprintf(buf+1, buf_len-1, (hwloc_bitmap_t)mask);
        // Then go to the 7th-to-last character and scan backwards until we
        // are NOT on a digit, and write "...}\0" there.  This way the ellipsis
        // is added cleanly and we don't overwrite part of an affinity number,
        // i.e. we avoid something like { 45, 67, 8...} and get { 45, 67,...}
        // instead.
        scan = buf + buf_len - 7;
        while(scan >= buf && *scan >= '0' && *scan <= '9')
            scan--;
        *(scan+1) = '.';
        *(scan+2) = '.';
        *(scan+3) = '.';
        *(scan+4) = '}';
        *(scan+5) = '\0';
    }
    return buf;
}
#else
char *
__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
{
    KMP_ASSERT(buf_len >= 40);
    char *scan = buf;
    char *end = buf + buf_len - 1;

    //
    // Find first element / check for empty set.
    //
    size_t i;
    for (i = 0; i < KMP_CPU_SETSIZE; i++) {
        if (KMP_CPU_ISSET(i, mask)) {
            break;
        }
    }
    if (i == KMP_CPU_SETSIZE) {
        KMP_SNPRINTF(scan, end-scan+1, "{<empty>}");
        while (*scan != '\0') scan++;
        KMP_ASSERT(scan <= end);
        return buf;
    }

    KMP_SNPRINTF(scan, end-scan+1, "{%ld", (long)i);
    while (*scan != '\0') scan++;
    i++;
    for (; i < KMP_CPU_SETSIZE; i++) {
        if (! KMP_CPU_ISSET(i, mask)) {
            continue;
        }

        //
        // Check for buffer overflow.  A string of the form ",<n>" will have
        // at most 10 characters, plus we want to leave room to print ",...}"
        // if the set is too large to print for a total of 15 characters.
        // We already left room for '\0' in setting end.
        //
        if (end - scan < 15) {
            break;
        }
        KMP_SNPRINTF(scan, end-scan+1, ",%-ld", (long)i);
        while (*scan != '\0') scan++;
    }
    if (i < KMP_CPU_SETSIZE) {
        KMP_SNPRINTF(scan, end-scan+1, ",...");
        while (*scan != '\0') scan++;
    }
    KMP_SNPRINTF(scan, end-scan+1, "}");
    while (*scan != '\0') scan++;
    KMP_ASSERT(scan <= end);
    return buf;
}
#endif // KMP_USE_HWLOC


void
__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
{
    KMP_CPU_ZERO(mask);

# if KMP_GROUP_AFFINITY

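    // With multiple Windows* OS processor groups, a global proc id is formed
    // as group * (CHAR_BIT * sizeof(DWORD_PTR)) + proc-within-group, so set
    // the bit for each active proc at its group's offset.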
    if (__kmp_num_proc_groups > 1) {
        int group;
        KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
        for (group = 0; group < __kmp_num_proc_groups; group++) {
            int i;
            int num = __kmp_GetActiveProcessorCount(group);
            for (i = 0; i < num; i++) {
                KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
            }
        }
    }
    else

# endif /* KMP_GROUP_AFFINITY */

    {
        int proc;
        for (proc = 0; proc < __kmp_xproc; proc++) {
            KMP_CPU_SET(proc, mask);
        }
    }
}

//
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the child_num
// vector of the address object.  This is done in case the labels used for
// the children at one node of the hierarchy differ from those used for
// another node at the same level.  Example:  suppose the machine has 2 nodes
// with 2 packages each.  The first node contains packages 601 and 602, and
// the second node contains packages 603 and 604.  If we try to sort the table
// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
// because we are paying attention to the labels themselves, not the ordinal
// child numbers.  By using the child numbers in the sort, the result is
// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
//
static void
__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
  int numAddrs)
{
    KMP_DEBUG_ASSERT(numAddrs > 0);
    int depth = address2os->first.depth;
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
      * sizeof(unsigned));
    int labCt;
    for (labCt = 0; labCt < depth; labCt++) {
        address2os[0].first.childNums[labCt] = counts[labCt] = 0;
        lastLabel[labCt] = address2os[0].first.labels[labCt];
    }
    int i;
    for (i = 1; i < numAddrs; i++) {
        for (labCt = 0; labCt < depth; labCt++) {
            if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
                int labCt2;
                for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
                    counts[labCt2] = 0;
                    lastLabel[labCt2] = address2os[i].first.labels[labCt2];
                }
                counts[labCt]++;
                lastLabel[labCt] = address2os[i].first.labels[labCt];
                break;
            }
        }
        for (labCt = 0; labCt < depth; labCt++) {
            address2os[i].first.childNums[labCt] = counts[labCt];
        }
        for (; labCt < (int)Address::maxDepth; labCt++) {
            address2os[i].first.childNums[labCt] = 0;
        }
    }
}


//
// All of the __kmp_affinity_create_*_map() routines should set
// __kmp_affinity_masks to a vector of affinity mask objects of length
// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
// return the number of levels in the machine topology tree (zero if
// __kmp_affinity_type == affinity_none).
//
// All of the __kmp_affinity_create_*_map() routines should set *__kmp_affin_fullMask
// to the affinity mask for the initialization thread.  They need to save and
// restore the mask, and it could be needed later, so saving it is just an
// optimization to avoid calling kmp_get_system_affinity() again.
//
kmp_affin_mask_t *__kmp_affin_fullMask = NULL;

static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif

253//
254// __kmp_affinity_uniform_topology() doesn't work when called from
255// places which support arbitrarily many levels in the machine topology
256// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
257// __kmp_affinity_create_x2apicid_map().
258//
259inline static bool
260__kmp_affinity_uniform_topology()
261{
262 return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
263}
264
265
266//
267// Print out the detailed machine topology map, i.e. the physical locations
268// of each OS proc.
269//
270static void
271__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
272 int pkgLevel, int coreLevel, int threadLevel)
273{
274 int proc;
275
276 KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
277 for (proc = 0; proc < len; proc++) {
278 int level;
279 kmp_str_buf_t buf;
280 __kmp_str_buf_init(&buf);
281 for (level = 0; level < depth; level++) {
282 if (level == threadLevel) {
283 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
284 }
285 else if (level == coreLevel) {
286 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
287 }
288 else if (level == pkgLevel) {
289 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
290 }
291 else if (level > pkgLevel) {
292 __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
293 level - pkgLevel - 1);
294 }
295 else {
296 __kmp_str_buf_print(&buf, "L%d ", level);
297 }
298 __kmp_str_buf_print(&buf, "%d ",
299 address2os[proc].first.labels[level]);
300 }
301 KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
302 buf.str);
303 __kmp_str_buf_free(&buf);
304 }
305}
306
#if KMP_USE_HWLOC

// This function removes the topology levels that are radix 1 and don't offer
// further information about the topology.  The most common example is when
// there is one thread context per core; we don't want the extra thread
// context level if it offers no unique labels.  So they are removed.
// return value: the new depth of address2os
static int
__kmp_affinity_remove_radix_one_levels(AddrUnsPair *address2os, int nActiveThreads, int depth, int* pkgLevel, int* coreLevel, int* threadLevel) {
    int level;
    int i;
    int radix1_detected;

    for (level = depth-1; level >= 0; --level) {
        // Always keep the package level
        if (level == *pkgLevel)
            continue;
        // Detect if this level is radix 1
        radix1_detected = 1;
        for (i = 1; i < nActiveThreads; ++i) {
            if (address2os[0].first.labels[level] != address2os[i].first.labels[level]) {
                // There are differing label values for this level so it stays
                radix1_detected = 0;
                break;
            }
        }
        if (!radix1_detected)
            continue;
        // Radix 1 was detected
        if (level == *threadLevel) {
            // If only one thread per core, then just decrement
            // the depth which removes the threadlevel from address2os
            for (i = 0; i < nActiveThreads; ++i) {
                address2os[i].first.depth--;
            }
            *threadLevel = -1;
        } else if (level == *coreLevel) {
            // For core level, we move the thread labels over if they are still
            // valid (*threadLevel != -1), and also reduce the depth another level
            for (i = 0; i < nActiveThreads; ++i) {
                if (*threadLevel != -1) {
                    address2os[i].first.labels[*coreLevel] = address2os[i].first.labels[*threadLevel];
                }
                address2os[i].first.depth--;
            }
            *coreLevel = -1;
        }
    }
    return address2os[0].first.depth;
}

// Returns the number of objects of type 'type' below 'obj' within the topology tree structure.
// e.g., if obj is a HWLOC_OBJ_SOCKET object, and type is HWLOC_OBJ_PU, then
// this will return the number of PU's under the SOCKET object.
static int
__kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj, hwloc_obj_type_t type) {
    int retval = 0;
    hwloc_obj_t first;
    for(first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type, obj->logical_index, type, 0);
        first != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) == obj;
        first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type, first))
    {
        ++retval;
    }
    return retval;
}

static int
__kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    __kmp_get_system_affinity(oldMask, TRUE);

    int depth = 3;
    int pkgLevel = 0;
    int coreLevel = 1;
    int threadLevel = 2;

    if (! KMP_AFFINITY_CAPABLE())
    {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0), HWLOC_OBJ_CORE);
        __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);
        __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Allocate the data structure to be returned.
    //
    AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return if affinity is not enabled.
    //

    hwloc_obj_t pu;
    hwloc_obj_t core;
    hwloc_obj_t socket;
    int nActiveThreads = 0;
    int socket_identifier = 0;
    // re-calculate globals to count only accessible resources
    __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
    for(socket = hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0);
        socket != NULL;
        socket = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, socket),
        socket_identifier++)
    {
        int core_identifier = 0;
        int num_active_cores = 0;
        for(core = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, socket->type, socket->logical_index, HWLOC_OBJ_CORE, 0);
            core != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, socket->type, core) == socket;
            core = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, core),
            core_identifier++)
        {
            int pu_identifier = 0;
            int num_active_threads = 0;
            for(pu = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, core->type, core->logical_index, HWLOC_OBJ_PU, 0);
                pu != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, core->type, pu) == core;
                pu = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU, pu),
                pu_identifier++)
            {
                Address addr(3);
                if(! KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
                    continue; // skip inactive (inaccessible) unit
                KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
                    socket->os_index, socket->logical_index, core->os_index, core->logical_index, pu->os_index, pu->logical_index));
                addr.labels[0] = socket_identifier; // package
                addr.labels[1] = core_identifier;   // core
                addr.labels[2] = pu_identifier;     // pu
                retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
                nActiveThreads++;
                ++num_active_threads; // count active threads per core
            }
            if (num_active_threads) { // were there any active threads on the core?
                ++__kmp_ncores; // count total active cores
                ++num_active_cores; // count active cores per socket
                if (num_active_threads > __kmp_nThreadsPerCore)
                    __kmp_nThreadsPerCore = num_active_threads; // calc maximum
            }
        }
        if (num_active_cores) { // were there any active cores on the socket?
            ++nPackages; // count total active packages
            if (num_active_cores > nCoresPerPkg)
                nCoresPerPkg = num_active_cores; // calc maximum
        }
    }

    //
    // If there's only one thread context to bind to, return now.
    //
    KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
    KMP_ASSERT(nActiveThreads > 0);
    if (nActiveThreads == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(retval);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        //
        // Form an Address object which only includes the package level.
        //
        Address addr(1);
        addr.labels[0] = retval[0].first.labels[pkgLevel];
        retval[0].first = addr;

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
        }

        *address2os = retval;
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the table by physical Id.
    //
    qsort(retval, nActiveThreads, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

    //
    // Check to see if the machine topology is uniform
    //
    unsigned uniform = (nPackages * nCoresPerPkg * __kmp_nThreadsPerCore == nActiveThreads);

    //
    // Print the machine topology summary.
    //
    if (__kmp_affinity_verbose) {
        char mask[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (uniform) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }

        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);

        __kmp_str_buf_print(&buf, "%d", nPackages);
        //for (level = 1; level <= pkgLevel; level++) {
        //    __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
        //}
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);

        __kmp_str_buf_free(&buf);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(retval);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Find any levels with radix 1, and remove them from the map
    // (except for the package level).
    //
    depth = __kmp_affinity_remove_radix_one_levels(retval, nActiveThreads, depth, &pkgLevel, &coreLevel, &threadLevel);

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(retval, nActiveThreads, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    KMP_CPU_FREE(oldMask);
    *address2os = retval;
    return depth;
}
#endif // KMP_USE_HWLOC

//
// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread id's <-> processor id's.
//
static int
__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Even if __kmp_affinity_type == affinity_none, this routine might still
    // be called to set __kmp_ncores, as well as
    // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(__kmp_affinity_type == affinity_none);
        __kmp_ncores = nPackages = __kmp_xproc;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nPackages = __kmp_avail_proc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, __kmp_affin_fullMask);

        KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }
    if (__kmp_affinity_type == affinity_none) {
        return 0;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    unsigned int i;
    KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
            continue;
        }

        Address addr(1);
        addr.labels[0] = i;
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
    }
    if (__kmp_affinity_verbose) {
        KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Only the package level is modeled in the machine topology map,
        // so the #levels of granularity is either 0 or 1.
        //
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels = 1;
        }
        else {
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 1;
}


# if KMP_GROUP_AFFINITY

//
// If multiple Windows* OS processor groups exist, we can create a 2-level
// topology map with the groups at level 0 and the individual procs at
// level 1.
//
// This facilitates letting the threads float among all procs in a group,
// if granularity=group (the default when there are multiple groups).
//
static int
__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // If we don't have multiple processor groups, return now.
    // The flat mapping will be used.
    //
    if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(__kmp_affin_fullMask) >= 0)) {
        // FIXME set *msg_id
        return -1;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    int i;
    KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
            continue;
        }

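        // Label 0 is the Windows* OS processor group number; label 1 is the
        // proc's bit position within its group's DWORD_PTR-sized affinity
        // mask (CHAR_BIT * sizeof(DWORD_PTR) = 64 procs per group on 64-bit
        // Windows* OS).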
        Address addr(2);
        addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
        addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
              addr.labels[1]);
        }
    }

    if (__kmp_affinity_gran_levels < 0) {
        if (__kmp_affinity_gran == affinity_gran_group) {
            __kmp_affinity_gran_levels = 1;
        }
        else if ((__kmp_affinity_gran == affinity_gran_fine)
          || (__kmp_affinity_gran == affinity_gran_thread)) {
            __kmp_affinity_gran_levels = 0;
        }
        else {
            const char *gran_str = NULL;
            if (__kmp_affinity_gran == affinity_gran_core) {
                gran_str = "core";
            }
            else if (__kmp_affinity_gran == affinity_gran_package) {
                gran_str = "package";
            }
            else if (__kmp_affinity_gran == affinity_gran_node) {
                gran_str = "node";
            }
            else {
                KMP_ASSERT(0);
            }

            // Warning: can't use affinity granularity \"gran\" with group topology method, using "thread"
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 2;
}

# endif /* KMP_GROUP_AFFINITY */


# if KMP_ARCH_X86 || KMP_ARCH_X86_64

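// Compute the number of bits needed to distinguish "count" values, i.e. the
// smallest r such that (1 << r) >= count.  Used to find the widths of the
// thread# and core# fields packed into the legacy (cpuid leaf 1/4) Apic Id.
// For example, count = 6 gives r = 3, since 2^2 = 4 < 6 <= 8 = 2^3.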
static int
__kmp_cpuid_mask_width(int count) {
    int r = 0;

    while((1<<r) < count)
        ++r;
    return r;
}


class apicThreadInfo {
public:
    unsigned osId;              // param to __kmp_affinity_bind_thread
    unsigned apicId;            // from cpuid after binding
    unsigned maxCoresPerPkg;    // ""
    unsigned maxThreadsPerPkg;  // ""
    unsigned pkgId;             // inferred from above values
    unsigned coreId;            // ""
    unsigned threadId;          // ""
};

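// qsort() comparator: order apicThreadInfo records by OS processor id.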
static int
__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->osId < bb->osId) return -1;
    if (aa->osId > bb->osId) return 1;
    return 0;
}

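// qsort() comparator: order apicThreadInfo records by physical location,
// i.e. by package id, then core id, then thread id, so that topologically
// adjacent thread contexts end up adjacent in the table.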
static int
__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->pkgId < bb->pkgId) return -1;
    if (aa->pkgId > bb->pkgId) return 1;
    if (aa->coreId < bb->coreId) return -1;
    if (aa->coreId > bb->coreId) return 1;
    if (aa->threadId < bb->threadId) return -1;
    if (aa->threadId > bb->threadId) return 1;
    return 0;
}

//
// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
// an algorithm which cycles through the available os threads, setting
// the current thread's affinity mask to that thread, and then retrieves
// the Apic Id for each thread context using the cpuid instruction.
//
static int
__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;
    int rc;
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check if cpuid leaf 4 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 4) {
        *msg_id = kmp_i18n_str_NoLeaf4Support;
        return -1;
    }

    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
    // the defaults that we calculated from issuing cpuid without binding
    // to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        //
        // Get an upper bound on the number of threads per package using
        // cpuid(1).
        //
        // On some OS/chip combinations where HT is supported by the chip
        // but is disabled, this value will be 2 on a single core chip.
        // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
        //
        __kmp_x86_cpuid(1, 0, &buf);
        int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (maxThreadsPerPkg == 0) {
            maxThreadsPerPkg = 1;
        }

        //
        // The num cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // The author of cpu_count.cpp treated this as only an upper bound
        // on the number of cores, but I haven't seen any cases where it
        // was greater than the actual number of cores, so we will treat
        // it as exact in this block of code.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            nCoresPerPkg = 1;
        }

        //
        // There is no way to reliably tell if HT is enabled without issuing
        // the cpuid instruction from every thread, and correlating the cpuid
        // info, so if the machine is not affinity capable, we assume that HT
        // is off.  We have seen quite a few machines where maxThreadsPerPkg
        // is 2, yet the machine does not support HT.
        //
        // - Older OSes are usually found on machines with older chips, which
        //   do not support HT.
        //
        // - The performance penalty for mistakenly identifying a machine as
        //   HT when it isn't (which results in blocktime being incorrectly
        //   set to 0) is greater than the penalty for mistakenly identifying
        //   a machine as being 1 thread/core when it is really HT enabled
        //   (which results in blocktime being incorrectly set to a positive
        //   value).
        //
        __kmp_ncores = __kmp_xproc;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        __kmp_nThreadsPerCore = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    KMP_ASSERT(oldMask != NULL);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    // The relevant information is:
    //
    // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
    //     has a unique Apic Id, which is of the form pkg# : core# : thread#.
    //
    // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1).  The
    //     value of this field determines the width of the core# + thread#
    //     fields in the Apic Id.  It is also an upper bound on the number
    //     of threads per package, but it has been verified that situations
    //     happen where it is not exact.  In particular, on certain OS/chip
    //     combinations where Intel(R) Hyper-Threading Technology is supported
    //     by the chip but has been disabled, the value of this field will be
    //     2 (for a single core chip).  On other OS/chip combinations
    //     supporting Intel(R) Hyper-Threading Technology, the value of this
    //     field will be 1 when Intel(R) Hyper-Threading Technology is
    //     disabled and 2 when it is enabled.
    //
    // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4).  The
    //     value of this field (+1) determines the width of the core# field in
    //     the Apic Id.  The comments in "cpucount.cpp" say that this value is
    //     an upper bound, but the IA-32 architecture manual says that it is
    //     exactly the number of cores per package, and I haven't seen any
    //     case where it wasn't.
    //
    // From this information, deduce the package Id, core Id, and thread Id,
    // and set the corresponding fields in the apicThreadInfo struct.
    //
    unsigned i;
    apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
    unsigned nApics = 0;
    KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(i);
        threadInfo[nApics].osId = i;

        //
        // The apic id and max threads per pkg come from cpuid(1).
        //
        __kmp_x86_cpuid(1, 0, &buf);
        if (! ((buf.edx >> 9) & 1)) {
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_ApicNotPresent;
            return -1;
        }
        threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
        threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (threadInfo[nApics].maxThreadsPerPkg == 0) {
            threadInfo[nApics].maxThreadsPerPkg = 1;
        }

        //
        // Max cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            threadInfo[nApics].maxCoresPerPkg = 1;
        }

        //
        // Infer the pkgId / coreId / threadId using only the info
        // obtained locally.
        //
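        // For example, with maxThreadsPerPkg = 16 and maxCoresPerPkg = 8,
        // widthCT = 4 and widthC = 3, so widthT = 1: the low bit of the
        // apic id is the thread id, the next 3 bits are the core id, and
        // the remaining high bits are the package id.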
        int widthCT = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxThreadsPerPkg);
        threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

        int widthC = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxCoresPerPkg);
        int widthT = widthCT - widthC;
        if (widthT < 0) {
            //
            // I've never seen this one happen, but I suppose it could, if
            // the cpuid instruction on a chip was really screwed up.
            // Make sure to restore the affinity mask before the tail call.
            //
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }

        int maskC = (1 << widthC) - 1;
        threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
          & maskC;

        int maskT = (1 << widthT) - 1;
        threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

        nApics++;
    }

    //
    // We've collected all the info we need.
    // Restore the old affinity mask for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, form an Address object
    // with depth 1 and return immediately (or, if affinity is off, set
    // address2os to NULL and return).
    //
    // If it is configured to omit the package level when there is only a
    // single package, the logic at the end of this routine won't work if
    // there is only a single thread - it would try to form an Address
    // object with depth 0.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
        Address addr(1);
        addr.labels[0] = threadInfo[0].pkgId;
        (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
        }

        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the threadInfo table by physical Id.
    //
    qsort(threadInfo, nApics, sizeof(*threadInfo),
      __kmp_affinity_cmp_apicThreadInfo_phys_id);

    //
    // The table is now sorted by pkgId / coreId / threadId, but we really
    // don't know the radix of any of the fields.  pkgId's may be sparsely
    // assigned among the chips on a system.  Although coreId's are usually
    // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
    // [0..threadsPerCore-1], we don't want to make any such assumptions.
    //
    // For that matter, we don't know what coresPerPkg and threadsPerCore
    // (or the total # packages) are at this point - we want to determine
    // that now.  We only have an upper bound on the first two figures.
    //
    // We also perform a consistency check at this point: the values returned
    // by the cpuid instruction for any thread bound to a given package had
    // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
    //
    nPackages = 1;
    nCoresPerPkg = 1;
    __kmp_nThreadsPerCore = 1;
    unsigned nCores = 1;

    unsigned pkgCt = 1;                         // to determine radii
    unsigned lastPkgId = threadInfo[0].pkgId;
    unsigned coreCt = 1;
    unsigned lastCoreId = threadInfo[0].coreId;
    unsigned threadCt = 1;
    unsigned lastThreadId = threadInfo[0].threadId;

    // intra-pkg consist checks
    unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
    unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

    for (i = 1; i < nApics; i++) {
        if (threadInfo[i].pkgId != lastPkgId) {
            nCores++;
            pkgCt++;
            lastPkgId = threadInfo[i].pkgId;
            if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
            coreCt = 1;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;

            //
            // This is a different package, so go on to the next iteration
            // without doing any consistency checks.  Reset the consistency
            // check vars, though.
            //
            prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
            prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
            continue;
        }

        if (threadInfo[i].coreId != lastCoreId) {
            nCores++;
            coreCt++;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;
        }
        else if (threadInfo[i].threadId != lastThreadId) {
            threadCt++;
            lastThreadId = threadInfo[i].threadId;
        }
        else {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
            return -1;
        }

        //
        // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
        // fields agree between all the threads bound to a given package.
        //
        if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
          || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }
    }
    nPackages = pkgCt;
    if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
    if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nCores;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (__kmp_affinity_uniform_topology()) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Now that we've determined the number of packages, the number of cores
    // per package, and the number of threads per core, we can construct the
    // data structure that is to be returned.
    //
    int pkgLevel = 0;
    int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
    int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
    unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

    KMP_ASSERT(depth > 0);
    *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

    for (i = 0; i < nApics; ++i) {
        Address addr(depth);
        unsigned os = threadInfo[i].osId;
        int d = 0;

        if (pkgLevel >= 0) {
            addr.labels[d++] = threadInfo[i].pkgId;
        }
        if (coreLevel >= 0) {
            addr.labels[d++] = threadInfo[i].coreId;
        }
        if (threadLevel >= 0) {
            addr.labels[d++] = threadInfo[i].threadId;
        }
        (*address2os)[i] = AddrUnsPair(addr, os);
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0)
          && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return depth;
}


//
// Intel(R) microarchitecture code name Nehalem, Dunnington and later
// architectures support a newer interface for specifying the x2APIC Ids,
// based on cpuid leaf 11.
//
static int
__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;

    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check to see if cpuid leaf 11 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 11) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }
    __kmp_x86_cpuid(11, 0, &buf);
    if (buf.ebx == 0) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }

    //
    // Find the number of levels in the machine topology.  While we're at it,
    // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg.  We
    // will try to get more accurate values later by explicitly counting them,
    // but get reasonable defaults now, in case we return early.
    //
    int level;
    int threadLevel = -1;
    int coreLevel = -1;
    int pkgLevel = -1;
    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

    for (level = 0;; level++) {
        if (level > 31) {
            //
            // FIXME: Hack for DPD200163180
            //
            // If level is big then something went wrong -> exiting
            //
            // There could actually be 32 valid levels in the machine topology,
            // but so far, the only machine we have seen which does not exit
            // this loop before iteration 32 has fubar x2APIC settings.
            //
            // For now, just reject this case based upon loop trip count.
            //
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }
        __kmp_x86_cpuid(11, level, &buf);
        if (buf.ebx == 0) {
            if (pkgLevel < 0) {
                //
                // Will infer nPackages from __kmp_xproc
                //
                pkgLevel = level;
                level++;
            }
            break;
        }
        int kind = (buf.ecx >> 8) & 0xff;
        if (kind == 1) {
            //
            // SMT level
            //
            threadLevel = level;
            coreLevel = -1;
            pkgLevel = -1;
            __kmp_nThreadsPerCore = buf.ebx & 0xff;
            if (__kmp_nThreadsPerCore == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else if (kind == 2) {
            //
            // core level
            //
            coreLevel = level;
            pkgLevel = -1;
            nCoresPerPkg = buf.ebx & 0xff;
            if (nCoresPerPkg == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else {
            if (level <= 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
            if (pkgLevel >= 0) {
                continue;
            }
            pkgLevel = level;
            nPackages = buf.ebx & 0xff;
            if (nPackages == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
    }
    int depth = level;

    //
    // In the above loop, "level" was counted from the finest level (usually
    // thread) to the coarsest.  The caller expects that we will place the
    // labels in (*address2os)[].first.labels[] in the inverse order, so
    // we need to invert the vars saying which level means what.
    //
    if (threadLevel >= 0) {
        threadLevel = depth - threadLevel - 1;
    }
    if (coreLevel >= 0) {
        coreLevel = depth - coreLevel - 1;
    }
    KMP_DEBUG_ASSERT(pkgLevel >= 0);
    pkgLevel = depth - pkgLevel - 1;

    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
    // the defaults that we calculated from issuing cpuid without binding
    // to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE())
    {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    __kmp_get_system_affinity(oldMask, TRUE);

1508 //
1509 // Allocate the data structure to be returned.
1510 //
1511 AddrUnsPair *retval = (AddrUnsPair *)
1512 __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
1513
1514 //
1515 // Run through each of the available contexts, binding the current thread
1516 // to it, and obtaining the pertinent information using the cpuid instr.
1517 //
1518 unsigned int proc;
1519 int nApics = 0;
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00001520 KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001521 //
1522 // Skip this proc if it is not included in the machine model.
1523 //
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00001524 if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001525 continue;
1526 }
1527 KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
1528
1529 __kmp_affinity_bind_thread(proc);
1530
1531 //
 1532 // Extract the labels for each level of the machine topology map
 1533 // from the APIC ID.
1534 //
1535 Address addr(depth);
1536 int prev_shift = 0;
1537
1538 for (level = 0; level < depth; level++) {
1539 __kmp_x86_cpuid(11, level, &buf);
1540 unsigned apicId = buf.edx;
1541 if (buf.ebx == 0) {
1542 if (level != depth - 1) {
1543 KMP_CPU_FREE(oldMask);
1544 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1545 return -1;
1546 }
1547 addr.labels[depth - level - 1] = apicId >> prev_shift;
1548 level++;
1549 break;
1550 }
1551 int shift = buf.eax & 0x1f;
1552 int mask = (1 << shift) - 1;
1553 addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
1554 prev_shift = shift;
1555 }
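        //
        // A worked example with hypothetical shift widths: if level 0 (SMT)
        // reports shift 1 and level 1 (core) reports shift 5, then for
        // apicId 0x2b the loop extracts thread = 0x2b & 0x1 = 1,
        // core = (0x2b & 0x1f) >> 1 = 5, and package = 0x2b >> 5 = 1.
        //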
1556 if (level != depth) {
1557 KMP_CPU_FREE(oldMask);
1558 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1559 return -1;
1560 }
1561
1562 retval[nApics] = AddrUnsPair(addr, proc);
1563 nApics++;
1564 }
1565
1566 //
1567 // We've collected all the info we need.
1568 // Restore the old affinity mask for this thread.
1569 //
1570 __kmp_set_system_affinity(oldMask, TRUE);
1571
1572 //
1573 // If there's only one thread context to bind to, return now.
1574 //
1575 KMP_ASSERT(nApics > 0);
1576 if (nApics == 1) {
1577 __kmp_ncores = nPackages = 1;
1578 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001579 if (__kmp_affinity_verbose) {
1580 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1581 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1582
1583 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1584 if (__kmp_affinity_respect_mask) {
1585 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1586 } else {
1587 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1588 }
1589 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1590 KMP_INFORM(Uniform, "KMP_AFFINITY");
1591 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1592 __kmp_nThreadsPerCore, __kmp_ncores);
1593 }
1594
1595 if (__kmp_affinity_type == affinity_none) {
1596 __kmp_free(retval);
1597 KMP_CPU_FREE(oldMask);
1598 return 0;
1599 }
1600
1601 //
1602 // Form an Address object which only includes the package level.
1603 //
1604 Address addr(1);
1605 addr.labels[0] = retval[0].first.labels[pkgLevel];
1606 retval[0].first = addr;
1607
1608 if (__kmp_affinity_gran_levels < 0) {
1609 __kmp_affinity_gran_levels = 0;
1610 }
1611
1612 if (__kmp_affinity_verbose) {
1613 __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
1614 }
1615
1616 *address2os = retval;
1617 KMP_CPU_FREE(oldMask);
1618 return 1;
1619 }
1620
1621 //
1622 // Sort the table by physical Id.
1623 //
1624 qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
1625
1626 //
1627 // Find the radix at each of the levels.
1628 //
1629 unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1630 unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1631 unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1632 unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1633 for (level = 0; level < depth; level++) {
1634 totals[level] = 1;
1635 maxCt[level] = 1;
1636 counts[level] = 1;
1637 last[level] = retval[0].first.labels[level];
1638 }
1639
1640 //
 1641 // From here on, the iteration variable "level" runs from the coarsest
 1642 // level to the finest, i.e. we iterate forward through
1643 // (*address2os)[].first.labels[] - in the previous loops, we iterated
1644 // backwards.
1645 //
1646 for (proc = 1; (int)proc < nApics; proc++) {
1647 int level;
1648 for (level = 0; level < depth; level++) {
1649 if (retval[proc].first.labels[level] != last[level]) {
1650 int j;
1651 for (j = level + 1; j < depth; j++) {
1652 totals[j]++;
1653 counts[j] = 1;
 1654 // The line below would cause incorrect topology information to be
 1655 // printed when the maximum count for some level (maxCt[level]) is
 1656 // encountered earlier in the array than a smaller count.
 1657 // For example, suppose pkg0 has 4 cores and pkg1 has 2 cores. Then
 1658 // maxCt[1] would end up as 2, whereas it must be 4.
1659 // TODO!!! Check if it can be commented safely
1660 //maxCt[j] = 1;
1661 last[j] = retval[proc].first.labels[j];
1662 }
1663 totals[level]++;
1664 counts[level]++;
1665 if (counts[level] > maxCt[level]) {
1666 maxCt[level] = counts[level];
1667 }
1668 last[level] = retval[proc].first.labels[level];
1669 break;
1670 }
1671 else if (level == depth - 1) {
1672 __kmp_free(last);
1673 __kmp_free(maxCt);
1674 __kmp_free(counts);
1675 __kmp_free(totals);
1676 __kmp_free(retval);
1677 KMP_CPU_FREE(oldMask);
1678 *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
1679 return -1;
1680 }
1681 }
1682 }
1683
1684 //
1685 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00001686 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001687 // nCoresPerPkg, & nPackages. Make sure all these vars are set
1688 // correctly, and return if affinity is not enabled.
1689 //
1690 if (threadLevel >= 0) {
1691 __kmp_nThreadsPerCore = maxCt[threadLevel];
1692 }
1693 else {
1694 __kmp_nThreadsPerCore = 1;
1695 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001696 nPackages = totals[pkgLevel];
1697
1698 if (coreLevel >= 0) {
1699 __kmp_ncores = totals[coreLevel];
1700 nCoresPerPkg = maxCt[coreLevel];
1701 }
1702 else {
1703 __kmp_ncores = nPackages;
1704 nCoresPerPkg = 1;
1705 }
1706
1707 //
1708 // Check to see if the machine topology is uniform
1709 //
1710 unsigned prod = maxCt[0];
1711 for (level = 1; level < depth; level++) {
1712 prod *= maxCt[level];
1713 }
1714 bool uniform = (prod == totals[level - 1]);
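    //
    // E.g. one package with 4 cores plus one with 2 cores (no SMT) gives
    // prod == 2 * 4 == 8 but only 6 entries at the finest level, so the
    // topology is flagged as non-uniform.
    //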
1715
1716 //
1717 // Print the machine topology summary.
1718 //
1719 if (__kmp_affinity_verbose) {
1720 char mask[KMP_AFFIN_MASK_PRINT_LEN];
1721 __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1722
1723 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1724 if (__kmp_affinity_respect_mask) {
1725 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
1726 } else {
1727 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
1728 }
1729 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1730 if (uniform) {
1731 KMP_INFORM(Uniform, "KMP_AFFINITY");
1732 } else {
1733 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1734 }
1735
1736 kmp_str_buf_t buf;
1737 __kmp_str_buf_init(&buf);
1738
1739 __kmp_str_buf_print(&buf, "%d", totals[0]);
1740 for (level = 1; level <= pkgLevel; level++) {
1741 __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
1742 }
1743 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
1744 __kmp_nThreadsPerCore, __kmp_ncores);
1745
1746 __kmp_str_buf_free(&buf);
1747 }
1748
1749 if (__kmp_affinity_type == affinity_none) {
1750 __kmp_free(last);
1751 __kmp_free(maxCt);
1752 __kmp_free(counts);
1753 __kmp_free(totals);
1754 __kmp_free(retval);
1755 KMP_CPU_FREE(oldMask);
1756 return 0;
1757 }
1758
1759 //
 1760 // Find any levels with radix 1, and remove them from the map
1761 // (except for the package level).
1762 //
1763 int new_depth = 0;
1764 for (level = 0; level < depth; level++) {
1765 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1766 continue;
1767 }
1768 new_depth++;
1769 }
1770
1771 //
1772 // If we are removing any levels, allocate a new vector to return,
1773 // and copy the relevant information to it.
1774 //
1775 if (new_depth != depth) {
1776 AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
1777 sizeof(AddrUnsPair) * nApics);
1778 for (proc = 0; (int)proc < nApics; proc++) {
1779 Address addr(new_depth);
1780 new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
1781 }
1782 int new_level = 0;
Jonathan Peyton62f38402015-08-25 18:44:41 +00001783 int newPkgLevel = -1;
1784 int newCoreLevel = -1;
1785 int newThreadLevel = -1;
1786 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001787 for (level = 0; level < depth; level++) {
Jonathan Peyton62f38402015-08-25 18:44:41 +00001788 if ((maxCt[level] == 1)
1789 && (level != pkgLevel)) {
1790 //
1791 // Remove this level. Never remove the package level
1792 //
1793 continue;
1794 }
1795 if (level == pkgLevel) {
1796 newPkgLevel = level;
1797 }
1798 if (level == coreLevel) {
1799 newCoreLevel = level;
1800 }
1801 if (level == threadLevel) {
1802 newThreadLevel = level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001803 }
1804 for (proc = 0; (int)proc < nApics; proc++) {
1805 new_retval[proc].first.labels[new_level]
1806 = retval[proc].first.labels[level];
1807 }
1808 new_level++;
1809 }
1810
1811 __kmp_free(retval);
1812 retval = new_retval;
1813 depth = new_depth;
Jonathan Peyton62f38402015-08-25 18:44:41 +00001814 pkgLevel = newPkgLevel;
1815 coreLevel = newCoreLevel;
1816 threadLevel = newThreadLevel;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001817 }
1818
1819 if (__kmp_affinity_gran_levels < 0) {
1820 //
1821 // Set the granularity level based on what levels are modeled
1822 // in the machine topology map.
1823 //
1824 __kmp_affinity_gran_levels = 0;
1825 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
1826 __kmp_affinity_gran_levels++;
1827 }
1828 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1829 __kmp_affinity_gran_levels++;
1830 }
1831 if (__kmp_affinity_gran > affinity_gran_package) {
1832 __kmp_affinity_gran_levels++;
1833 }
1834 }
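    //
    // For example, with granularity=core on a topology that models the
    // thread, core, and package levels, only the thread level is finer
    // than the granularity, so __kmp_affinity_gran_levels becomes 1 and
    // the hardware threads of each core will share one affinity mask.
    //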
1835
1836 if (__kmp_affinity_verbose) {
1837 __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
1838 coreLevel, threadLevel);
1839 }
1840
1841 __kmp_free(last);
1842 __kmp_free(maxCt);
1843 __kmp_free(counts);
1844 __kmp_free(totals);
1845 KMP_CPU_FREE(oldMask);
1846 *address2os = retval;
1847 return depth;
1848}
1849
1850
1851# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1852
1853
1854#define osIdIndex 0
1855#define threadIdIndex 1
1856#define coreIdIndex 2
1857#define pkgIdIndex 3
1858#define nodeIdIndex 4
1859
1860typedef unsigned *ProcCpuInfo;
1861static unsigned maxIndex = pkgIdIndex;
1862
1863
1864static int
1865__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
1866{
1867 const unsigned *aa = (const unsigned *)a;
1868 const unsigned *bb = (const unsigned *)b;
1869 if (aa[osIdIndex] < bb[osIdIndex]) return -1;
1870 if (aa[osIdIndex] > bb[osIdIndex]) return 1;
1871 return 0;
1872};
1873
1874
1875static int
1876__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
1877{
1878 unsigned i;
1879 const unsigned *aa = *((const unsigned **)a);
1880 const unsigned *bb = *((const unsigned **)b);
1881 for (i = maxIndex; ; i--) {
1882 if (aa[i] < bb[i]) return -1;
1883 if (aa[i] > bb[i]) return 1;
1884 if (i == osIdIndex) break;
1885 }
1886 return 0;
1887}
1888
1889
1890//
1891// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
1892// affinity map.
1893//
1894static int
1895__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
1896 kmp_i18n_id_t *const msg_id, FILE *f)
1897{
1898 *address2os = NULL;
1899 *msg_id = kmp_i18n_null;
1900
1901 //
 1902 // Scan the file, counting the number of "processor" (osId) fields,
Alp Toker8f2d3f02014-02-24 10:40:15 +00001903 // and find the highest value of <n> for a node_<n> field.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001904 //
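    //
    // An illustrative record (only the fields parsed below matter; exact
    // field sets vary between platforms):
    //
    //     processor       : 0
    //     physical id     : 0
    //     core id         : 0
    //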
1905 char buf[256];
1906 unsigned num_records = 0;
1907 while (! feof(f)) {
1908 buf[sizeof(buf) - 1] = 1;
1909 if (! fgets(buf, sizeof(buf), f)) {
1910 //
 1911 // Read error, presumably because of EOF.
1912 //
1913 break;
1914 }
1915
1916 char s1[] = "processor";
1917 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1918 num_records++;
1919 continue;
1920 }
1921
1922 //
1923 // FIXME - this will match "node_<n> <garbage>"
1924 //
1925 unsigned level;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001926 if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001927 if (nodeIdIndex + level >= maxIndex) {
1928 maxIndex = nodeIdIndex + level;
1929 }
1930 continue;
1931 }
1932 }
1933
1934 //
1935 // Check for empty file / no valid processor records, or too many.
1936 // The number of records can't exceed the number of valid bits in the
1937 // affinity mask.
1938 //
1939 if (num_records == 0) {
1940 *line = 0;
1941 *msg_id = kmp_i18n_str_NoProcRecords;
1942 return -1;
1943 }
1944 if (num_records > (unsigned)__kmp_xproc) {
1945 *line = 0;
1946 *msg_id = kmp_i18n_str_TooManyProcRecords;
1947 return -1;
1948 }
1949
1950 //
 1951 // Set the file pointer back to the beginning, so that we can scan the
 1952 // file again, this time performing a full parse of the data.
 1953 // Allocate a vector of ProcCpuInfo objects, in which we will place the data.
1954 // Adding an extra element at the end allows us to remove a lot of extra
1955 // checks for termination conditions.
1956 //
1957 if (fseek(f, 0, SEEK_SET) != 0) {
1958 *line = 0;
1959 *msg_id = kmp_i18n_str_CantRewindCpuinfo;
1960 return -1;
1961 }
1962
1963 //
1964 // Allocate the array of records to store the proc info in. The dummy
1965 // element at the end makes the logic in filling them out easier to code.
1966 //
1967 unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
1968 * sizeof(unsigned *));
1969 unsigned i;
1970 for (i = 0; i <= num_records; i++) {
1971 threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
1972 * sizeof(unsigned));
1973 }
1974
1975#define CLEANUP_THREAD_INFO \
1976 for (i = 0; i <= num_records; i++) { \
1977 __kmp_free(threadInfo[i]); \
1978 } \
1979 __kmp_free(threadInfo);
1980
1981 //
1982 // A value of UINT_MAX means that we didn't find the field
1983 //
1984 unsigned __index;
1985
1986#define INIT_PROC_INFO(p) \
1987 for (__index = 0; __index <= maxIndex; __index++) { \
1988 (p)[__index] = UINT_MAX; \
1989 }
1990
1991 for (i = 0; i <= num_records; i++) {
1992 INIT_PROC_INFO(threadInfo[i]);
1993 }
1994
1995 unsigned num_avail = 0;
1996 *line = 0;
1997 while (! feof(f)) {
1998 //
1999 // Create an inner scoping level, so that all the goto targets at the
2000 // end of the loop appear in an outer scoping level. This avoids
2001 // warnings about jumping past an initialization to a target in the
2002 // same block.
2003 //
2004 {
2005 buf[sizeof(buf) - 1] = 1;
2006 bool long_line = false;
2007 if (! fgets(buf, sizeof(buf), f)) {
2008 //
2009 // Read errors presumably because of EOF
2010 //
2011 // If there is valid data in threadInfo[num_avail], then fake
2012 // a blank line in ensure that the last address gets parsed.
2013 //
2014 bool valid = false;
2015 for (i = 0; i <= maxIndex; i++) {
2016 if (threadInfo[num_avail][i] != UINT_MAX) {
2017 valid = true;
2018 }
2019 }
2020 if (! valid) {
2021 break;
2022 }
2023 buf[0] = 0;
2024 } else if (!buf[sizeof(buf) - 1]) {
2025 //
2026 // The line is longer than the buffer. Set a flag and don't
2027 // emit an error if we were going to ignore the line, anyway.
2028 //
2029 long_line = true;
2030
2031#define CHECK_LINE \
2032 if (long_line) { \
2033 CLEANUP_THREAD_INFO; \
2034 *msg_id = kmp_i18n_str_LongLineCpuinfo; \
2035 return -1; \
2036 }
2037 }
2038 (*line)++;
2039
2040 char s1[] = "processor";
2041 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
2042 CHECK_LINE;
2043 char *p = strchr(buf + sizeof(s1) - 1, ':');
2044 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002045 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002046 if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
2047 threadInfo[num_avail][osIdIndex] = val;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002048#if KMP_OS_LINUX && USE_SYSFS_INFO
2049 char path[256];
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002050 KMP_SNPRINTF(path, sizeof(path),
Jim Cownie181b4bb2013-12-23 17:28:57 +00002051 "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
2052 threadInfo[num_avail][osIdIndex]);
2053 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
2054
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002055 KMP_SNPRINTF(path, sizeof(path),
Jim Cownie181b4bb2013-12-23 17:28:57 +00002056 "/sys/devices/system/cpu/cpu%u/topology/core_id",
2057 threadInfo[num_avail][osIdIndex]);
2058 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002059 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002060#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002061 }
2062 char s2[] = "physical id";
2063 if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
2064 CHECK_LINE;
2065 char *p = strchr(buf + sizeof(s2) - 1, ':');
2066 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002067 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002068 if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
2069 threadInfo[num_avail][pkgIdIndex] = val;
2070 continue;
2071 }
2072 char s3[] = "core id";
2073 if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
2074 CHECK_LINE;
2075 char *p = strchr(buf + sizeof(s3) - 1, ':');
2076 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002077 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002078 if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
2079 threadInfo[num_avail][coreIdIndex] = val;
2080 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002081#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jim Cownie5e8470a2013-09-27 10:38:44 +00002082 }
2083 char s4[] = "thread id";
2084 if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
2085 CHECK_LINE;
2086 char *p = strchr(buf + sizeof(s4) - 1, ':');
2087 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002088 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002089 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
2090 threadInfo[num_avail][threadIdIndex] = val;
2091 continue;
2092 }
2093 unsigned level;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002094 if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002095 CHECK_LINE;
 2096 char *p = strchr(buf, ':'); // the "node_<n> id" field name varies in length, so scan the whole line
2097 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002098 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002099 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
2100 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
2101 threadInfo[num_avail][nodeIdIndex + level] = val;
2102 continue;
2103 }
2104
2105 //
2106 // We didn't recognize the leading token on the line.
2107 // There are lots of leading tokens that we don't recognize -
2108 // if the line isn't empty, go on to the next line.
2109 //
2110 if ((*buf != 0) && (*buf != '\n')) {
2111 //
2112 // If the line is longer than the buffer, read characters
2113 // until we find a newline.
2114 //
2115 if (long_line) {
2116 int ch;
2117 while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
2118 }
2119 continue;
2120 }
2121
2122 //
2123 // A newline has signalled the end of the processor record.
2124 // Check that there aren't too many procs specified.
2125 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002126 if ((int)num_avail == __kmp_xproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002127 CLEANUP_THREAD_INFO;
2128 *msg_id = kmp_i18n_str_TooManyEntries;
2129 return -1;
2130 }
2131
2132 //
2133 // Check for missing fields. The osId field must be there, and we
2134 // currently require that the physical id field is specified, also.
2135 //
2136 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
2137 CLEANUP_THREAD_INFO;
2138 *msg_id = kmp_i18n_str_MissingProcField;
2139 return -1;
2140 }
2141 if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
2142 CLEANUP_THREAD_INFO;
2143 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
2144 return -1;
2145 }
2146
2147 //
2148 // Skip this proc if it is not included in the machine model.
2149 //
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00002150 if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], __kmp_affin_fullMask)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002151 INIT_PROC_INFO(threadInfo[num_avail]);
2152 continue;
2153 }
2154
2155 //
2156 // We have a successful parse of this proc's info.
2157 // Increment the counter, and prepare for the next proc.
2158 //
2159 num_avail++;
2160 KMP_ASSERT(num_avail <= num_records);
2161 INIT_PROC_INFO(threadInfo[num_avail]);
2162 }
2163 continue;
2164
2165 no_val:
2166 CLEANUP_THREAD_INFO;
2167 *msg_id = kmp_i18n_str_MissingValCpuinfo;
2168 return -1;
2169
2170 dup_field:
2171 CLEANUP_THREAD_INFO;
2172 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
2173 return -1;
2174 }
2175 *line = 0;
2176
2177# if KMP_MIC && REDUCE_TEAM_SIZE
2178 unsigned teamSize = 0;
2179# endif // KMP_MIC && REDUCE_TEAM_SIZE
2180
2181 // check for num_records == __kmp_xproc ???
2182
2183 //
2184 // If there's only one thread context to bind to, form an Address object
2185 // with depth 1 and return immediately (or, if affinity is off, set
2186 // address2os to NULL and return).
2187 //
2188 // If it is configured to omit the package level when there is only a
2189 // single package, the logic at the end of this routine won't work if
2190 // there is only a single thread - it would try to form an Address
2191 // object with depth 0.
2192 //
2193 KMP_ASSERT(num_avail > 0);
2194 KMP_ASSERT(num_avail <= num_records);
2195 if (num_avail == 1) {
2196 __kmp_ncores = 1;
2197 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002198 if (__kmp_affinity_verbose) {
2199 if (! KMP_AFFINITY_CAPABLE()) {
2200 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2201 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2202 KMP_INFORM(Uniform, "KMP_AFFINITY");
2203 }
2204 else {
2205 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2206 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00002207 __kmp_affin_fullMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002208 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2209 if (__kmp_affinity_respect_mask) {
2210 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2211 } else {
2212 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2213 }
2214 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2215 KMP_INFORM(Uniform, "KMP_AFFINITY");
2216 }
2217 int index;
2218 kmp_str_buf_t buf;
2219 __kmp_str_buf_init(&buf);
2220 __kmp_str_buf_print(&buf, "1");
2221 for (index = maxIndex - 1; index > pkgIdIndex; index--) {
2222 __kmp_str_buf_print(&buf, " x 1");
2223 }
2224 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
2225 __kmp_str_buf_free(&buf);
2226 }
2227
2228 if (__kmp_affinity_type == affinity_none) {
2229 CLEANUP_THREAD_INFO;
2230 return 0;
2231 }
2232
2233 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
2234 Address addr(1);
2235 addr.labels[0] = threadInfo[0][pkgIdIndex];
2236 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
2237
2238 if (__kmp_affinity_gran_levels < 0) {
2239 __kmp_affinity_gran_levels = 0;
2240 }
2241
2242 if (__kmp_affinity_verbose) {
2243 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
2244 }
2245
2246 CLEANUP_THREAD_INFO;
2247 return 1;
2248 }
2249
2250 //
2251 // Sort the threadInfo table by physical Id.
2252 //
2253 qsort(threadInfo, num_avail, sizeof(*threadInfo),
2254 __kmp_affinity_cmp_ProcCpuInfo_phys_id);
2255
2256 //
2257 // The table is now sorted by pkgId / coreId / threadId, but we really
2258 // don't know the radix of any of the fields. pkgId's may be sparsely
2259 // assigned among the chips on a system. Although coreId's are usually
2260 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
2261 // [0..threadsPerCore-1], we don't want to make any such assumptions.
2262 //
2263 // For that matter, we don't know what coresPerPkg and threadsPerCore
2264 // (or the total # packages) are at this point - we want to determine
2265 // that now. We only have an upper bound on the first two figures.
2266 //
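    //
    // As a concrete example of what these arrays end up holding: on a
    // uniform 2-package x 2-core x 2-thread machine, totals[pkgIdIndex] == 2,
    // maxCt[coreIdIndex] == 2, maxCt[threadIdIndex] == 2, and
    // totals[threadIdIndex] == 8, the total number of threads.
    //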
2267 unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
2268 * sizeof(unsigned));
2269 unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
2270 * sizeof(unsigned));
2271 unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
2272 * sizeof(unsigned));
2273 unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
2274 * sizeof(unsigned));
2275
2276 bool assign_thread_ids = false;
2277 unsigned threadIdCt;
2278 unsigned index;
2279
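    //
    // Sketch of the two-pass scheme below: if the file has no "thread id"
    // fields, the first pass notices a duplicate (..., pkg id, core id)
    // tuple among the sorted records, sets assign_thread_ids, and jumps
    // back here so the second pass can number the threads on each core
    // 0, 1, 2, ...
    //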
2280 restart_radix_check:
2281 threadIdCt = 0;
2282
2283 //
2284 // Initialize the counter arrays with data from threadInfo[0].
2285 //
2286 if (assign_thread_ids) {
2287 if (threadInfo[0][threadIdIndex] == UINT_MAX) {
2288 threadInfo[0][threadIdIndex] = threadIdCt++;
2289 }
2290 else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
2291 threadIdCt = threadInfo[0][threadIdIndex] + 1;
2292 }
2293 }
2294 for (index = 0; index <= maxIndex; index++) {
2295 counts[index] = 1;
2296 maxCt[index] = 1;
2297 totals[index] = 1;
 2298 lastId[index] = threadInfo[0][index];
2299 }
2300
2301 //
2302 // Run through the rest of the OS procs.
2303 //
2304 for (i = 1; i < num_avail; i++) {
2305 //
2306 // Find the most significant index whose id differs
2307 // from the id for the previous OS proc.
2308 //
2309 for (index = maxIndex; index >= threadIdIndex; index--) {
2310 if (assign_thread_ids && (index == threadIdIndex)) {
2311 //
2312 // Auto-assign the thread id field if it wasn't specified.
2313 //
2314 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2315 threadInfo[i][threadIdIndex] = threadIdCt++;
2316 }
2317
2318 //
 2319 // Apparently the thread id field was specified for some
2320 // entries and not others. Start the thread id counter
2321 // off at the next higher thread id.
2322 //
2323 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2324 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2325 }
2326 }
2327 if (threadInfo[i][index] != lastId[index]) {
2328 //
2329 // Run through all indices which are less significant,
2330 // and reset the counts to 1.
2331 //
2332 // At all levels up to and including index, we need to
2333 // increment the totals and record the last id.
2334 //
2335 unsigned index2;
2336 for (index2 = threadIdIndex; index2 < index; index2++) {
2337 totals[index2]++;
2338 if (counts[index2] > maxCt[index2]) {
2339 maxCt[index2] = counts[index2];
2340 }
2341 counts[index2] = 1;
2342 lastId[index2] = threadInfo[i][index2];
2343 }
2344 counts[index]++;
2345 totals[index]++;
2346 lastId[index] = threadInfo[i][index];
2347
2348 if (assign_thread_ids && (index > threadIdIndex)) {
2349
2350# if KMP_MIC && REDUCE_TEAM_SIZE
2351 //
2352 // The default team size is the total #threads in the machine
2353 // minus 1 thread for every core that has 3 or more threads.
2354 //
2355 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2356# endif // KMP_MIC && REDUCE_TEAM_SIZE
2357
2358 //
2359 // Restart the thread counter, as we are on a new core.
2360 //
2361 threadIdCt = 0;
2362
2363 //
2364 // Auto-assign the thread id field if it wasn't specified.
2365 //
2366 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2367 threadInfo[i][threadIdIndex] = threadIdCt++;
2368 }
2369
2370 //
 2371 // Apparently the thread id field was specified for some
2372 // entries and not others. Start the thread id counter
2373 // off at the next higher thread id.
2374 //
2375 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2376 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2377 }
2378 }
2379 break;
2380 }
2381 }
2382 if (index < threadIdIndex) {
2383 //
2384 // If thread ids were specified, it is an error if they are not
 2385 // unique. Also, check that we haven't already restarted the
2386 // loop (to be safe - shouldn't need to).
2387 //
2388 if ((threadInfo[i][threadIdIndex] != UINT_MAX)
2389 || assign_thread_ids) {
2390 __kmp_free(lastId);
2391 __kmp_free(totals);
2392 __kmp_free(maxCt);
2393 __kmp_free(counts);
2394 CLEANUP_THREAD_INFO;
2395 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2396 return -1;
2397 }
2398
2399 //
 2400 // If the thread ids were not specified and we see entries that
 2401 // are duplicates, start the loop over and
2402 // assign the thread ids manually.
2403 //
2404 assign_thread_ids = true;
2405 goto restart_radix_check;
2406 }
2407 }
2408
2409# if KMP_MIC && REDUCE_TEAM_SIZE
2410 //
2411 // The default team size is the total #threads in the machine
2412 // minus 1 thread for every core that has 3 or more threads.
2413 //
2414 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2415# endif // KMP_MIC && REDUCE_TEAM_SIZE
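    //
    // A sketch of the numbers involved: on a part with 60 cores and 4
    // hardware threads per core, each core contributes threadIdCt - 1 == 3,
    // so the default team size works out to 60 * 3 == 180.
    //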
2416
2417 for (index = threadIdIndex; index <= maxIndex; index++) {
2418 if (counts[index] > maxCt[index]) {
2419 maxCt[index] = counts[index];
2420 }
2421 }
2422
2423 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2424 nCoresPerPkg = maxCt[coreIdIndex];
2425 nPackages = totals[pkgIdIndex];
2426
2427 //
2428 // Check to see if the machine topology is uniform
2429 //
2430 unsigned prod = totals[maxIndex];
2431 for (index = threadIdIndex; index < maxIndex; index++) {
2432 prod *= maxCt[index];
2433 }
2434 bool uniform = (prod == totals[threadIdIndex]);
2435
2436 //
2437 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00002438 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002439 // nCoresPerPkg, & nPackages. Make sure all these vars are set
2440 // correctly, and return now if affinity is not enabled.
2441 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00002442 __kmp_ncores = totals[coreIdIndex];
2443
2444 if (__kmp_affinity_verbose) {
2445 if (! KMP_AFFINITY_CAPABLE()) {
2446 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2447 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2448 if (uniform) {
2449 KMP_INFORM(Uniform, "KMP_AFFINITY");
2450 } else {
2451 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2452 }
2453 }
2454 else {
2455 char buf[KMP_AFFIN_MASK_PRINT_LEN];
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00002456 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, __kmp_affin_fullMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002457 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2458 if (__kmp_affinity_respect_mask) {
2459 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2460 } else {
2461 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2462 }
2463 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2464 if (uniform) {
2465 KMP_INFORM(Uniform, "KMP_AFFINITY");
2466 } else {
2467 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2468 }
2469 }
2470 kmp_str_buf_t buf;
2471 __kmp_str_buf_init(&buf);
2472
2473 __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
2474 for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2475 __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
2476 }
2477 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
2478 maxCt[threadIdIndex], __kmp_ncores);
2479
2480 __kmp_str_buf_free(&buf);
2481 }
2482
2483# if KMP_MIC && REDUCE_TEAM_SIZE
2484 //
2485 // Set the default team size.
2486 //
2487 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2488 __kmp_dflt_team_nth = teamSize;
2489 KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
2490 __kmp_dflt_team_nth));
2491 }
2492# endif // KMP_MIC && REDUCE_TEAM_SIZE
2493
2494 if (__kmp_affinity_type == affinity_none) {
2495 __kmp_free(lastId);
2496 __kmp_free(totals);
2497 __kmp_free(maxCt);
2498 __kmp_free(counts);
2499 CLEANUP_THREAD_INFO;
2500 return 0;
2501 }
2502
2503 //
2504 // Count the number of levels which have more nodes at that level than
 2505 // at the parent's level (with an implicit root node above the top
 2506 // level). This is equivalent to saying that there is at least
2507 // one node at this level which has a sibling. These levels are in the
2508 // map, and the package level is always in the map.
2509 //
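    //
    // For instance, if hyperthreading is disabled, totals[threadIdIndex]
    // equals totals[coreIdIndex], so no thread has a sibling and the
    // thread level is left out of the map.
    //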
2510 bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
2511 int level = 0;
2512 for (index = threadIdIndex; index < maxIndex; index++) {
2513 KMP_ASSERT(totals[index] >= totals[index + 1]);
2514 inMap[index] = (totals[index] > totals[index + 1]);
2515 }
2516 inMap[maxIndex] = (totals[maxIndex] > 1);
2517 inMap[pkgIdIndex] = true;
2518
2519 int depth = 0;
2520 for (index = threadIdIndex; index <= maxIndex; index++) {
2521 if (inMap[index]) {
2522 depth++;
2523 }
2524 }
2525 KMP_ASSERT(depth > 0);
2526
2527 //
2528 // Construct the data structure that is to be returned.
2529 //
2530 *address2os = (AddrUnsPair*)
2531 __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
2532 int pkgLevel = -1;
2533 int coreLevel = -1;
2534 int threadLevel = -1;
2535
2536 for (i = 0; i < num_avail; ++i) {
2537 Address addr(depth);
2538 unsigned os = threadInfo[i][osIdIndex];
2539 int src_index;
2540 int dst_index = 0;
2541
2542 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
2543 if (! inMap[src_index]) {
2544 continue;
2545 }
2546 addr.labels[dst_index] = threadInfo[i][src_index];
2547 if (src_index == pkgIdIndex) {
2548 pkgLevel = dst_index;
2549 }
2550 else if (src_index == coreIdIndex) {
2551 coreLevel = dst_index;
2552 }
2553 else if (src_index == threadIdIndex) {
2554 threadLevel = dst_index;
2555 }
2556 dst_index++;
2557 }
2558 (*address2os)[i] = AddrUnsPair(addr, os);
2559 }
2560
2561 if (__kmp_affinity_gran_levels < 0) {
2562 //
2563 // Set the granularity level based on what levels are modeled
2564 // in the machine topology map.
2565 //
2566 unsigned src_index;
2567 __kmp_affinity_gran_levels = 0;
2568 for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2569 if (! inMap[src_index]) {
2570 continue;
2571 }
2572 switch (src_index) {
2573 case threadIdIndex:
2574 if (__kmp_affinity_gran > affinity_gran_thread) {
2575 __kmp_affinity_gran_levels++;
2576 }
2577
2578 break;
2579 case coreIdIndex:
2580 if (__kmp_affinity_gran > affinity_gran_core) {
2581 __kmp_affinity_gran_levels++;
2582 }
2583 break;
2584
2585 case pkgIdIndex:
2586 if (__kmp_affinity_gran > affinity_gran_package) {
2587 __kmp_affinity_gran_levels++;
2588 }
2589 break;
2590 }
2591 }
2592 }
2593
2594 if (__kmp_affinity_verbose) {
2595 __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2596 coreLevel, threadLevel);
2597 }
2598
2599 __kmp_free(inMap);
2600 __kmp_free(lastId);
2601 __kmp_free(totals);
2602 __kmp_free(maxCt);
2603 __kmp_free(counts);
2604 CLEANUP_THREAD_INFO;
2605 return depth;
2606}
2607
2608
2609//
2610// Create and return a table of affinity masks, indexed by OS thread ID.
2611// This routine handles OR'ing together all the affinity masks of threads
2612// that are sufficiently close, if granularity > fine.
2613//
2614static kmp_affin_mask_t *
2615__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
2616 AddrUnsPair *address2os, unsigned numAddrs)
2617{
2618 //
2619 // First form a table of affinity masks in order of OS thread id.
2620 //
2621 unsigned depth;
2622 unsigned maxOsId;
2623 unsigned i;
2624
2625 KMP_ASSERT(numAddrs > 0);
2626 depth = address2os[0].first.depth;
2627
2628 maxOsId = 0;
2629 for (i = 0; i < numAddrs; i++) {
2630 unsigned osId = address2os[i].second;
2631 if (osId > maxOsId) {
2632 maxOsId = osId;
2633 }
2634 }
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002635 kmp_affin_mask_t *osId2Mask;
2636 KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId+1));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002637
2638 //
2639 // Sort the address2os table according to physical order. Doing so
2640 // will put all threads on the same core/package/node in consecutive
2641 // locations.
2642 //
2643 qsort(address2os, numAddrs, sizeof(*address2os),
2644 __kmp_affinity_cmp_Address_labels);
2645
2646 KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2647 if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
2648 KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
2649 }
2650 if (__kmp_affinity_gran_levels >= (int)depth) {
2651 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2652 && (__kmp_affinity_type != affinity_none))) {
2653 KMP_WARNING(AffThreadsMayMigrate);
2654 }
2655 }
2656
2657 //
2658 // Run through the table, forming the masks for all threads on each
2659 // core. Threads on the same core will have identical "Address"
2660 // objects, not considering the last level, which must be the thread
2661 // id. All threads on a core will appear consecutively.
2662 //
2663 unsigned unique = 0;
2664 unsigned j = 0; // index of 1st thread on core
2665 unsigned leader = 0;
2666 Address *leaderAddr = &(address2os[0].first);
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002667 kmp_affin_mask_t *sum;
2668 KMP_CPU_ALLOC_ON_STACK(sum);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002669 KMP_CPU_ZERO(sum);
2670 KMP_CPU_SET(address2os[0].second, sum);
2671 for (i = 1; i < numAddrs; i++) {
2672 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002673 // If this thread is sufficiently close to the leader (within the
Jim Cownie5e8470a2013-09-27 10:38:44 +00002674 // granularity setting), then set the bit for this os thread in the
2675 // affinity mask for this group, and go on to the next thread.
2676 //
2677 if (leaderAddr->isClose(address2os[i].first,
2678 __kmp_affinity_gran_levels)) {
2679 KMP_CPU_SET(address2os[i].second, sum);
2680 continue;
2681 }
2682
2683 //
2684 // For every thread in this group, copy the mask to the thread's
2685 // entry in the osId2Mask table. Mark the first address as a
2686 // leader.
2687 //
2688 for (; j < i; j++) {
2689 unsigned osId = address2os[j].second;
2690 KMP_DEBUG_ASSERT(osId <= maxOsId);
2691 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2692 KMP_CPU_COPY(mask, sum);
2693 address2os[j].first.leader = (j == leader);
2694 }
2695 unique++;
2696
2697 //
2698 // Start a new mask.
2699 //
2700 leader = i;
2701 leaderAddr = &(address2os[i].first);
2702 KMP_CPU_ZERO(sum);
2703 KMP_CPU_SET(address2os[i].second, sum);
2704 }
2705
2706 //
2707 // For every thread in last group, copy the mask to the thread's
2708 // entry in the osId2Mask table.
2709 //
2710 for (; j < i; j++) {
2711 unsigned osId = address2os[j].second;
2712 KMP_DEBUG_ASSERT(osId <= maxOsId);
2713 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2714 KMP_CPU_COPY(mask, sum);
2715 address2os[j].first.leader = (j == leader);
2716 }
2717 unique++;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002718 KMP_CPU_FREE_FROM_STACK(sum);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002719
2720 *maxIndex = maxOsId;
2721 *numUnique = unique;
2722 return osId2Mask;
2723}
2724
2725
2726//
2727// Stuff for the affinity proclist parsers. It's easier to declare these vars
2728// as file-static than to try and pass them through the calling sequence of
2729// the recursive-descent OMP_PLACES parser.
2730//
2731static kmp_affin_mask_t *newMasks;
2732static int numNewMasks;
2733static int nextNewMask;
2734
2735#define ADD_MASK(_mask) \
2736 { \
2737 if (nextNewMask >= numNewMasks) { \
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002738 int i; \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002739 numNewMasks *= 2; \
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002740 kmp_affin_mask_t* temp; \
2741 KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks); \
2742 for(i=0;i<numNewMasks/2;i++) { \
2743 kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i); \
2744 kmp_affin_mask_t* dest = KMP_CPU_INDEX(temp, i); \
2745 KMP_CPU_COPY(dest, src); \
2746 } \
2747 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks/2); \
2748 newMasks = temp; \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002749 } \
2750 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
2751 nextNewMask++; \
2752 }
2753
2754#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
2755 { \
2756 if (((_osId) > _maxOsId) || \
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002757 (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002758 if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
2759 && (__kmp_affinity_type != affinity_none))) { \
2760 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
2761 } \
2762 } \
2763 else { \
2764 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
2765 } \
2766 }
2767
2768
2769//
2770// Re-parse the proclist (for the explicit affinity type), and form the list
2771// of affinity newMasks indexed by gtid.
2772//
2773static void
2774__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
2775 unsigned int *out_numMasks, const char *proclist,
2776 kmp_affin_mask_t *osId2Mask, int maxOsId)
2777{
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002778 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002779 const char *scan = proclist;
2780 const char *next = proclist;
2781
2782 //
 2783 // The temporary mask vector starts small; the ADD_MASK macro
 2784 // doubles its size whenever it fills up.
2785 //
2786 numNewMasks = 2;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002787 KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002788 nextNewMask = 0;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002789 kmp_affin_mask_t *sumMask;
2790 KMP_CPU_ALLOC(sumMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002791 int setSize = 0;
2792
2793 for (;;) {
2794 int start, end, stride;
2795
2796 SKIP_WS(scan);
2797 next = scan;
2798 if (*next == '\0') {
2799 break;
2800 }
2801
2802 if (*next == '{') {
2803 int num;
2804 setSize = 0;
2805 next++; // skip '{'
2806 SKIP_WS(next);
2807 scan = next;
2808
2809 //
2810 // Read the first integer in the set.
2811 //
2812 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2813 "bad proclist");
2814 SKIP_DIGITS(next);
2815 num = __kmp_str_to_int(scan, *next);
2816 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2817
2818 //
2819 // Copy the mask for that osId to the sum (union) mask.
2820 //
2821 if ((num > maxOsId) ||
2822 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2823 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2824 && (__kmp_affinity_type != affinity_none))) {
2825 KMP_WARNING(AffIgnoreInvalidProcID, num);
2826 }
2827 KMP_CPU_ZERO(sumMask);
2828 }
2829 else {
2830 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2831 setSize = 1;
2832 }
2833
2834 for (;;) {
2835 //
2836 // Check for end of set.
2837 //
2838 SKIP_WS(next);
2839 if (*next == '}') {
2840 next++; // skip '}'
2841 break;
2842 }
2843
2844 //
2845 // Skip optional comma.
2846 //
2847 if (*next == ',') {
2848 next++;
2849 }
2850 SKIP_WS(next);
2851
2852 //
2853 // Read the next integer in the set.
2854 //
2855 scan = next;
2856 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2857 "bad explicit proc list");
2858
2859 SKIP_DIGITS(next);
2860 num = __kmp_str_to_int(scan, *next);
2861 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2862
2863 //
2864 // Add the mask for that osId to the sum mask.
2865 //
2866 if ((num > maxOsId) ||
2867 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2868 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2869 && (__kmp_affinity_type != affinity_none))) {
2870 KMP_WARNING(AffIgnoreInvalidProcID, num);
2871 }
2872 }
2873 else {
2874 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2875 setSize++;
2876 }
2877 }
2878 if (setSize > 0) {
2879 ADD_MASK(sumMask);
2880 }
2881
2882 SKIP_WS(next);
2883 if (*next == ',') {
2884 next++;
2885 }
2886 scan = next;
2887 continue;
2888 }
2889
2890 //
2891 // Read the first integer.
2892 //
2893 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2894 SKIP_DIGITS(next);
2895 start = __kmp_str_to_int(scan, *next);
2896 KMP_ASSERT2(start >= 0, "bad explicit proc list");
2897 SKIP_WS(next);
2898
2899 //
2900 // If this isn't a range, then add a mask to the list and go on.
2901 //
2902 if (*next != '-') {
2903 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2904
2905 //
2906 // Skip optional comma.
2907 //
2908 if (*next == ',') {
2909 next++;
2910 }
2911 scan = next;
2912 continue;
2913 }
2914
2915 //
2916 // This is a range. Skip over the '-' and read in the 2nd int.
2917 //
2918 next++; // skip '-'
2919 SKIP_WS(next);
2920 scan = next;
2921 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2922 SKIP_DIGITS(next);
2923 end = __kmp_str_to_int(scan, *next);
2924 KMP_ASSERT2(end >= 0, "bad explicit proc list");
2925
2926 //
2927 // Check for a stride parameter
2928 //
2929 stride = 1;
2930 SKIP_WS(next);
2931 if (*next == ':') {
2932 //
 2933 // A stride is specified. Skip over the ':' and read the 3rd int.
2934 //
2935 int sign = +1;
2936 next++; // skip ':'
2937 SKIP_WS(next);
2938 scan = next;
2939 if (*next == '-') {
2940 sign = -1;
2941 next++;
2942 SKIP_WS(next);
2943 scan = next;
2944 }
2945 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2946 "bad explicit proc list");
2947 SKIP_DIGITS(next);
2948 stride = __kmp_str_to_int(scan, *next);
2949 KMP_ASSERT2(stride >= 0, "bad explicit proc list");
2950 stride *= sign;
2951 }
2952
2953 //
2954 // Do some range checks.
2955 //
2956 KMP_ASSERT2(stride != 0, "bad explicit proc list");
2957 if (stride > 0) {
2958 KMP_ASSERT2(start <= end, "bad explicit proc list");
2959 }
2960 else {
2961 KMP_ASSERT2(start >= end, "bad explicit proc list");
2962 }
2963 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
2964
2965 //
2966 // Add the mask for each OS proc # to the list.
2967 //
2968 if (stride > 0) {
2969 do {
2970 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2971 start += stride;
2972 } while (start <= end);
2973 }
2974 else {
2975 do {
2976 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2977 start += stride;
2978 } while (start >= end);
2979 }
2980
2981 //
2982 // Skip optional comma.
2983 //
2984 SKIP_WS(next);
2985 if (*next == ',') {
2986 next++;
2987 }
2988 scan = next;
2989 }
2990
2991 *out_numMasks = nextNewMask;
2992 if (nextNewMask == 0) {
2993 *out_masks = NULL;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002994 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002995 return;
2996 }
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002997 KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
2998 for(i = 0; i < nextNewMask; i++) {
2999 kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i);
3000 kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i);
3001 KMP_CPU_COPY(dest, src);
3002 }
3003 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
3004 KMP_CPU_FREE(sumMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003005}
3006
3007
3008# if OMP_40_ENABLED
3009
3010/*-----------------------------------------------------------------------------
3011
3012Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
3013 places. Again, here is the grammar:
3014
3015place_list := place
3016place_list := place , place_list
3017place := num
3018place := place : num
3019place := place : num : signed
3020 place := { subplace_list }
3021place := ! place // (lowest priority)
3022subplace_list := subplace
3023subplace_list := subplace , subplace_list
3024subplace := num
3025subplace := num : num
3026subplace := num : num : signed
3027signed := num
3028signed := + signed
3029signed := - signed
3030
3031-----------------------------------------------------------------------------*/
3032
3033static void
3034__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
3035 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3036{
3037 const char *next;
3038
3039 for (;;) {
3040 int start, count, stride, i;
3041
3042 //
3043 // Read in the starting proc id
3044 //
3045 SKIP_WS(*scan);
3046 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3047 "bad explicit places list");
3048 next = *scan;
3049 SKIP_DIGITS(next);
3050 start = __kmp_str_to_int(*scan, *next);
3051 KMP_ASSERT(start >= 0);
3052 *scan = next;
3053
3054 //
3055 // valid follow sets are ',' ':' and '}'
3056 //
3057 SKIP_WS(*scan);
3058 if (**scan == '}' || **scan == ',') {
3059 if ((start > maxOsId) ||
3060 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3061 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3062 && (__kmp_affinity_type != affinity_none))) {
3063 KMP_WARNING(AffIgnoreInvalidProcID, start);
3064 }
3065 }
3066 else {
3067 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3068 (*setSize)++;
3069 }
3070 if (**scan == '}') {
3071 break;
3072 }
3073 (*scan)++; // skip ','
3074 continue;
3075 }
3076 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3077 (*scan)++; // skip ':'
3078
3079 //
3080 // Read count parameter
3081 //
3082 SKIP_WS(*scan);
3083 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3084 "bad explicit places list");
3085 next = *scan;
3086 SKIP_DIGITS(next);
3087 count = __kmp_str_to_int(*scan, *next);
3088 KMP_ASSERT(count >= 0);
3089 *scan = next;
3090
3091 //
3092 // valid follow sets are ',' ':' and '}'
3093 //
3094 SKIP_WS(*scan);
3095 if (**scan == '}' || **scan == ',') {
3096 for (i = 0; i < count; i++) {
3097 if ((start > maxOsId) ||
3098 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3099 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3100 && (__kmp_affinity_type != affinity_none))) {
3101 KMP_WARNING(AffIgnoreInvalidProcID, start);
3102 }
3103 break; // don't proliferate warnings for large count
3104 }
3105 else {
3106 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3107 start++;
3108 (*setSize)++;
3109 }
3110 }
3111 if (**scan == '}') {
3112 break;
3113 }
3114 (*scan)++; // skip ','
3115 continue;
3116 }
3117 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3118 (*scan)++; // skip ':'
3119
3120 //
3121 // Read stride parameter
3122 //
3123 int sign = +1;
3124 for (;;) {
3125 SKIP_WS(*scan);
3126 if (**scan == '+') {
3127 (*scan)++; // skip '+'
3128 continue;
3129 }
3130 if (**scan == '-') {
3131 sign *= -1;
3132 (*scan)++; // skip '-'
3133 continue;
3134 }
3135 break;
3136 }
3137 SKIP_WS(*scan);
3138 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3139 "bad explicit places list");
3140 next = *scan;
3141 SKIP_DIGITS(next);
3142 stride = __kmp_str_to_int(*scan, *next);
3143 KMP_ASSERT(stride >= 0);
3144 *scan = next;
3145 stride *= sign;
3146
3147 //
3148 // valid follow sets are ',' and '}'
3149 //
3150 SKIP_WS(*scan);
3151 if (**scan == '}' || **scan == ',') {
3152 for (i = 0; i < count; i++) {
3153 if ((start > maxOsId) ||
3154 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3155 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3156 && (__kmp_affinity_type != affinity_none))) {
3157 KMP_WARNING(AffIgnoreInvalidProcID, start);
3158 }
3159 break; // don't proliferate warnings for large count
3160 }
3161 else {
3162 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3163 start += stride;
3164 (*setSize)++;
3165 }
3166 }
3167 if (**scan == '}') {
3168 break;
3169 }
3170 (*scan)++; // skip ','
3171 continue;
3172 }
3173
3174 KMP_ASSERT2(0, "bad explicit places list");
3175 }
3176}
3177
3178
3179static void
3180__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
3181 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3182{
3183 const char *next;
3184
3185 //
3186 // valid follow sets are '{' '!' and num
3187 //
3188 SKIP_WS(*scan);
3189 if (**scan == '{') {
3190 (*scan)++; // skip '{'
3191 __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
3192 setSize);
3193 KMP_ASSERT2(**scan == '}', "bad explicit places list");
3194 (*scan)++; // skip '}'
3195 }
3196 else if (**scan == '!') {
Jonathan Peyton6778c732015-10-19 19:43:01 +00003197 (*scan)++; // skip '!'
Jim Cownie5e8470a2013-09-27 10:38:44 +00003198 __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003199 KMP_CPU_COMPLEMENT(maxOsId, tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003200 }
3201 else if ((**scan >= '0') && (**scan <= '9')) {
3202 next = *scan;
3203 SKIP_DIGITS(next);
3204 int num = __kmp_str_to_int(*scan, *next);
3205 KMP_ASSERT(num >= 0);
3206 if ((num > maxOsId) ||
3207 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3208 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3209 && (__kmp_affinity_type != affinity_none))) {
3210 KMP_WARNING(AffIgnoreInvalidProcID, num);
3211 }
3212 }
3213 else {
3214 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
3215 (*setSize)++;
3216 }
3217 *scan = next; // skip num
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003218 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003219 else {
3220 KMP_ASSERT2(0, "bad explicit places list");
3221 }
3222}
3223
3224
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003225//static void
3226void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003227__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
3228 unsigned int *out_numMasks, const char *placelist,
3229 kmp_affin_mask_t *osId2Mask, int maxOsId)
3230{
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003231 int i,j,count,stride,sign;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003232 const char *scan = placelist;
3233 const char *next = placelist;
3234
3235 numNewMasks = 2;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003236 KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003237 nextNewMask = 0;
3238
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003239 // tempMask is modified based on the previous or initial
3240 // place to form the current place
3241 // previousMask contains the previous place
3242 kmp_affin_mask_t *tempMask;
3243 kmp_affin_mask_t *previousMask;
3244 KMP_CPU_ALLOC(tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003245 KMP_CPU_ZERO(tempMask);
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003246 KMP_CPU_ALLOC(previousMask);
3247 KMP_CPU_ZERO(previousMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003248 int setSize = 0;
3249
3250 for (;;) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003251 __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
3252
3253 //
3254 // valid follow sets are ',' ':' and EOL
3255 //
3256 SKIP_WS(scan);
3257 if (*scan == '\0' || *scan == ',') {
3258 if (setSize > 0) {
3259 ADD_MASK(tempMask);
3260 }
3261 KMP_CPU_ZERO(tempMask);
3262 setSize = 0;
3263 if (*scan == '\0') {
3264 break;
3265 }
3266 scan++; // skip ','
3267 continue;
3268 }
3269
3270 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3271 scan++; // skip ':'
3272
3273 //
3274 // Read count parameter
3275 //
3276 SKIP_WS(scan);
3277 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3278 "bad explicit places list");
3279 next = scan;
3280 SKIP_DIGITS(next);
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003281 count = __kmp_str_to_int(scan, *next);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003282 KMP_ASSERT(count >= 0);
3283 scan = next;
3284
3285 //
3286 // valid follow sets are ',' ':' and EOL
3287 //
3288 SKIP_WS(scan);
3289 if (*scan == '\0' || *scan == ',') {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003290 stride = +1;
3291 }
3292 else {
3293 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3294 scan++; // skip ':'
Jim Cownie5e8470a2013-09-27 10:38:44 +00003295
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003296 //
3297 // Read stride parameter
3298 //
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003299 sign = +1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003300 for (;;) {
3301 SKIP_WS(scan);
3302 if (*scan == '+') {
3303 scan++; // skip '+'
3304 continue;
3305 }
3306 if (*scan == '-') {
3307 sign *= -1;
3308 scan++; // skip '-'
3309 continue;
3310 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003311 break;
3312 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003313 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003314 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3315 "bad explicit places list");
3316 next = scan;
3317 SKIP_DIGITS(next);
3318 stride = __kmp_str_to_int(scan, *next);
3319 KMP_DEBUG_ASSERT(stride >= 0);
3320 scan = next;
3321 stride *= sign;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003322 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003323
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003324 // Add places determined by initial_place : count : stride
3325 for (i = 0; i < count; i++) {
3326 if (setSize == 0) {
3327 break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003328 }
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003329 // Add the current place, then build the next place (tempMask) from that
3330 KMP_CPU_COPY(previousMask, tempMask);
3331 ADD_MASK(previousMask);
3332 KMP_CPU_ZERO(tempMask);
3333 setSize = 0;
3334 KMP_CPU_SET_ITERATE(j, previousMask) {
3335 if (! KMP_CPU_ISSET(j, previousMask)) {
3336 continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003337 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00003338 if ((j+stride > maxOsId) || (j+stride < 0) ||
3339 (! KMP_CPU_ISSET(j, __kmp_affin_fullMask)) ||
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003340 (! KMP_CPU_ISSET(j+stride, KMP_CPU_INDEX(osId2Mask, j+stride)))) {
3341 if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
3342 && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
3343 KMP_WARNING(AffIgnoreInvalidProcID, j+stride);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003344 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00003345 continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003346 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00003347 KMP_CPU_SET(j+stride, tempMask);
3348 setSize++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003349 }
3350 }
3351 KMP_CPU_ZERO(tempMask);
3352 setSize = 0;
3353
3354 //
3355 // valid follow sets are ',' and EOL
3356 //
3357 SKIP_WS(scan);
3358 if (*scan == '\0') {
3359 break;
3360 }
3361 if (*scan == ',') {
3362 scan++; // skip ','
3363 continue;
3364 }
3365
3366 KMP_ASSERT2(0, "bad explicit places list");
3367 }
3368
3369 *out_numMasks = nextNewMask;
3370 if (nextNewMask == 0) {
3371 *out_masks = NULL;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003372 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003373 return;
3374 }
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003375 KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
3376 KMP_CPU_FREE(tempMask);
3377 KMP_CPU_FREE(previousMask);
3378 for(i = 0; i < nextNewMask; i++) {
3379 kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i);
3380 kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i);
3381 KMP_CPU_COPY(dest, src);
3382 }
3383 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003384}
3385
3386# endif /* OMP_40_ENABLED */
3387
3388#undef ADD_MASK
3389#undef ADD_MASK_OSID
3390
Jim Cownie5e8470a2013-09-27 10:38:44 +00003391static void
3392__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
3393{
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003394 if (__kmp_place_num_sockets == 0 &&
3395 __kmp_place_num_cores == 0 &&
3396 __kmp_place_num_threads_per_core == 0 )
3397 return; // no topology limiting actions requested, exit
3398 if (__kmp_place_num_sockets == 0)
3399 __kmp_place_num_sockets = nPackages; // use all available sockets
3400 if (__kmp_place_num_cores == 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003401 __kmp_place_num_cores = nCoresPerPkg; // use all available cores
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003402 if (__kmp_place_num_threads_per_core == 0 ||
3403 __kmp_place_num_threads_per_core > __kmp_nThreadsPerCore)
3404 __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
3405
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003406 if ( !__kmp_affinity_uniform_topology() ) {
Jonathan Peytonb9d28fb2016-06-16 18:53:48 +00003407 KMP_WARNING( AffHWSubsetNonUniform );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003408 return; // don't support non-uniform topology
3409 }
3410 if ( depth != 3 ) {
Jonathan Peytonb9d28fb2016-06-16 18:53:48 +00003411 KMP_WARNING( AffHWSubsetNonThreeLevel );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003412 return; // don't support not-3-level topology
Jim Cownie5e8470a2013-09-27 10:38:44 +00003413 }
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003414 if (__kmp_place_socket_offset + __kmp_place_num_sockets > nPackages) {
Jonathan Peytonb9d28fb2016-06-16 18:53:48 +00003415 KMP_WARNING(AffHWSubsetManySockets);
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003416 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003417 }
Andrey Churbanov12875572015-03-10 09:00:36 +00003418 if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
Jonathan Peytonb9d28fb2016-06-16 18:53:48 +00003419 KMP_WARNING( AffHWSubsetManyCores );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003420 return;
3421 }
3422
3423 AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003424 __kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
3425
Jim Cownie5e8470a2013-09-27 10:38:44 +00003426 int i, j, k, n_old = 0, n_new = 0;
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003427 for (i = 0; i < nPackages; ++i)
3428 if (i < __kmp_place_socket_offset ||
3429 i >= __kmp_place_socket_offset + __kmp_place_num_sockets)
3430 n_old += nCoresPerPkg * __kmp_nThreadsPerCore; // skip not-requested socket
3431 else
3432 for (j = 0; j < nCoresPerPkg; ++j) // walk through requested socket
3433 if (j < __kmp_place_core_offset ||
3434 j >= __kmp_place_core_offset + __kmp_place_num_cores)
3435 n_old += __kmp_nThreadsPerCore; // skip not-requested core
3436 else
3437 for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through requested core
3438 if (k < __kmp_place_num_threads_per_core) {
3439 newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
3440 n_new++;
3441 }
3442 n_old++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003443 }
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003444 KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
3445 KMP_DEBUG_ASSERT(n_new == __kmp_place_num_sockets * __kmp_place_num_cores *
3446 __kmp_place_num_threads_per_core);
3447
3448 nPackages = __kmp_place_num_sockets; // correct nPackages
Jim Cownie5e8470a2013-09-27 10:38:44 +00003449 nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
3450 __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
3451 __kmp_avail_proc = n_new; // correct avail_proc
3452 __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
3453
3454 __kmp_free( *pAddr );
3455 *pAddr = newAddr; // replace old topology with new one
3456}
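
// A worked example of the subsetting above (illustrative numbers): on a
// uniform 2-socket x 8-core x 2-thread machine (32 OS procs), requesting
// 1 socket, 4 cores and 2 threads per core with __kmp_place_core_offset == 2
// keeps cores 2..5 of the requested socket, so n_new == 1 * 4 * 2 == 8 and
// __kmp_ncores becomes 1 * 4 == 4.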
3457
Jim Cownie5e8470a2013-09-27 10:38:44 +00003458
3459static AddrUnsPair *address2os = NULL;
3460static int * procarr = NULL;
3461static int __kmp_aff_depth = 0;
3462
3463static void
3464__kmp_aux_affinity_initialize(void)
3465{
3466 if (__kmp_affinity_masks != NULL) {
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00003467 KMP_ASSERT(__kmp_affin_fullMask != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003468 return;
3469 }
3470
3471 //
3472 // Create the "full" mask - this defines all of the processors that we
3473 // consider to be in the machine model. If respect is set, then it is
3474 // the initialization thread's affinity mask. Otherwise, it is all
3475 // processors that we know about on the machine.
3476 //
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00003477 if (__kmp_affin_fullMask == NULL) {
3478 KMP_CPU_ALLOC(__kmp_affin_fullMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003479 }
3480 if (KMP_AFFINITY_CAPABLE()) {
3481 if (__kmp_affinity_respect_mask) {
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00003482 __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003483
3484 //
3485 // Count the number of available processors.
3486 //
3487 unsigned i;
3488 __kmp_avail_proc = 0;
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00003489 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
3490 if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003491 continue;
3492 }
3493 __kmp_avail_proc++;
3494 }
3495 if (__kmp_avail_proc > __kmp_xproc) {
3496 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3497 && (__kmp_affinity_type != affinity_none))) {
3498 KMP_WARNING(ErrorInitializeAffinity);
3499 }
3500 __kmp_affinity_type = affinity_none;
Andrey Churbanov1f037e42015-03-10 09:15:26 +00003501 KMP_AFFINITY_DISABLE();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003502 return;
3503 }
3504 }
3505 else {
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00003506 __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003507 __kmp_avail_proc = __kmp_xproc;
3508 }
3509 }
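
    // For example (illustrative): if the process was launched under
    // "taskset -c 0-3" on a 16-proc machine and __kmp_affinity_respect_mask
    // is set, __kmp_affin_fullMask contains procs 0-3 and __kmp_avail_proc
    // becomes 4; without respect, the full mask covers all 16 procs.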
3510
3511 int depth = -1;
3512 kmp_i18n_id_t msg_id = kmp_i18n_null;
3513
3514 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00003515 // For backward compatibility, setting KMP_CPUINFO_FILE =>
Jim Cownie5e8470a2013-09-27 10:38:44 +00003516 // KMP_TOPOLOGY_METHOD=cpuinfo
3517 //
3518 if ((__kmp_cpuinfo_file != NULL) &&
3519 (__kmp_affinity_top_method == affinity_top_method_all)) {
3520 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
3521 }
3522
3523 if (__kmp_affinity_top_method == affinity_top_method_all) {
3524 //
3525 // In the default code path, errors are not fatal - we just try using
3526 // another method. We only emit a warning message if affinity is on,
3527 // or the verbose flag is set, and the nowarnings flag was not set.
3528 //
3529 const char *file_name = NULL;
3530 int line = 0;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003531# if KMP_USE_HWLOC
3532 if (depth < 0) {
3533 if (__kmp_affinity_verbose) {
3534 KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
3535 }
3536 if(!__kmp_hwloc_error) {
3537 depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
3538 if (depth == 0) {
3539 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3540 KMP_ASSERT(address2os == NULL);
3541 return;
3542 } else if(depth < 0 && __kmp_affinity_verbose) {
3543 KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
3544 }
3545 } else if(__kmp_affinity_verbose) {
3546 KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
3547 }
3548 }
3549# endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003550
3551# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3552
Jim Cownie5e8470a2013-09-27 10:38:44 +00003553 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003554 if (__kmp_affinity_verbose) {
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003555 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003556 }
3557
3558 file_name = NULL;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003559 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003560 if (depth == 0) {
3561 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3562 KMP_ASSERT(address2os == NULL);
3563 return;
3564 }
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003565
3566 if (depth < 0) {
3567 if (__kmp_affinity_verbose) {
3568 if (msg_id != kmp_i18n_null) {
3569 KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
3570 KMP_I18N_STR(DecodingLegacyAPIC));
3571 }
3572 else {
3573 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
3574 }
3575 }
3576
3577 file_name = NULL;
3578 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3579 if (depth == 0) {
3580 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3581 KMP_ASSERT(address2os == NULL);
3582 return;
3583 }
3584 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003585 }
3586
3587# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3588
3589# if KMP_OS_LINUX
3590
3591 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003592 if (__kmp_affinity_verbose) {
3593 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003594 KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
3595 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003596 else {
3597 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
3598 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003599 }
3600
3601 FILE *f = fopen("/proc/cpuinfo", "r");
3602 if (f == NULL) {
3603 msg_id = kmp_i18n_str_CantOpenCpuinfo;
3604 }
3605 else {
3606 file_name = "/proc/cpuinfo";
3607 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3608 fclose(f);
3609 if (depth == 0) {
3610 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3611 KMP_ASSERT(address2os == NULL);
3612 return;
3613 }
3614 }
3615 }
3616
3617# endif /* KMP_OS_LINUX */
3618
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003619# if KMP_GROUP_AFFINITY
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003620
3621 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
3622 if (__kmp_affinity_verbose) {
3623 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3624 }
3625
3626 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3627 KMP_ASSERT(depth != 0);
3628 }
3629
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003630# endif /* KMP_GROUP_AFFINITY */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003631
Jim Cownie5e8470a2013-09-27 10:38:44 +00003632 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003633 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003634 if (file_name == NULL) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003635 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003636 }
3637 else if (line == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003638 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003639 }
3640 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003641 KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003642 }
3643 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003644 // FIXME - print msg if msg_id = kmp_i18n_null ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00003645
3646 file_name = "";
3647 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3648 if (depth == 0) {
3649 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3650 KMP_ASSERT(address2os == NULL);
3651 return;
3652 }
3653 KMP_ASSERT(depth > 0);
3654 KMP_ASSERT(address2os != NULL);
3655 }
3656 }
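
    // To summarize the default path above: discovery methods are tried in
    // order (hwloc if built in, then the x2APIC id method, the legacy APIC
    // id method, /proc/cpuinfo on Linux, Windows processor groups, and
    // finally the flat OS-proc map), stopping at the first method that
    // yields a usable depth.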
3657
3658 //
3659 // If the user has specified that a particular topology discovery method
3660 // is to be used, then we abort if that method fails. The exception is
3661 // group affinity, which might have been implicitly set.
3662 //
3663
3664# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3665
3666 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
3667 if (__kmp_affinity_verbose) {
3668 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3669 KMP_I18N_STR(Decodingx2APIC));
3670 }
3671
3672 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3673 if (depth == 0) {
3674 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3675 KMP_ASSERT(address2os == NULL);
3676 return;
3677 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003678 if (depth < 0) {
3679 KMP_ASSERT(msg_id != kmp_i18n_null);
3680 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3681 }
3682 }
3683 else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
3684 if (__kmp_affinity_verbose) {
3685 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3686 KMP_I18N_STR(DecodingLegacyAPIC));
3687 }
3688
3689 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3690 if (depth == 0) {
3691 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3692 KMP_ASSERT(address2os == NULL);
3693 return;
3694 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003695 if (depth < 0) {
3696 KMP_ASSERT(msg_id != kmp_i18n_null);
3697 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3698 }
3699 }
3700
3701# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3702
3703 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
3704 const char *filename;
3705 if (__kmp_cpuinfo_file != NULL) {
3706 filename = __kmp_cpuinfo_file;
3707 }
3708 else {
3709 filename = "/proc/cpuinfo";
3710 }
3711
3712 if (__kmp_affinity_verbose) {
3713 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
3714 }
3715
3716 FILE *f = fopen(filename, "r");
3717 if (f == NULL) {
3718 int code = errno;
3719 if (__kmp_cpuinfo_file != NULL) {
3720 __kmp_msg(
3721 kmp_ms_fatal,
3722 KMP_MSG(CantOpenFileForReading, filename),
3723 KMP_ERR(code),
3724 KMP_HNT(NameComesFrom_CPUINFO_FILE),
3725 __kmp_msg_null
3726 );
3727 }
3728 else {
3729 __kmp_msg(
3730 kmp_ms_fatal,
3731 KMP_MSG(CantOpenFileForReading, filename),
3732 KMP_ERR(code),
3733 __kmp_msg_null
3734 );
3735 }
3736 }
3737 int line = 0;
3738 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3739 fclose(f);
3740 if (depth < 0) {
3741 KMP_ASSERT(msg_id != kmp_i18n_null);
3742 if (line > 0) {
3743 KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
3744 }
3745 else {
3746 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
3747 }
3748 }
3749 if (__kmp_affinity_type == affinity_none) {
3750 KMP_ASSERT(depth == 0);
3751 KMP_ASSERT(address2os == NULL);
3752 return;
3753 }
3754 }
3755
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003756# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00003757
3758 else if (__kmp_affinity_top_method == affinity_top_method_group) {
3759 if (__kmp_affinity_verbose) {
3760 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3761 }
3762
3763 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3764 KMP_ASSERT(depth != 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003765 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003766 KMP_ASSERT(msg_id != kmp_i18n_null);
3767 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003768 }
3769 }
3770
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003771# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003772
3773 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
3774 if (__kmp_affinity_verbose) {
3775 KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
3776 }
3777
3778 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3779 if (depth == 0) {
3780 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3781 KMP_ASSERT(address2os == NULL);
3782 return;
3783 }
3784 // should not fail
3785 KMP_ASSERT(depth > 0);
3786 KMP_ASSERT(address2os != NULL);
3787 }
3788
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003789# if KMP_USE_HWLOC
3790 else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
3791 if (__kmp_affinity_verbose) {
3792 KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
3793 }
3794 depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
3795 if (depth == 0) {
3796 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3797 KMP_ASSERT(address2os == NULL);
3798 return;
3799 }
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003800 }
3801# endif // KMP_USE_HWLOC
3802
Jim Cownie5e8470a2013-09-27 10:38:44 +00003803 if (address2os == NULL) {
3804 if (KMP_AFFINITY_CAPABLE()
3805 && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3806 && (__kmp_affinity_type != affinity_none)))) {
3807 KMP_WARNING(ErrorInitializeAffinity);
3808 }
3809 __kmp_affinity_type = affinity_none;
Andrey Churbanov1f037e42015-03-10 09:15:26 +00003810 KMP_AFFINITY_DISABLE();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003811 return;
3812 }
3813
Jim Cownie5e8470a2013-09-27 10:38:44 +00003814 __kmp_apply_thread_places(&address2os, depth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003815
3816 //
3817 // Create the table of masks, indexed by thread Id.
3818 //
3819 unsigned maxIndex;
3820 unsigned numUnique;
3821 kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
3822 address2os, __kmp_avail_proc);
3823 if (__kmp_affinity_gran_levels == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003824 KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003825 }
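
    // Illustrative example: with granularity=core on a machine that has two
    // hardware threads per core, __kmp_create_masks() folds the sibling
    // threads of each core into a single mask, so numUnique is
    // __kmp_avail_proc / 2; only at the finest granularity
    // (__kmp_affinity_gran_levels == 0) does numUnique equal
    // __kmp_avail_proc, which is what the assertion above checks.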
3826
3827 //
3828 // Set the childNums vector in all Address objects. This must be done
3829 // before we can sort using __kmp_affinity_cmp_Address_child_num(),
3830 // which takes into account the setting of __kmp_affinity_compact.
3831 //
3832 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
3833
3834 switch (__kmp_affinity_type) {
3835
3836 case affinity_explicit:
3837 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
3838# if OMP_40_ENABLED
3839 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3840# endif
3841 {
3842 __kmp_affinity_process_proclist(&__kmp_affinity_masks,
3843 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3844 maxIndex);
3845 }
3846# if OMP_40_ENABLED
3847 else {
3848 __kmp_affinity_process_placelist(&__kmp_affinity_masks,
3849 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3850 maxIndex);
3851 }
3852# endif
3853 if (__kmp_affinity_num_masks == 0) {
3854 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3855 && (__kmp_affinity_type != affinity_none))) {
3856 KMP_WARNING(AffNoValidProcID);
3857 }
3858 __kmp_affinity_type = affinity_none;
3859 return;
3860 }
3861 break;
3862
3863 //
3864 // The other affinity types rely on sorting the Addresses according
3865 // to some permutation of the machine topology tree. Set
3866 // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
3867 // then jump to a common code fragment to do the sort and create
3868 // the array of affinity masks.
3869 //
3870
3871 case affinity_logical:
3872 __kmp_affinity_compact = 0;
3873 if (__kmp_affinity_offset) {
3874 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3875 % __kmp_avail_proc;
3876 }
3877 goto sortAddresses;
3878
3879 case affinity_physical:
3880 if (__kmp_nThreadsPerCore > 1) {
3881 __kmp_affinity_compact = 1;
3882 if (__kmp_affinity_compact >= depth) {
3883 __kmp_affinity_compact = 0;
3884 }
3885 } else {
3886 __kmp_affinity_compact = 0;
3887 }
3888 if (__kmp_affinity_offset) {
3889 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3890 % __kmp_avail_proc;
3891 }
3892 goto sortAddresses;
3893
3894 case affinity_scatter:
3895 if (__kmp_affinity_compact >= depth) {
3896 __kmp_affinity_compact = 0;
3897 }
3898 else {
3899 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
3900 }
3901 goto sortAddresses;
3902
3903 case affinity_compact:
3904 if (__kmp_affinity_compact >= depth) {
3905 __kmp_affinity_compact = depth - 1;
3906 }
3907 goto sortAddresses;
3908
Jim Cownie5e8470a2013-09-27 10:38:44 +00003909 case affinity_balanced:
Jonathan Peytoncaf09fe2015-05-27 23:27:33 +00003910 // Balanced works only for the case of a single package
Jim Cownie5e8470a2013-09-27 10:38:44 +00003911 if( nPackages > 1 ) {
3912 if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
3913 KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
3914 }
3915 __kmp_affinity_type = affinity_none;
3916 return;
3917 } else if( __kmp_affinity_uniform_topology() ) {
3918 break;
3919 } else { // Non-uniform topology
3920
3921 // Save the depth for further usage
3922 __kmp_aff_depth = depth;
3923
3924 // Number of hyper threads per core in HT machine
3925 int nth_per_core = __kmp_nThreadsPerCore;
3926
3927 int core_level;
3928 if( nth_per_core > 1 ) {
3929 core_level = depth - 2;
3930 } else {
3931 core_level = depth - 1;
3932 }
3933 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
3934 int nproc = nth_per_core * ncores;
3935
3936 procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
3937 for( int i = 0; i < nproc; i++ ) {
3938 procarr[ i ] = -1;
3939 }
3940
3941 for( int i = 0; i < __kmp_avail_proc; i++ ) {
3942 int proc = address2os[ i ].second;
3943 // If depth == 3 then level=0 - package, level=1 - core, level=2 - thread.
3944 // If there is only one thread per core then depth == 2: level 0 - package,
3945 // level 1 - core.
3946 int level = depth - 1;
3947
3948 // Defaults for the case __kmp_nth_per_core == 1 (no separate thread level)
3949 int thread = 0;
3950 int core = address2os[ i ].first.labels[ level ];
3951 // If the thread level exists, i.e. we have more than one thread context per core
3952 if( nth_per_core > 1 ) {
3953 thread = address2os[ i ].first.labels[ level ] % nth_per_core;
3954 core = address2os[ i ].first.labels[ level - 1 ];
3955 }
3956 procarr[ core * nth_per_core + thread ] = proc;
3957 }
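
            // Illustrative procarr layout: with nth_per_core == 2, a core
            // whose second thread context is absent from the topology map
            // keeps { osID, -1 } in its two slots; in general
            // procarr[core * nth_per_core + thread] holds the OS proc id of
            // that context, or -1 if the context does not exist.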
3958
3959 break;
3960 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003961
3962 sortAddresses:
3963 //
3964 // Allocate the gtid->affinity mask table.
3965 //
3966 if (__kmp_affinity_dups) {
3967 __kmp_affinity_num_masks = __kmp_avail_proc;
3968 }
3969 else {
3970 __kmp_affinity_num_masks = numUnique;
3971 }
3972
3973# if OMP_40_ENABLED
3974 if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
3975 && ( __kmp_affinity_num_places > 0 )
3976 && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
3977 __kmp_affinity_num_masks = __kmp_affinity_num_places;
3978 }
3979# endif
3980
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003981 KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003982
3983 //
3984 // Sort the address2os table according to the current setting of
3985 // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
3986 //
3987 qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
3988 __kmp_affinity_cmp_Address_child_num);
3989 {
3990 int i;
3991 unsigned j;
3992 for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
3993 if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
3994 continue;
3995 }
3996 unsigned osId = address2os[i].second;
3997 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
3998 kmp_affin_mask_t *dest
3999 = KMP_CPU_INDEX(__kmp_affinity_masks, j);
4000 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
4001 KMP_CPU_COPY(dest, src);
4002 if (++j >= __kmp_affinity_num_masks) {
4003 break;
4004 }
4005 }
4006 KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
4007 }
4008 break;
4009
4010 default:
4011 KMP_ASSERT2(0, "Unexpected affinity setting");
4012 }
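
    // A rough worked example of the scatter inversion in the cases above:
    // with depth == 3, a user setting of scatter,0 becomes
    // __kmp_affinity_compact = 3 - 1 - 0 == 2, i.e. scatter is compact
    // counted from the opposite end of the topology tree, so consecutive
    // thread ids are spread across packages first rather than packed onto
    // neighboring hardware threads.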
4013
4014 __kmp_free(osId2Mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004015 machine_hierarchy.init(address2os, __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004016}
4017
4018
4019void
4020__kmp_affinity_initialize(void)
4021{
4022 //
4023 // Much of the code above was written assuming that if a machine was not
4024 // affinity capable, then __kmp_affinity_type == affinity_none. We now
4025 // explicitly represent this as __kmp_affinity_type == affinity_disabled.
4026 //
4027 // There are too many checks for __kmp_affinity_type == affinity_none
4028 // in this code. Instead of trying to change them all, check if
4029 // __kmp_affinity_type == affinity_disabled, and if so, slam it with
4030 // affinity_none, call the real initialization routine, then restore
4031 // __kmp_affinity_type to affinity_disabled.
4032 //
4033 int disabled = (__kmp_affinity_type == affinity_disabled);
4034 if (! KMP_AFFINITY_CAPABLE()) {
4035 KMP_ASSERT(disabled);
4036 }
4037 if (disabled) {
4038 __kmp_affinity_type = affinity_none;
4039 }
4040 __kmp_aux_affinity_initialize();
4041 if (disabled) {
4042 __kmp_affinity_type = affinity_disabled;
4043 }
4044}
4045
4046
4047void
4048__kmp_affinity_uninitialize(void)
4049{
4050 if (__kmp_affinity_masks != NULL) {
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004051 KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004052 __kmp_affinity_masks = NULL;
4053 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004054 if (__kmp_affin_fullMask != NULL) {
4055 KMP_CPU_FREE(__kmp_affin_fullMask);
4056 __kmp_affin_fullMask = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004057 }
4058 __kmp_affinity_num_masks = 0;
4059# if OMP_40_ENABLED
4060 __kmp_affinity_num_places = 0;
4061# endif
4062 if (__kmp_affinity_proclist != NULL) {
4063 __kmp_free(__kmp_affinity_proclist);
4064 __kmp_affinity_proclist = NULL;
4065 }
4066 if( address2os != NULL ) {
4067 __kmp_free( address2os );
4068 address2os = NULL;
4069 }
4070 if( procarr != NULL ) {
4071 __kmp_free( procarr );
4072 procarr = NULL;
4073 }
Jonathan Peyton202a24d2016-06-13 17:30:08 +00004074# if KMP_USE_HWLOC
4075 if (__kmp_hwloc_topology != NULL) {
4076 hwloc_topology_destroy(__kmp_hwloc_topology);
4077 __kmp_hwloc_topology = NULL;
4078 }
4079# endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004080}
4081
4082
4083void
4084__kmp_affinity_set_init_mask(int gtid, int isa_root)
4085{
4086 if (! KMP_AFFINITY_CAPABLE()) {
4087 return;
4088 }
4089
4090 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4091 if (th->th.th_affin_mask == NULL) {
4092 KMP_CPU_ALLOC(th->th.th_affin_mask);
4093 }
4094 else {
4095 KMP_CPU_ZERO(th->th.th_affin_mask);
4096 }
4097
4098 //
4099 // Copy the thread mask to the kmp_info_t structure.
4100 // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
4101 // that has all of the OS proc ids set; or, if __kmp_affinity_respect_mask
4102 // is set, the full mask is the same as the mask of the initialization
4103 // thread.
4104 //
4105 kmp_affin_mask_t *mask;
4106 int i;
4107
4108# if OMP_40_ENABLED
4109 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
4110# endif
4111 {
Andrey Churbanovf28f6132015-01-13 14:54:00 +00004112 if ((__kmp_affinity_type == affinity_none) || (__kmp_affinity_type == affinity_balanced)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004113 ) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004114# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004115 if (__kmp_num_proc_groups > 1) {
4116 return;
4117 }
4118# endif
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004119 KMP_ASSERT(__kmp_affin_fullMask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004120 i = KMP_PLACE_ALL;
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004121 mask = __kmp_affin_fullMask;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004122 }
4123 else {
4124 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4125 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4126 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4127 }
4128 }
4129# if OMP_40_ENABLED
4130 else {
4131 if ((! isa_root)
4132 || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004133# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004134 if (__kmp_num_proc_groups > 1) {
4135 return;
4136 }
4137# endif
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004138 KMP_ASSERT(__kmp_affin_fullMask != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004139 i = KMP_PLACE_ALL;
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004140 mask = __kmp_affin_fullMask;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004141 }
4142 else {
4143 //
4144 // int i = some hash function or just a counter that doesn't
4145 // always start at 0. Use gtid for now.
4146 //
4147 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4148 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4149 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4150 }
4151 }
4152# endif
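
    // Illustrative mapping (assumed numbers): with __kmp_affinity_num_masks
    // == 4 and __kmp_affinity_offset == 1, gtids 0,1,2,3 select places
    // 1,2,3,0 via i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks
    // above, i.e. the round-robin assignment is rotated by the offset
    // without changing its order.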
4153
4154# if OMP_40_ENABLED
4155 th->th.th_current_place = i;
4156 if (isa_root) {
4157 th->th.th_new_place = i;
4158 th->th.th_first_place = 0;
4159 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4160 }
4161
4162 if (i == KMP_PLACE_ALL) {
4163 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
4164 gtid));
4165 }
4166 else {
4167 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
4168 gtid, i));
4169 }
4170# else
4171 if (i == -1) {
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004172 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00004173 gtid));
4174 }
4175 else {
4176 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
4177 gtid, i));
4178 }
4179# endif /* OMP_40_ENABLED */
4180
4181 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4182
4183 if (__kmp_affinity_verbose) {
4184 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4185 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4186 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004187 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
4188 buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004189 }
4190
4191# if KMP_OS_WINDOWS
4192 //
4193 // On Windows* OS, the process affinity mask might have changed.
4194 // If the user didn't request affinity and this call fails,
4195 // just continue silently. See CQ171393.
4196 //
4197 if ( __kmp_affinity_type == affinity_none ) {
4198 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
4199 }
4200 else
4201# endif
4202 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4203}
4204
4205
4206# if OMP_40_ENABLED
4207
4208void
4209__kmp_affinity_set_place(int gtid)
4210{
4211 int retval;
4212
4213 if (! KMP_AFFINITY_CAPABLE()) {
4214 return;
4215 }
4216
4217 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4218
4219 KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
4220 gtid, th->th.th_new_place, th->th.th_current_place));
4221
4222 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00004223 // Check that the new place is within this thread's partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004224 //
4225 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004226 KMP_ASSERT(th->th.th_new_place >= 0);
4227 KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004228 if (th->th.th_first_place <= th->th.th_last_place) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004229 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004230 && (th->th.th_new_place <= th->th.th_last_place));
4231 }
4232 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004233 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004234 || (th->th.th_new_place >= th->th.th_last_place));
4235 }
4236
4237 //
4238 // Copy the thread mask to the kmp_info_t structure,
4239 // and set this thread's affinity.
4240 //
4241 kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
4242 th->th.th_new_place);
4243 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4244 th->th.th_current_place = th->th.th_new_place;
4245
4246 if (__kmp_affinity_verbose) {
4247 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4248 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4249 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004250 KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
4251 gtid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004252 }
4253 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4254}
4255
4256# endif /* OMP_40_ENABLED */
4257
4258
4259int
4260__kmp_aux_set_affinity(void **mask)
4261{
4262 int gtid;
4263 kmp_info_t *th;
4264 int retval;
4265
4266 if (! KMP_AFFINITY_CAPABLE()) {
4267 return -1;
4268 }
4269
4270 gtid = __kmp_entry_gtid();
4271 KA_TRACE(1000, ;{
4272 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4273 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4274 (kmp_affin_mask_t *)(*mask));
4275 __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
4276 gtid, buf);
4277 });
4278
4279 if (__kmp_env_consistency_check) {
4280 if ((mask == NULL) || (*mask == NULL)) {
4281 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4282 }
4283 else {
4284 unsigned proc;
4285 int num_procs = 0;
4286
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004287 KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t*)(*mask))) {
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004288 if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
4289 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4290 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004291 if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
4292 continue;
4293 }
4294 num_procs++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004295 }
4296 if (num_procs == 0) {
4297 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4298 }
4299
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004300# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004301 if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
4302 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4303 }
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004304# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004305
4306 }
4307 }
4308
4309 th = __kmp_threads[gtid];
4310 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4311 retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4312 if (retval == 0) {
4313 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
4314 }
4315
4316# if OMP_40_ENABLED
4317 th->th.th_current_place = KMP_PLACE_UNDEFINED;
4318 th->th.th_new_place = KMP_PLACE_UNDEFINED;
4319 th->th.th_first_place = 0;
4320 th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004321
4322 //
4323 // Turn off 4.0 affinity for the current thread at this parallel level.
4324 //
4325 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004326# endif
4327
4328 return retval;
4329}
4330
4331
4332int
4333__kmp_aux_get_affinity(void **mask)
4334{
4335 int gtid;
4336 int retval;
4337 kmp_info_t *th;
4338
4339 if (! KMP_AFFINITY_CAPABLE()) {
4340 return -1;
4341 }
4342
4343 gtid = __kmp_entry_gtid();
4344 th = __kmp_threads[gtid];
4345 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4346
4347 KA_TRACE(1000, ;{
4348 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4349 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4350 th->th.th_affin_mask);
4351 __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
4352 });
4353
4354 if (__kmp_env_consistency_check) {
4355 if ((mask == NULL) || (*mask == NULL)) {
4356 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
4357 }
4358 }
4359
4360# if !KMP_OS_WINDOWS
4361
4362 retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4363 KA_TRACE(1000, ;{
4364 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4365 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4366 (kmp_affin_mask_t *)(*mask));
4367 __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
4368 });
4369 return retval;
4370
4371# else
4372
4373 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
4374 return 0;
4375
4376# endif /* KMP_OS_WINDOWS */
4377
4378}
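
// A hedged usage sketch of the user-level API (declared in omp.h) that
// funnels into __kmp_aux_set_affinity() / __kmp_aux_get_affinity(); error
// handling is mostly elided:
//
//     kmp_affinity_mask_t mask;
//     kmp_create_affinity_mask(&mask);
//     kmp_set_affinity_mask_proc(0, &mask); // allow OS proc 0 only
//     if (kmp_set_affinity(&mask) != 0) {
//         // not affinity capable, or the mask was rejected
//     }
//     kmp_get_affinity(&mask);              // read the mask back
//     kmp_destroy_affinity_mask(&mask);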
4379
Jim Cownie5e8470a2013-09-27 10:38:44 +00004380int
4381__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
4382{
4383 int retval;
4384
4385 if (! KMP_AFFINITY_CAPABLE()) {
4386 return -1;
4387 }
4388
4389 KA_TRACE(1000, ;{
4390 int gtid = __kmp_entry_gtid();
4391 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4392 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4393 (kmp_affin_mask_t *)(*mask));
4394 __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
4395 proc, gtid, buf);
4396 });
4397
4398 if (__kmp_env_consistency_check) {
4399 if ((mask == NULL) || (*mask == NULL)) {
4400 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
4401 }
4402 }
4403
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004404 if ((proc < 0)
4405# if !KMP_USE_HWLOC
4406 || ((unsigned)proc >= KMP_CPU_SETSIZE)
4407# endif
4408 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004409 return -1;
4410 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004411 if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004412 return -2;
4413 }
4414
4415 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
4416 return 0;
4417}
4418
4419
4420int
4421__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
4422{
4423 int retval;
4424
4425 if (! KMP_AFFINITY_CAPABLE()) {
4426 return -1;
4427 }
4428
4429 KA_TRACE(1000, ;{
4430 int gtid = __kmp_entry_gtid();
4431 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4432 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4433 (kmp_affin_mask_t *)(*mask));
4434 __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
4435 proc, gtid, buf);
4436 });
4437
4438 if (__kmp_env_consistency_check) {
4439 if ((mask == NULL) || (*mask == NULL)) {
4440 KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
4441 }
4442 }
4443
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004444 if ((proc < 0)
4445# if !KMP_USE_HWLOC
4446 || ((unsigned)proc >= KMP_CPU_SETSIZE)
4447# endif
4448 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004449 return -1;
4450 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004451 if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004452 return -2;
4453 }
4454
4455 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
4456 return 0;
4457}
4458
4459
4460int
4461__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
4462{
4463 int retval;
4464
4465 if (! KMP_AFFINITY_CAPABLE()) {
4466 return -1;
4467 }
4468
4469 KA_TRACE(1000, ;{
4470 int gtid = __kmp_entry_gtid();
4471 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4472 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4473 (kmp_affin_mask_t *)(*mask));
4474 __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
4475 proc, gtid, buf);
4476 });
4477
4478 if (__kmp_env_consistency_check) {
4479 if ((mask == NULL) || (*mask == NULL)) {
Andrey Churbanov4b2f17a2015-01-29 15:49:22 +00004480 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
Jim Cownie5e8470a2013-09-27 10:38:44 +00004481 }
4482 }
4483
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004484 if ((proc < 0)
4485# if !KMP_USE_HWLOC
4486 || ((unsigned)proc >= KMP_CPU_SETSIZE)
4487# endif
4488 ) {
4489 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004490 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004491 if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004492 return 0;
4493 }
4494
4495 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
4496}
4497
Jim Cownie5e8470a2013-09-27 10:38:44 +00004498
4499// Dynamic affinity settings - Affinity balanced
4500void __kmp_balanced_affinity( int tid, int nthreads )
4501{
4502 if( __kmp_affinity_uniform_topology() ) {
4503 int coreID;
4504 int threadID;
4505 // Number of hyper threads per core in HT machine
4506 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
4507 // Number of cores
4508 int ncores = __kmp_ncores;
4509 // How many threads will be bound to each core
4510 int chunk = nthreads / ncores;
4511 // How many cores will have an additional thread bound to them - the "big cores"
4512 int big_cores = nthreads % ncores;
4513 // Number of threads on the big cores
4514 int big_nth = ( chunk + 1 ) * big_cores;
4515 if( tid < big_nth ) {
4516 coreID = tid / (chunk + 1 );
4517 threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
4518 } else { //tid >= big_nth
4519 coreID = ( tid - big_cores ) / chunk;
4520 threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
4521 }
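
        // Worked example (illustrative): nthreads == 10 on ncores == 4 gives
        // chunk == 2, big_cores == 2 and big_nth == 6, so tids 0..5 land
        // three per core on cores 0 and 1, while tids 6..9 land two per core
        // on cores 2 and 3.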
4522
4523 KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
4524 "Illegal set affinity operation when not capable");
4525
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004526 kmp_affin_mask_t *mask;
4527 KMP_CPU_ALLOC_ON_STACK(mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004528 KMP_CPU_ZERO(mask);
4529
4530 // Granularity == thread
4531 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4532 int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
4533 KMP_CPU_SET( osID, mask);
4534 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4535 for( int i = 0; i < __kmp_nth_per_core; i++ ) {
4536 int osID;
4537 osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
4538 KMP_CPU_SET( osID, mask);
4539 }
4540 }
4541 if (__kmp_affinity_verbose) {
4542 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4543 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004544 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4545 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004546 }
4547 __kmp_set_system_affinity( mask, TRUE );
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004548 KMP_CPU_FREE_FROM_STACK(mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004549 } else { // Non-uniform topology
4550
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004551 kmp_affin_mask_t *mask;
4552 KMP_CPU_ALLOC_ON_STACK(mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004553 KMP_CPU_ZERO(mask);
4554
4555 // Number of hyper threads per core in HT machine
4556 int nth_per_core = __kmp_nThreadsPerCore;
4557 int core_level;
4558 if( nth_per_core > 1 ) {
4559 core_level = __kmp_aff_depth - 2;
4560 } else {
4561 core_level = __kmp_aff_depth - 1;
4562 }
4563
4564 // Number of cores - maximum value; it does not count trailing cores with 0 processors
4565 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
4566
4567 // For performance gain consider the special case nthreads == __kmp_avail_proc
4568 if( nthreads == __kmp_avail_proc ) {
4569 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4570 int osID = address2os[ tid ].second;
4571 KMP_CPU_SET( osID, mask);
4572 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4573 int coreID = address2os[ tid ].first.labels[ core_level ];
4574 // We'll count the osIDs found for the current core; there can be at most nth_per_core of them;
4575 // since address2os is sorted we can break when cnt == nth_per_core
4576 int cnt = 0;
4577 for( int i = 0; i < __kmp_avail_proc; i++ ) {
4578 int osID = address2os[ i ].second;
4579 int core = address2os[ i ].first.labels[ core_level ];
4580 if( core == coreID ) {
4581 KMP_CPU_SET( osID, mask);
4582 cnt++;
4583 if( cnt == nth_per_core ) {
4584 break;
4585 }
4586 }
4587 }
4588 }
4589 } else if( nthreads <= __kmp_ncores ) {
4590
4591 int core = 0;
4592 for( int i = 0; i < ncores; i++ ) {
4593 // Check if this core from procarr[] is in the mask
4594 int in_mask = 0;
4595 for( int j = 0; j < nth_per_core; j++ ) {
4596 if( procarr[ i * nth_per_core + j ] != - 1 ) {
4597 in_mask = 1;
4598 break;
4599 }
4600 }
4601 if( in_mask ) {
4602 if( tid == core ) {
4603 for( int j = 0; j < nth_per_core; j++ ) {
4604 int osID = procarr[ i * nth_per_core + j ];
4605 if( osID != -1 ) {
4606 KMP_CPU_SET( osID, mask );
4607 // For granularity=thread it is enough to set the first available osID for this core
4608 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4609 break;
4610 }
4611 }
4612 }
4613 break;
4614 } else {
4615 core++;
4616 }
4617 }
4618 }
4619
4620 } else { // nthreads > __kmp_ncores
4621
4622 // Array to save the number of processors at each core
Jonathan Peyton7be075332015-06-22 15:53:50 +00004623 int* nproc_at_core = (int*)KMP_ALLOCA(sizeof(int)*ncores);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004624 // Array to save the number of cores with "x" available processors;
Jonathan Peyton7be075332015-06-22 15:53:50 +00004625 int* ncores_with_x_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004626 // Array to save the number of cores with # procs from x to nth_per_core
Jonathan Peyton7be075332015-06-22 15:53:50 +00004627 int* ncores_with_x_to_max_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004628
4629 for( int i = 0; i <= nth_per_core; i++ ) {
4630 ncores_with_x_procs[ i ] = 0;
4631 ncores_with_x_to_max_procs[ i ] = 0;
4632 }
4633
4634 for( int i = 0; i < ncores; i++ ) {
4635 int cnt = 0;
4636 for( int j = 0; j < nth_per_core; j++ ) {
4637 if( procarr[ i * nth_per_core + j ] != -1 ) {
4638 cnt++;
4639 }
4640 }
4641 nproc_at_core[ i ] = cnt;
4642 ncores_with_x_procs[ cnt ]++;
4643 }
4644
4645 for( int i = 0; i <= nth_per_core; i++ ) {
4646 for( int j = i; j <= nth_per_core; j++ ) {
4647 ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
4648 }
4649 }
4650
4651 // Max number of processors
4652 int nproc = nth_per_core * ncores;
4653 // An array to keep the number of threads per context
4654 int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
4655 for( int i = 0; i < nproc; i++ ) {
4656 newarr[ i ] = 0;
4657 }
4658
4659 int nth = nthreads;
4660 int flag = 0;
4661 while( nth > 0 ) {
4662 for( int j = 1; j <= nth_per_core; j++ ) {
4663 int cnt = ncores_with_x_to_max_procs[ j ];
4664 for( int i = 0; i < ncores; i++ ) {
4665 // Skip the core with 0 processors
4666 if( nproc_at_core[ i ] == 0 ) {
4667 continue;
4668 }
4669 for( int k = 0; k < nth_per_core; k++ ) {
4670 if( procarr[ i * nth_per_core + k ] != -1 ) {
4671 if( newarr[ i * nth_per_core + k ] == 0 ) {
4672 newarr[ i * nth_per_core + k ] = 1;
4673 cnt--;
4674 nth--;
4675 break;
4676 } else {
4677 if( flag != 0 ) {
4678 newarr[ i * nth_per_core + k ] ++;
4679 cnt--;
4680 nth--;
4681 break;
4682 }
4683 }
4684 }
4685 }
4686 if( cnt == 0 || nth == 0 ) {
4687 break;
4688 }
4689 }
4690 if( nth == 0 ) {
4691 break;
4692 }
4693 }
4694 flag = 1;
4695 }
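
            // After the loop, newarr[ctx] holds how many threads were placed
            // on hardware context ctx. Illustrative run: 5 threads over
            // 2 cores x 2 contexts (all present) yields newarr == {2,1,1,1};
            // the second pass (flag == 1) is what permits doubling up on a
            // context that already has a thread.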
4696 int sum = 0;
4697 for( int i = 0; i < nproc; i++ ) {
4698 sum += newarr[ i ];
4699 if( sum > tid ) {
4700 // Granularity == thread
4701 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4702 int osID = procarr[ i ];
4703 KMP_CPU_SET( osID, mask);
4704 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4705 int coreID = i / nth_per_core;
4706 for( int ii = 0; ii < nth_per_core; ii++ ) {
4707 int osID = procarr[ coreID * nth_per_core + ii ];
4708 if( osID != -1 ) {
4709 KMP_CPU_SET( osID, mask);
4710 }
4711 }
4712 }
4713 break;
4714 }
4715 }
4716 __kmp_free( newarr );
4717 }
4718
4719 if (__kmp_affinity_verbose) {
4720 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4721 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004722 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4723 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004724 }
4725 __kmp_set_system_affinity( mask, TRUE );
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004726 KMP_CPU_FREE_FROM_STACK(mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004727 }
4728}
4729
Jonathan Peyton3076fa42016-01-12 17:21:55 +00004730#if KMP_OS_LINUX
4731// We don't need this entry for Windows because
4732// there is a GetProcessAffinityMask() API
4733//
4734// The intended usage is indicated by these steps:
4735// 1) The user gets the current affinity mask
4736// 2) Then sets the affinity by calling this function
4737// 3) Error check the return value
4738// 4) Use non-OpenMP parallelization
4739// 5) Reset the affinity to what was stored in step 1)
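//
// A sketch of that sequence on Linux (assuming pthreads; the helper name
// run_non_openmp_parallel_code is hypothetical):
//
//     cpu_set_t saved;
//     pthread_getaffinity_np(pthread_self(), sizeof(saved), &saved); // 1)
//     if (kmp_set_thread_affinity_mask_initial() == 0) {             // 2) 3)
//         run_non_openmp_parallel_code();                            // 4)
//     }
//     pthread_setaffinity_np(pthread_self(), sizeof(saved), &saved); // 5)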
4740#ifdef __cplusplus
4741extern "C"
4742#endif
4743int
4744kmp_set_thread_affinity_mask_initial()
4745// the function returns 0 on success,
4746// -1 if we cannot bind thread
4747// >0 (errno) if an error happened during binding
4748{
4749 int gtid = __kmp_get_gtid();
4750 if (gtid < 0) {
4751 // Do not touch non-omp threads
4752 KA_TRACE(30, ( "kmp_set_thread_affinity_mask_initial: "
4753 "non-omp thread, returning\n"));
4754 return -1;
4755 }
4756 if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) {
4757 KA_TRACE(30, ( "kmp_set_thread_affinity_mask_initial: "
4758 "affinity not initialized, returning\n"));
4759 return -1;
4760 }
4761 KA_TRACE(30, ( "kmp_set_thread_affinity_mask_initial: "
4762 "set full mask for thread %d\n", gtid));
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004763 KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
4764 return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
Jonathan Peyton3076fa42016-01-12 17:21:55 +00004765}
4766#endif
4767
Alp Toker763b9392014-02-28 09:42:41 +00004768#endif // KMP_AFFINITY_SUPPORTED