Blame - openmp/runtime/src/kmp_affinity.cpp - toolchain/llvm-project

blob: 356fcbeca97f792116b2856a7e64a5eb93bdf158 [file] [log] [blame]

Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1	/*
				2	* kmp_affinity.cpp -- affinity management
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3	*/
				4
				5
				6	//===----------------------------------------------------------------------===//
				7	//
				8	// The LLVM Compiler Infrastructure
				9	//
				10	// This file is dual licensed under the MIT and the University of Illinois Open
				11	// Source Licenses. See LICENSE.txt for details.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15
				16	#include "kmp.h"
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	17	#include "kmp_affinity.h"
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	18	#include "kmp_i18n.h"
				19	#include "kmp_io.h"
				20	#include "kmp_str.h"
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	21	#include "kmp_wrapper_getpid.h"
Jonathan Peyton	1707836	2015-09-10 19:22:07 +0000	[diff] [blame]	22
				23	// Store the real or imagined machine hierarchy here
				24	static hierarchy_info machine_hierarchy;
				25
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	26	void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }
				27
Jonathan Peyton	1707836	2015-09-10 19:22:07 +0000	[diff] [blame]	28
				29	void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	30	kmp_uint32 depth;
				31	// The test below is true if affinity is available, but set to "none". Need to
				32	// init on first use of hierarchical barrier.
				33	if (TCR_1(machine_hierarchy.uninitialized))
				34	machine_hierarchy.init(NULL, nproc);
Jonathan Peyton	1707836	2015-09-10 19:22:07 +0000	[diff] [blame]	35
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	36	// Adjust the hierarchy in case num threads exceeds original
				37	if (nproc > machine_hierarchy.base_num_threads)
				38	machine_hierarchy.resize(nproc);
Jonathan Peyton	7dee82e	2015-11-09 16:24:53 +0000	[diff] [blame]	39
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	40	depth = machine_hierarchy.depth;
				41	KMP_DEBUG_ASSERT(depth > 0);
Jonathan Peyton	1707836	2015-09-10 19:22:07 +0000	[diff] [blame]	42
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	43	thr_bar->depth = depth;
				44	thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1;
				45	thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
Jonathan Peyton	1707836	2015-09-10 19:22:07 +0000	[diff] [blame]	46	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	47
Alp Toker	763b939	2014-02-28 09:42:41 +0000	[diff] [blame]	48	#if KMP_AFFINITY_SUPPORTED
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	49
Jonathan Peyton	1cdd87a	2016-11-14 21:08:35 +0000	[diff] [blame]	50	bool KMPAffinity::picked_api = false;
				51
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	52	void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
				53	void *KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); }
				54	void KMPAffinity::Mask::operator delete(void *p) { __kmp_free(p); }
				55	void KMPAffinity::Mask::operator delete[](void *p) { __kmp_free(p); }
				56	void *KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); }
				57	void KMPAffinity::operator delete(void *p) { __kmp_free(p); }
Jonathan Peyton	1cdd87a	2016-11-14 21:08:35 +0000	[diff] [blame]	58
				59	void KMPAffinity::pick_api() {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	60	KMPAffinity *affinity_dispatch;
				61	if (picked_api)
				62	return;
Jonathan Peyton	1cdd87a	2016-11-14 21:08:35 +0000	[diff] [blame]	63	#if KMP_USE_HWLOC
Jonathan Peyton	e3e2aaf	2017-05-31 20:35:22 +0000	[diff] [blame]	64	// Only use Hwloc if affinity isn't explicitly disabled and
				65	// user requests Hwloc topology method
				66	if (__kmp_affinity_top_method == affinity_top_method_hwloc &&
				67	__kmp_affinity_type != affinity_disabled) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	68	affinity_dispatch = new KMPHwlocAffinity();
				69	} else
Jonathan Peyton	1cdd87a	2016-11-14 21:08:35 +0000	[diff] [blame]	70	#endif
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	71	{
				72	affinity_dispatch = new KMPNativeAffinity();
				73	}
				74	__kmp_affinity_dispatch = affinity_dispatch;
				75	picked_api = true;
Jonathan Peyton	1cdd87a	2016-11-14 21:08:35 +0000	[diff] [blame]	76	}
				77
				78	void KMPAffinity::destroy_api() {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	79	if (__kmp_affinity_dispatch != NULL) {
				80	delete __kmp_affinity_dispatch;
				81	__kmp_affinity_dispatch = NULL;
				82	picked_api = false;
				83	}
Jonathan Peyton	1cdd87a	2016-11-14 21:08:35 +0000	[diff] [blame]	84	}
				85
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	86	// Print the affinity mask to the character array in a pretty format.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	87	char __kmp_affinity_print_mask(char buf, int buf_len,
				88	kmp_affin_mask_t *mask) {
				89	KMP_ASSERT(buf_len >= 40);
				90	char *scan = buf;
				91	char *end = buf + buf_len - 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	92
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	93	// Find first element / check for empty set.
				94	size_t i;
				95	i = mask->begin();
				96	if (i == mask->end()) {
				97	KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}");
				98	while (*scan != '\0')
				99	scan++;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	100	KMP_ASSERT(scan <= end);
				101	return buf;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	102	}
				103
				104	KMP_SNPRINTF(scan, end - scan + 1, "{%ld", (long)i);
				105	while (*scan != '\0')
				106	scan++;
				107	i++;
				108	for (; i != mask->end(); i = mask->next(i)) {
				109	if (!KMP_CPU_ISSET(i, mask)) {
				110	continue;
				111	}
				112
				113	// Check for buffer overflow. A string of the form ",<n>" will have at most
				114	// 10 characters, plus we want to leave room to print ",...}" if the set is
				115	// too large to print for a total of 15 characters. We already left room for
				116	// '\0' in setting end.
				117	if (end - scan < 15) {
				118	break;
				119	}
				120	KMP_SNPRINTF(scan, end - scan + 1, ",%-ld", (long)i);
				121	while (*scan != '\0')
				122	scan++;
				123	}
				124	if (i != mask->end()) {
				125	KMP_SNPRINTF(scan, end - scan + 1, ",...");
				126	while (*scan != '\0')
				127	scan++;
				128	}
				129	KMP_SNPRINTF(scan, end - scan + 1, "}");
				130	while (*scan != '\0')
				131	scan++;
				132	KMP_ASSERT(scan <= end);
				133	return buf;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	134	}
				135
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	136	void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
				137	KMP_CPU_ZERO(mask);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	138
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	139	#if KMP_GROUP_AFFINITY
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	140
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	141	if (__kmp_num_proc_groups > 1) {
				142	int group;
				143	KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
				144	for (group = 0; group < __kmp_num_proc_groups; group++) {
				145	int i;
				146	int num = __kmp_GetActiveProcessorCount(group);
				147	for (i = 0; i < num; i++) {
				148	KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
				149	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	150	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	151	} else
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	152
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	153	#endif /* KMP_GROUP_AFFINITY */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	154
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	155	{
				156	int proc;
				157	for (proc = 0; proc < __kmp_xproc; proc++) {
				158	KMP_CPU_SET(proc, mask);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	159	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	160	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	161	}
				162
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	163	// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
				164	// called to renumber the labels from [0..n] and place them into the child_num
				165	// vector of the address object. This is done in case the labels used for
Alp Toker	8f2d3f0	2014-02-24 10:40:15 +0000	[diff] [blame]	166	// the children at one node of the hierarchy differ from those used for
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	167	// another node at the same level. Example: suppose the machine has 2 nodes
				168	// with 2 packages each. The first node contains packages 601 and 602, and
				169	// second node contains packages 603 and 604. If we try to sort the table
				170	// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
				171	// because we are paying attention to the labels themselves, not the ordinal
				172	// child numbers. By using the child numbers in the sort, the result is
				173	// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	174	static void __kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
				175	int numAddrs) {
				176	KMP_DEBUG_ASSERT(numAddrs > 0);
				177	int depth = address2os->first.depth;
				178	unsigned counts = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				179	unsigned lastLabel = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				180	int labCt;
				181	for (labCt = 0; labCt < depth; labCt++) {
				182	address2os[0].first.childNums[labCt] = counts[labCt] = 0;
				183	lastLabel[labCt] = address2os[0].first.labels[labCt];
				184	}
				185	int i;
				186	for (i = 1; i < numAddrs; i++) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	187	for (labCt = 0; labCt < depth; labCt++) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	188	if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
				189	int labCt2;
				190	for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
				191	counts[labCt2] = 0;
				192	lastLabel[labCt2] = address2os[i].first.labels[labCt2];
				193	}
				194	counts[labCt]++;
				195	lastLabel[labCt] = address2os[i].first.labels[labCt];
				196	break;
				197	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	198	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	199	for (labCt = 0; labCt < depth; labCt++) {
				200	address2os[i].first.childNums[labCt] = counts[labCt];
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	201	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	202	for (; labCt < (int)Address::maxDepth; labCt++) {
				203	address2os[i].first.childNums[labCt] = 0;
				204	}
				205	}
				206	__kmp_free(lastLabel);
				207	__kmp_free(counts);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	208	}
				209
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	210	// All of the __kmp_affinity_create_*_map() routines should set
				211	// __kmp_affinity_masks to a vector of affinity mask objects of length
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	212	// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and return
				213	// the number of levels in the machine topology tree (zero if
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	214	// __kmp_affinity_type == affinity_none).
				215	//
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	216	// All of the __kmp_affinity_create_*_map() routines should set
				217	// *__kmp_affin_fullMask to the affinity mask for the initialization thread.
				218	// They need to save and restore the mask, and it could be needed later, so
				219	// saving it is just an optimization to avoid calling kmp_get_system_affinity()
				220	// again.
Jonathan Peyton	c5304aa	2016-06-13 21:28:03 +0000	[diff] [blame]	221	kmp_affin_mask_t *__kmp_affin_fullMask = NULL;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	222
				223	static int nCoresPerPkg, nPackages;
Andrey Churbanov	f696c82	2015-01-27 16:55:43 +0000	[diff] [blame]	224	static int __kmp_nThreadsPerCore;
				225	#ifndef KMP_DFLT_NTH_CORES
				226	static int __kmp_ncores;
				227	#endif
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	228	static int *__kmp_pu_os_idx = NULL;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	229
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	230	// __kmp_affinity_uniform_topology() doesn't work when called from
				231	// places which support arbitrarily many levels in the machine topology
				232	// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
				233	// __kmp_affinity_create_x2apicid_map().
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	234	inline static bool __kmp_affinity_uniform_topology() {
				235	return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	236	}
				237
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	238	// Print out the detailed machine topology map, i.e. the physical locations
				239	// of each OS proc.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	240	static void __kmp_affinity_print_topology(AddrUnsPair *address2os, int len,
				241	int depth, int pkgLevel,
				242	int coreLevel, int threadLevel) {
				243	int proc;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	244
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	245	KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
				246	for (proc = 0; proc < len; proc++) {
				247	int level;
				248	kmp_str_buf_t buf;
				249	__kmp_str_buf_init(&buf);
				250	for (level = 0; level < depth; level++) {
				251	if (level == threadLevel) {
				252	__kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
				253	} else if (level == coreLevel) {
				254	__kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
				255	} else if (level == pkgLevel) {
				256	__kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
				257	} else if (level > pkgLevel) {
				258	__kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
				259	level - pkgLevel - 1);
				260	} else {
				261	__kmp_str_buf_print(&buf, "L%d ", level);
				262	}
				263	__kmp_str_buf_print(&buf, "%d ", address2os[proc].first.labels[level]);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	264	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	265	KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
				266	buf.str);
				267	__kmp_str_buf_free(&buf);
				268	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	269	}
				270
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	271	#if KMP_USE_HWLOC
Jonathan Peyton	202a24d	2016-06-13 17:30:08 +0000	[diff] [blame]	272
				273	// This function removes the topology levels that are radix 1 and don't offer
				274	// further information about the topology. The most common example is when you
				275	// have one thread context per core, we don't want the extra thread context
				276	// level if it offers no unique labels. So they are removed.
				277	// return value: the new depth of address2os
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	278	static int __kmp_affinity_remove_radix_one_levels(AddrUnsPair *address2os,
				279	int nActiveThreads, int depth,
				280	int pkgLevel, int coreLevel,
				281	int *threadLevel) {
				282	int level;
				283	int i;
				284	int radix1_detected;
Jonathan Peyton	202a24d	2016-06-13 17:30:08 +0000	[diff] [blame]	285
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	286	for (level = depth - 1; level >= 0; --level) {
				287	// Always keep the package level
				288	if (level == *pkgLevel)
				289	continue;
				290	// Detect if this level is radix 1
				291	radix1_detected = 1;
				292	for (i = 1; i < nActiveThreads; ++i) {
				293	if (address2os[0].first.labels[level] !=
				294	address2os[i].first.labels[level]) {
				295	// There are differing label values for this level so it stays
				296	radix1_detected = 0;
				297	break;
				298	}
Jonathan Peyton	202a24d	2016-06-13 17:30:08 +0000	[diff] [blame]	299	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	300	if (!radix1_detected)
				301	continue;
				302	// Radix 1 was detected
				303	if (level == *threadLevel) {
				304	// If only one thread per core, then just decrement
				305	// the depth which removes the threadlevel from address2os
				306	for (i = 0; i < nActiveThreads; ++i) {
				307	address2os[i].first.depth--;
				308	}
				309	*threadLevel = -1;
				310	} else if (level == *coreLevel) {
				311	// For core level, we move the thread labels over if they are still
				312	// valid (*threadLevel != -1), and also reduce the depth another level
				313	for (i = 0; i < nActiveThreads; ++i) {
				314	if (*threadLevel != -1) {
				315	address2os[i].first.labels[*coreLevel] =
				316	address2os[i].first.labels[*threadLevel];
				317	}
				318	address2os[i].first.depth--;
				319	}
				320	*coreLevel = -1;
				321	}
				322	}
				323	return address2os[0].first.depth;
Jonathan Peyton	202a24d	2016-06-13 17:30:08 +0000	[diff] [blame]	324	}
				325
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	326	// Returns the number of objects of type 'type' below 'obj' within the topology
				327	// tree structure. e.g., if obj is a HWLOC_OBJ_PACKAGE object, and type is
				328	// HWLOC_OBJ_PU, then this will return the number of PU's under the SOCKET
				329	// object.
				330	static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj,
				331	hwloc_obj_type_t type) {
				332	int retval = 0;
				333	hwloc_obj_t first;
				334	for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type,
				335	obj->logical_index, type, 0);
				336	first != NULL &&
				337	hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) ==
				338	obj;
				339	first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type,
				340	first)) {
				341	++retval;
				342	}
				343	return retval;
Jonathan Peyton	202a24d	2016-06-13 17:30:08 +0000	[diff] [blame]	344	}
				345
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	346	static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
				347	kmp_i18n_id_t *const msg_id) {
				348	*address2os = NULL;
				349	*msg_id = kmp_i18n_null;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	350
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	351	// Save the affinity mask for the current thread.
				352	kmp_affin_mask_t *oldMask;
				353	KMP_CPU_ALLOC(oldMask);
				354	__kmp_get_system_affinity(oldMask, TRUE);
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	355
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	356	int depth = 3;
				357	int pkgLevel = 0;
				358	int coreLevel = 1;
				359	int threadLevel = 2;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	360
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	361	if (!KMP_AFFINITY_CAPABLE()) {
				362	// Hack to try and infer the machine topology using only the data
				363	// available from cpuid on the current thread, and __kmp_xproc.
				364	KMP_ASSERT(__kmp_affinity_type == affinity_none);
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	365
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	366	nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(
				367	hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, 0),
				368	HWLOC_OBJ_CORE);
				369	__kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(
				370	hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0),
				371	HWLOC_OBJ_PU);
				372	__kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
				373	nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	374	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	375	KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
				376	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				377	if (__kmp_affinity_uniform_topology()) {
				378	KMP_INFORM(Uniform, "KMP_AFFINITY");
				379	} else {
				380	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				381	}
				382	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				383	__kmp_nThreadsPerCore, __kmp_ncores);
				384	}
				385	KMP_CPU_FREE(oldMask);
				386	return 0;
				387	}
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	388
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	389	// Allocate the data structure to be returned.
				390	AddrUnsPair *retval =
				391	(AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair) __kmp_avail_proc);
				392	__kmp_pu_os_idx = (int )__kmp_allocate(sizeof(int) __kmp_avail_proc);
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	393
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	394	// When affinity is off, this routine will still be called to set
				395	// __kmp_ncores, as well as __kmp_nThreadsPerCore,
				396	// nCoresPerPkg, & nPackages. Make sure all these vars are set
				397	// correctly, and return if affinity is not enabled.
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	398
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	399	hwloc_obj_t pu;
				400	hwloc_obj_t core;
				401	hwloc_obj_t socket;
				402	int nActiveThreads = 0;
				403	int socket_identifier = 0;
				404	// re-calculate globals to count only accessible resources
				405	__kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
				406	for (socket =
				407	hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, 0);
				408	socket != NULL;
				409	socket = hwloc_get_next_obj_by_type(__kmp_hwloc_topology,
				410	HWLOC_OBJ_PACKAGE, socket),
				411	socket_identifier++) {
				412	int core_identifier = 0;
				413	int num_active_cores = 0;
				414	for (core = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, socket->type,
				415	socket->logical_index,
				416	HWLOC_OBJ_CORE, 0);
				417	core != NULL &&
				418	hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, socket->type,
				419	core) == socket;
				420	core = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE,
				421	core),
				422	core_identifier++) {
				423	int pu_identifier = 0;
				424	int num_active_threads = 0;
				425	for (pu = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, core->type,
				426	core->logical_index, HWLOC_OBJ_PU,
				427	0);
				428	pu != NULL &&
				429	hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, core->type,
				430	pu) == core;
				431	pu = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU,
				432	pu),
				433	pu_identifier++) {
				434	Address addr(3);
				435	if(!KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
				436	continue; // skip inactive (inaccessible) unit
				437	KA_TRACE(20,
				438	("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
				439	socket->os_index, socket->logical_index, core->os_index,
				440	core->logical_index, pu->os_index,pu->logical_index));
				441	addr.labels[0] = socket_identifier; // package
				442	addr.labels[1] = core_identifier; // core
				443	addr.labels[2] = pu_identifier; // pu
				444	retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
				445	__kmp_pu_os_idx[nActiveThreads] =
				446	pu->os_index; // keep os index for each active pu
				447	nActiveThreads++;
				448	++num_active_threads; // count active threads per core
				449	}
				450	if (num_active_threads) { // were there any active threads on the core?
				451	++__kmp_ncores; // count total active cores
				452	++num_active_cores; // count active cores per socket
				453	if (num_active_threads > __kmp_nThreadsPerCore)
				454	__kmp_nThreadsPerCore = num_active_threads; // calc maximum
				455	}
				456	}
				457	if (num_active_cores) { // were there any active cores on the socket?
				458	++nPackages; // count total active packages
				459	if (num_active_cores > nCoresPerPkg)
				460	nCoresPerPkg = num_active_cores; // calc maximum
				461	}
				462	}
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	463
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	464	// If there's only one thread context to bind to, return now.
				465	KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
				466	KMP_ASSERT(nActiveThreads > 0);
				467	if (nActiveThreads == 1) {
				468	__kmp_ncores = nPackages = 1;
				469	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
				470	if (__kmp_affinity_verbose) {
				471	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				472	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				473
				474	KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
				475	if (__kmp_affinity_respect_mask) {
				476	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				477	} else {
				478	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				479	}
				480	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				481	KMP_INFORM(Uniform, "KMP_AFFINITY");
				482	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				483	__kmp_nThreadsPerCore, __kmp_ncores);
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	484	}
				485
				486	if (__kmp_affinity_type == affinity_none) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	487	__kmp_free(retval);
				488	KMP_CPU_FREE(oldMask);
				489	return 0;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	490	}
				491
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	492	// Form an Address object which only includes the package level.
				493	Address addr(1);
				494	addr.labels[0] = retval[0].first.labels[pkgLevel];
				495	retval[0].first = addr;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	496
				497	if (__kmp_affinity_gran_levels < 0) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	498	__kmp_affinity_gran_levels = 0;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	499	}
				500
				501	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	502	__kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	503	}
				504
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	505	*address2os = retval;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	506	KMP_CPU_FREE(oldMask);
				507	return 1;
				508	}
				509
				510	// Sort the table by physical Id.
				511	qsort(retval, nActiveThreads, sizeof(*retval),
				512	__kmp_affinity_cmp_Address_labels);
				513
				514	// Check to see if the machine topology is uniform
				515	unsigned uniform =
				516	(nPackages * nCoresPerPkg * __kmp_nThreadsPerCore == nActiveThreads);
				517
				518	// Print the machine topology summary.
				519	if (__kmp_affinity_verbose) {
				520	char mask[KMP_AFFIN_MASK_PRINT_LEN];
				521	__kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				522
				523	KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
				524	if (__kmp_affinity_respect_mask) {
				525	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
				526	} else {
				527	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
				528	}
				529	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				530	if (uniform) {
				531	KMP_INFORM(Uniform, "KMP_AFFINITY");
				532	} else {
				533	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				534	}
				535
				536	kmp_str_buf_t buf;
				537	__kmp_str_buf_init(&buf);
				538
				539	__kmp_str_buf_print(&buf, "%d", nPackages);
				540	// for (level = 1; level <= pkgLevel; level++) {
				541	// __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
				542	// }
				543	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
				544	__kmp_nThreadsPerCore, __kmp_ncores);
				545
				546	__kmp_str_buf_free(&buf);
				547	}
				548
				549	if (__kmp_affinity_type == affinity_none) {
				550	__kmp_free(retval);
				551	KMP_CPU_FREE(oldMask);
				552	return 0;
				553	}
				554
				555	// Find any levels with radiix 1, and remove them from the map
				556	// (except for the package level).
				557	depth = __kmp_affinity_remove_radix_one_levels(
				558	retval, nActiveThreads, depth, &pkgLevel, &coreLevel, &threadLevel);
				559
				560	if (__kmp_affinity_gran_levels < 0) {
				561	// Set the granularity level based on what levels are modeled
				562	// in the machine topology map.
				563	__kmp_affinity_gran_levels = 0;
				564	if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
				565	__kmp_affinity_gran_levels++;
				566	}
				567	if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
				568	__kmp_affinity_gran_levels++;
				569	}
				570	if (__kmp_affinity_gran > affinity_gran_package) {
				571	__kmp_affinity_gran_levels++;
				572	}
				573	}
				574
				575	if (__kmp_affinity_verbose) {
				576	__kmp_affinity_print_topology(retval, nActiveThreads, depth, pkgLevel,
				577	coreLevel, threadLevel);
				578	}
				579
				580	KMP_CPU_FREE(oldMask);
				581	*address2os = retval;
				582	return depth;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	583	}
				584	#endif // KMP_USE_HWLOC
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	585
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	586	// If we don't know how to retrieve the machine's processor topology, or
				587	// encounter an error in doing so, this routine is called to form a "flat"
				588	// mapping of os thread id's <-> processor id's.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	589	static int __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
				590	kmp_i18n_id_t *const msg_id) {
				591	*address2os = NULL;
				592	*msg_id = kmp_i18n_null;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	593
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	594	// Even if __kmp_affinity_type == affinity_none, this routine might still
				595	// called to set __kmp_ncores, as well as
				596	// __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
				597	if (!KMP_AFFINITY_CAPABLE()) {
				598	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				599	__kmp_ncores = nPackages = __kmp_xproc;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	600	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	601	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	602	KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
				603	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				604	KMP_INFORM(Uniform, "KMP_AFFINITY");
				605	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				606	__kmp_nThreadsPerCore, __kmp_ncores);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	607	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	608	return 0;
				609	}
				610
				611	// When affinity is off, this routine will still be called to set
				612	// __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
				613	// Make sure all these vars are set correctly, and return now if affinity is
				614	// not enabled.
				615	__kmp_ncores = nPackages = __kmp_avail_proc;
				616	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
				617	if (__kmp_affinity_verbose) {
				618	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				619	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				620	__kmp_affin_fullMask);
				621
				622	KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
				623	if (__kmp_affinity_respect_mask) {
				624	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				625	} else {
				626	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	627	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	628	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				629	KMP_INFORM(Uniform, "KMP_AFFINITY");
				630	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				631	__kmp_nThreadsPerCore, __kmp_ncores);
				632	}
				633	KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
				634	__kmp_pu_os_idx = (int )__kmp_allocate(sizeof(int) __kmp_avail_proc);
				635	if (__kmp_affinity_type == affinity_none) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	636	int avail_ct = 0;
				637	int i;
Jonathan Peyton	c5304aa	2016-06-13 21:28:03 +0000	[diff] [blame]	638	KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	639	if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask))
				640	continue;
				641	__kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	642	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	643	return 0;
				644	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	645
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	646	// Contruct the data structure to be returned.
				647	*address2os =
				648	(AddrUnsPair )__kmp_allocate(sizeof(address2os) __kmp_avail_proc);
				649	int avail_ct = 0;
				650	unsigned int i;
				651	KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
				652	// Skip this proc if it is not included in the machine model.
				653	if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
				654	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	655	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	656	__kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
				657	Address addr(1);
				658	addr.labels[0] = i;
				659	(*address2os)[avail_ct++] = AddrUnsPair(addr, i);
				660	}
				661	if (__kmp_affinity_verbose) {
				662	KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
				663	}
				664
				665	if (__kmp_affinity_gran_levels < 0) {
				666	// Only the package level is modeled in the machine topology map,
				667	// so the #levels of granularity is either 0 or 1.
				668	if (__kmp_affinity_gran > affinity_gran_package) {
				669	__kmp_affinity_gran_levels = 1;
				670	} else {
				671	__kmp_affinity_gran_levels = 0;
				672	}
				673	}
				674	return 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	675	}
				676
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	677	#if KMP_GROUP_AFFINITY
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	678
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	679	// If multiple Windows* OS processor groups exist, we can create a 2-level
				680	// topology map with the groups at level 0 and the individual procs at level 1.
				681	// This facilitates letting the threads float among all procs in a group,
				682	// if granularity=group (the default when there are multiple groups).
				683	static int __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
				684	kmp_i18n_id_t *const msg_id) {
				685	*address2os = NULL;
				686	*msg_id = kmp_i18n_null;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	687
Jonathan Peyton	5868499	2017-05-15 19:05:59 +0000	[diff] [blame]	688	// If we aren't affinity capable, then return now.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	689	// The flat mapping will be used.
Jonathan Peyton	5868499	2017-05-15 19:05:59 +0000	[diff] [blame]	690	if (!KMP_AFFINITY_CAPABLE()) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	691	// FIXME set *msg_id
				692	return -1;
				693	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	694
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	695	// Contruct the data structure to be returned.
				696	*address2os =
				697	(AddrUnsPair )__kmp_allocate(sizeof(address2os) __kmp_avail_proc);
				698	KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
				699	__kmp_pu_os_idx = (int )__kmp_allocate(sizeof(int) __kmp_avail_proc);
				700	int avail_ct = 0;
				701	int i;
				702	KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
				703	// Skip this proc if it is not included in the machine model.
				704	if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
				705	continue;
				706	}
				707	__kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
				708	Address addr(2);
				709	addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
				710	addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
				711	(*address2os)[avail_ct++] = AddrUnsPair(addr, i);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	712
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	713	if (__kmp_affinity_verbose) {
				714	KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
				715	addr.labels[1]);
				716	}
				717	}
				718
				719	if (__kmp_affinity_gran_levels < 0) {
				720	if (__kmp_affinity_gran == affinity_gran_group) {
				721	__kmp_affinity_gran_levels = 1;
				722	} else if ((__kmp_affinity_gran == affinity_gran_fine) \|\|
				723	(__kmp_affinity_gran == affinity_gran_thread)) {
				724	__kmp_affinity_gran_levels = 0;
				725	} else {
				726	const char *gran_str = NULL;
				727	if (__kmp_affinity_gran == affinity_gran_core) {
				728	gran_str = "core";
				729	} else if (__kmp_affinity_gran == affinity_gran_package) {
				730	gran_str = "package";
				731	} else if (__kmp_affinity_gran == affinity_gran_node) {
				732	gran_str = "node";
				733	} else {
				734	KMP_ASSERT(0);
				735	}
				736
				737	// Warning: can't use affinity granularity \"gran\" with group topology
				738	// method, using "thread"
				739	__kmp_affinity_gran_levels = 0;
				740	}
				741	}
				742	return 2;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	743	}
				744
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	745	#endif /* KMP_GROUP_AFFINITY */
				746
				747	#if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
				748
				749	static int __kmp_cpuid_mask_width(int count) {
				750	int r = 0;
				751
				752	while ((1 << r) < count)
				753	++r;
				754	return r;
				755	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	756
				757	class apicThreadInfo {
				758	public:
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	759	unsigned osId; // param to __kmp_affinity_bind_thread
				760	unsigned apicId; // from cpuid after binding
				761	unsigned maxCoresPerPkg; // ""
				762	unsigned maxThreadsPerPkg; // ""
				763	unsigned pkgId; // inferred from above values
				764	unsigned coreId; // ""
				765	unsigned threadId; // ""
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	766	};
				767
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	768	static int __kmp_affinity_cmp_apicThreadInfo_os_id(const void *a,
				769	const void *b) {
				770	const apicThreadInfo aa = (const apicThreadInfo )a;
				771	const apicThreadInfo bb = (const apicThreadInfo )b;
				772	if (aa->osId < bb->osId)
				773	return -1;
				774	if (aa->osId > bb->osId)
				775	return 1;
				776	return 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	777	}
				778
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	779	static int __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a,
				780	const void *b) {
				781	const apicThreadInfo aa = (const apicThreadInfo )a;
				782	const apicThreadInfo bb = (const apicThreadInfo )b;
				783	if (aa->pkgId < bb->pkgId)
				784	return -1;
				785	if (aa->pkgId > bb->pkgId)
				786	return 1;
				787	if (aa->coreId < bb->coreId)
				788	return -1;
				789	if (aa->coreId > bb->coreId)
				790	return 1;
				791	if (aa->threadId < bb->threadId)
				792	return -1;
				793	if (aa->threadId > bb->threadId)
				794	return 1;
				795	return 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	796	}
				797
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	798	// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
				799	// an algorithm which cycles through the available os threads, setting
				800	// the current thread's affinity mask to that thread, and then retrieves
				801	// the Apic Id for each thread context using the cpuid instruction.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	802	static int __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
				803	kmp_i18n_id_t *const msg_id) {
				804	kmp_cpuid buf;
				805	int rc;
				806	*address2os = NULL;
				807	*msg_id = kmp_i18n_null;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	808
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	809	// Check if cpuid leaf 4 is supported.
				810	__kmp_x86_cpuid(0, 0, &buf);
				811	if (buf.eax < 4) {
				812	*msg_id = kmp_i18n_str_NoLeaf4Support;
				813	return -1;
				814	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	815
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	816	// The algorithm used starts by setting the affinity to each available thread
				817	// and retrieving info from the cpuid instruction, so if we are not capable of
				818	// calling __kmp_get_system_affinity() and _kmp_get_system_affinity(), then we
				819	// need to do something else - use the defaults that we calculated from
				820	// issuing cpuid without binding to each proc.
				821	if (!KMP_AFFINITY_CAPABLE()) {
				822	// Hack to try and infer the machine topology using only the data
				823	// available from cpuid on the current thread, and __kmp_xproc.
				824	KMP_ASSERT(__kmp_affinity_type == affinity_none);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	825
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	826	// Get an upper bound on the number of threads per package using cpuid(1).
				827	// On some OS/chps combinations where HT is supported by the chip but is
				828	// disabled, this value will be 2 on a single core chip. Usually, it will be
				829	// 2 if HT is enabled and 1 if HT is disabled.
				830	__kmp_x86_cpuid(1, 0, &buf);
				831	int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
				832	if (maxThreadsPerPkg == 0) {
				833	maxThreadsPerPkg = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	834	}
				835
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	836	// The num cores per pkg comes from cpuid(4). 1 must be added to the encoded
				837	// value.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	838	//
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	839	// The author of cpu_count.cpp treated this only an upper bound on the
				840	// number of cores, but I haven't seen any cases where it was greater than
				841	// the actual number of cores, so we will treat it as exact in this block of
				842	// code.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	843	//
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	844	// First, we need to check if cpuid(4) is supported on this chip. To see if
				845	// cpuid(n) is supported, issue cpuid(0) and check if eax has the value n or
				846	// greater.
				847	__kmp_x86_cpuid(0, 0, &buf);
				848	if (buf.eax >= 4) {
				849	__kmp_x86_cpuid(4, 0, &buf);
				850	nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
				851	} else {
				852	nCoresPerPkg = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	853	}
				854
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	855	// There is no way to reliably tell if HT is enabled without issuing the
				856	// cpuid instruction from every thread, can correlating the cpuid info, so
				857	// if the machine is not affinity capable, we assume that HT is off. We have
				858	// seen quite a few machines where maxThreadsPerPkg is 2, yet the machine
				859	// does not support HT.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	860	//
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	861	// - Older OSes are usually found on machines with older chips, which do not
				862	// support HT.
				863	// - The performance penalty for mistakenly identifying a machine as HT when
				864	// it isn't (which results in blocktime being incorrecly set to 0) is
				865	// greater than the penalty when for mistakenly identifying a machine as
				866	// being 1 thread/core when it is really HT enabled (which results in
				867	// blocktime being incorrectly set to a positive value).
				868	__kmp_ncores = __kmp_xproc;
				869	nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	870	__kmp_nThreadsPerCore = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	871	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	872	KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
				873	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				874	if (__kmp_affinity_uniform_topology()) {
				875	KMP_INFORM(Uniform, "KMP_AFFINITY");
				876	} else {
				877	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				878	}
				879	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				880	__kmp_nThreadsPerCore, __kmp_ncores);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	881	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	882	return 0;
				883	}
				884
				885	// From here on, we can assume that it is safe to call
				886	// __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
				887	// __kmp_affinity_type = affinity_none.
				888
				889	// Save the affinity mask for the current thread.
				890	kmp_affin_mask_t *oldMask;
				891	KMP_CPU_ALLOC(oldMask);
				892	KMP_ASSERT(oldMask != NULL);
				893	__kmp_get_system_affinity(oldMask, TRUE);
				894
				895	// Run through each of the available contexts, binding the current thread
				896	// to it, and obtaining the pertinent information using the cpuid instr.
				897	//
				898	// The relevant information is:
				899	// - Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
				900	// has a uniqie Apic Id, which is of the form pkg# : core# : thread#.
				901	// - Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The value
				902	// of this field determines the width of the core# + thread# fields in the
				903	// Apic Id. It is also an upper bound on the number of threads per
				904	// package, but it has been verified that situations happen were it is not
				905	// exact. In particular, on certain OS/chip combinations where Intel(R)
				906	// Hyper-Threading Technology is supported by the chip but has been
				907	// disabled, the value of this field will be 2 (for a single core chip).
				908	// On other OS/chip combinations supporting Intel(R) Hyper-Threading
				909	// Technology, the value of this field will be 1 when Intel(R)
				910	// Hyper-Threading Technology is disabled and 2 when it is enabled.
				911	// - Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The value
				912	// of this field (+1) determines the width of the core# field in the Apic
				913	// Id. The comments in "cpucount.cpp" say that this value is an upper
				914	// bound, but the IA-32 architecture manual says that it is exactly the
				915	// number of cores per package, and I haven't seen any case where it
				916	// wasn't.
				917	//
				918	// From this information, deduce the package Id, core Id, and thread Id,
				919	// and set the corresponding fields in the apicThreadInfo struct.
				920	unsigned i;
				921	apicThreadInfo threadInfo = (apicThreadInfo )__kmp_allocate(
				922	__kmp_avail_proc * sizeof(apicThreadInfo));
				923	unsigned nApics = 0;
				924	KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
				925	// Skip this proc if it is not included in the machine model.
				926	if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
				927	continue;
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	928	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	929	KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
				930
				931	__kmp_affinity_dispatch->bind_thread(i);
				932	threadInfo[nApics].osId = i;
				933
				934	// The apic id and max threads per pkg come from cpuid(1).
				935	__kmp_x86_cpuid(1, 0, &buf);
				936	if (((buf.edx >> 9) & 1) == 0) {
				937	__kmp_set_system_affinity(oldMask, TRUE);
				938	__kmp_free(threadInfo);
				939	KMP_CPU_FREE(oldMask);
				940	*msg_id = kmp_i18n_str_ApicNotPresent;
				941	return -1;
				942	}
				943	threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
				944	threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
				945	if (threadInfo[nApics].maxThreadsPerPkg == 0) {
				946	threadInfo[nApics].maxThreadsPerPkg = 1;
				947	}
				948
				949	// Max cores per pkg comes from cpuid(4). 1 must be added to the encoded
				950	// value.
				951	//
				952	// First, we need to check if cpuid(4) is supported on this chip. To see if
				953	// cpuid(n) is supported, issue cpuid(0) and check if eax has the value n
				954	// or greater.
				955	__kmp_x86_cpuid(0, 0, &buf);
				956	if (buf.eax >= 4) {
				957	__kmp_x86_cpuid(4, 0, &buf);
				958	threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
				959	} else {
				960	threadInfo[nApics].maxCoresPerPkg = 1;
				961	}
				962
				963	// Infer the pkgId / coreId / threadId using only the info obtained locally.
				964	int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg);
				965	threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
				966
				967	int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg);
				968	int widthT = widthCT - widthC;
				969	if (widthT < 0) {
				970	// I've never seen this one happen, but I suppose it could, if the cpuid
				971	// instruction on a chip was really screwed up. Make sure to restore the
				972	// affinity mask before the tail call.
				973	__kmp_set_system_affinity(oldMask, TRUE);
				974	__kmp_free(threadInfo);
				975	KMP_CPU_FREE(oldMask);
				976	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				977	return -1;
				978	}
				979
				980	int maskC = (1 << widthC) - 1;
				981	threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC;
				982
				983	int maskT = (1 << widthT) - 1;
				984	threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;
				985
				986	nApics++;
				987	}
				988
				989	// We've collected all the info we need.
				990	// Restore the old affinity mask for this thread.
				991	__kmp_set_system_affinity(oldMask, TRUE);
				992
				993	// If there's only one thread context to bind to, form an Address object
				994	// with depth 1 and return immediately (or, if affinity is off, set
				995	// address2os to NULL and return).
				996	//
				997	// If it is configured to omit the package level when there is only a single
				998	// package, the logic at the end of this routine won't work if there is only
				999	// a single thread - it would try to form an Address object with depth 0.
				1000	KMP_ASSERT(nApics > 0);
				1001	if (nApics == 1) {
				1002	__kmp_ncores = nPackages = 1;
				1003	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
				1004	if (__kmp_affinity_verbose) {
				1005	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				1006	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1007
				1008	KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
				1009	if (__kmp_affinity_respect_mask) {
				1010	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				1011	} else {
				1012	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				1013	}
				1014	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1015	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1016	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1017	__kmp_nThreadsPerCore, __kmp_ncores);
				1018	}
				1019
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1020	if (__kmp_affinity_type == affinity_none) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1021	__kmp_free(threadInfo);
				1022	KMP_CPU_FREE(oldMask);
				1023	return 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1024	}
				1025
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1026	address2os = (AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair));
				1027	Address addr(1);
				1028	addr.labels[0] = threadInfo[0].pkgId;
				1029	(*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1030
				1031	if (__kmp_affinity_gran_levels < 0) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1032	__kmp_affinity_gran_levels = 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1033	}
				1034
				1035	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1036	__kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1037	}
				1038
				1039	__kmp_free(threadInfo);
				1040	KMP_CPU_FREE(oldMask);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1041	return 1;
				1042	}
				1043
				1044	// Sort the threadInfo table by physical Id.
				1045	qsort(threadInfo, nApics, sizeof(*threadInfo),
				1046	__kmp_affinity_cmp_apicThreadInfo_phys_id);
				1047
				1048	// The table is now sorted by pkgId / coreId / threadId, but we really don't
				1049	// know the radix of any of the fields. pkgId's may be sparsely assigned among
				1050	// the chips on a system. Although coreId's are usually assigned
				1051	// [0 .. coresPerPkg-1] and threadId's are usually assigned
				1052	// [0..threadsPerCore-1], we don't want to make any such assumptions.
				1053	//
				1054	// For that matter, we don't know what coresPerPkg and threadsPerCore (or the
				1055	// total # packages) are at this point - we want to determine that now. We
				1056	// only have an upper bound on the first two figures.
				1057	//
				1058	// We also perform a consistency check at this point: the values returned by
				1059	// the cpuid instruction for any thread bound to a given package had better
				1060	// return the same info for maxThreadsPerPkg and maxCoresPerPkg.
				1061	nPackages = 1;
				1062	nCoresPerPkg = 1;
				1063	__kmp_nThreadsPerCore = 1;
				1064	unsigned nCores = 1;
				1065
				1066	unsigned pkgCt = 1; // to determine radii
				1067	unsigned lastPkgId = threadInfo[0].pkgId;
				1068	unsigned coreCt = 1;
				1069	unsigned lastCoreId = threadInfo[0].coreId;
				1070	unsigned threadCt = 1;
				1071	unsigned lastThreadId = threadInfo[0].threadId;
				1072
				1073	// intra-pkg consist checks
				1074	unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
				1075	unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
				1076
				1077	for (i = 1; i < nApics; i++) {
				1078	if (threadInfo[i].pkgId != lastPkgId) {
				1079	nCores++;
				1080	pkgCt++;
				1081	lastPkgId = threadInfo[i].pkgId;
				1082	if ((int)coreCt > nCoresPerPkg)
				1083	nCoresPerPkg = coreCt;
				1084	coreCt = 1;
				1085	lastCoreId = threadInfo[i].coreId;
				1086	if ((int)threadCt > __kmp_nThreadsPerCore)
				1087	__kmp_nThreadsPerCore = threadCt;
				1088	threadCt = 1;
				1089	lastThreadId = threadInfo[i].threadId;
				1090
				1091	// This is a different package, so go on to the next iteration without
				1092	// doing any consistency checks. Reset the consistency check vars, though.
				1093	prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
				1094	prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
				1095	continue;
				1096	}
				1097
				1098	if (threadInfo[i].coreId != lastCoreId) {
				1099	nCores++;
				1100	coreCt++;
				1101	lastCoreId = threadInfo[i].coreId;
				1102	if ((int)threadCt > __kmp_nThreadsPerCore)
				1103	__kmp_nThreadsPerCore = threadCt;
				1104	threadCt = 1;
				1105	lastThreadId = threadInfo[i].threadId;
				1106	} else if (threadInfo[i].threadId != lastThreadId) {
				1107	threadCt++;
				1108	lastThreadId = threadInfo[i].threadId;
				1109	} else {
				1110	__kmp_free(threadInfo);
				1111	KMP_CPU_FREE(oldMask);
				1112	*msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
				1113	return -1;
				1114	}
				1115
				1116	// Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
				1117	// fields agree between all the threads bounds to a given package.
				1118	if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) \|\|
				1119	(prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
				1120	__kmp_free(threadInfo);
				1121	KMP_CPU_FREE(oldMask);
				1122	*msg_id = kmp_i18n_str_InconsistentCpuidInfo;
				1123	return -1;
				1124	}
				1125	}
				1126	nPackages = pkgCt;
				1127	if ((int)coreCt > nCoresPerPkg)
				1128	nCoresPerPkg = coreCt;
				1129	if ((int)threadCt > __kmp_nThreadsPerCore)
				1130	__kmp_nThreadsPerCore = threadCt;
				1131
				1132	// When affinity is off, this routine will still be called to set
				1133	// __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
				1134	// Make sure all these vars are set correctly, and return now if affinity is
				1135	// not enabled.
				1136	__kmp_ncores = nCores;
				1137	if (__kmp_affinity_verbose) {
				1138	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				1139	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1140
				1141	KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
				1142	if (__kmp_affinity_respect_mask) {
				1143	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				1144	} else {
				1145	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				1146	}
				1147	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1148	if (__kmp_affinity_uniform_topology()) {
				1149	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1150	} else {
				1151	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				1152	}
				1153	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1154	__kmp_nThreadsPerCore, __kmp_ncores);
				1155	}
				1156	KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
				1157	KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
				1158	__kmp_pu_os_idx = (int )__kmp_allocate(sizeof(int) __kmp_avail_proc);
				1159	for (i = 0; i < nApics; ++i) {
				1160	__kmp_pu_os_idx[i] = threadInfo[i].osId;
				1161	}
				1162	if (__kmp_affinity_type == affinity_none) {
				1163	__kmp_free(threadInfo);
				1164	KMP_CPU_FREE(oldMask);
				1165	return 0;
				1166	}
				1167
				1168	// Now that we've determined the number of packages, the number of cores per
				1169	// package, and the number of threads per core, we can construct the data
				1170	// structure that is to be returned.
				1171	int pkgLevel = 0;
				1172	int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
				1173	int threadLevel =
				1174	(__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
				1175	unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
				1176
				1177	KMP_ASSERT(depth > 0);
				1178	address2os = (AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair) * nApics);
				1179
				1180	for (i = 0; i < nApics; ++i) {
				1181	Address addr(depth);
				1182	unsigned os = threadInfo[i].osId;
				1183	int d = 0;
				1184
				1185	if (pkgLevel >= 0) {
				1186	addr.labels[d++] = threadInfo[i].pkgId;
				1187	}
				1188	if (coreLevel >= 0) {
				1189	addr.labels[d++] = threadInfo[i].coreId;
				1190	}
				1191	if (threadLevel >= 0) {
				1192	addr.labels[d++] = threadInfo[i].threadId;
				1193	}
				1194	(*address2os)[i] = AddrUnsPair(addr, os);
				1195	}
				1196
				1197	if (__kmp_affinity_gran_levels < 0) {
				1198	// Set the granularity level based on what levels are modeled in the machine
				1199	// topology map.
				1200	__kmp_affinity_gran_levels = 0;
				1201	if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
				1202	__kmp_affinity_gran_levels++;
				1203	}
				1204	if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
				1205	__kmp_affinity_gran_levels++;
				1206	}
				1207	if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
				1208	__kmp_affinity_gran_levels++;
				1209	}
				1210	}
				1211
				1212	if (__kmp_affinity_verbose) {
				1213	__kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
				1214	coreLevel, threadLevel);
				1215	}
				1216
				1217	__kmp_free(threadInfo);
				1218	KMP_CPU_FREE(oldMask);
				1219	return depth;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1220	}
				1221
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1222	// Intel(R) microarchitecture code name Nehalem, Dunnington and later
				1223	// architectures support a newer interface for specifying the x2APIC Ids,
				1224	// based on cpuid leaf 11.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1225	static int __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
				1226	kmp_i18n_id_t *const msg_id) {
				1227	kmp_cpuid buf;
				1228	*address2os = NULL;
				1229	*msg_id = kmp_i18n_null;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1230
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1231	// Check to see if cpuid leaf 11 is supported.
				1232	__kmp_x86_cpuid(0, 0, &buf);
				1233	if (buf.eax < 11) {
				1234	*msg_id = kmp_i18n_str_NoLeaf11Support;
				1235	return -1;
				1236	}
				1237	__kmp_x86_cpuid(11, 0, &buf);
				1238	if (buf.ebx == 0) {
				1239	*msg_id = kmp_i18n_str_NoLeaf11Support;
				1240	return -1;
				1241	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1242
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1243	// Find the number of levels in the machine topology. While we're at it, get
				1244	// the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will try to
				1245	// get more accurate values later by explicitly counting them, but get
				1246	// reasonable defaults now, in case we return early.
				1247	int level;
				1248	int threadLevel = -1;
				1249	int coreLevel = -1;
				1250	int pkgLevel = -1;
				1251	__kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
				1252
				1253	for (level = 0;; level++) {
				1254	if (level > 31) {
				1255	// FIXME: Hack for DPD200163180
				1256	//
				1257	// If level is big then something went wrong -> exiting
				1258	//
				1259	// There could actually be 32 valid levels in the machine topology, but so
				1260	// far, the only machine we have seen which does not exit this loop before
				1261	// iteration 32 has fubar x2APIC settings.
				1262	//
				1263	// For now, just reject this case based upon loop trip count.
				1264	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1265	return -1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1266	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1267	__kmp_x86_cpuid(11, level, &buf);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1268	if (buf.ebx == 0) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1269	if (pkgLevel < 0) {
				1270	// Will infer nPackages from __kmp_xproc
				1271	pkgLevel = level;
				1272	level++;
				1273	}
				1274	break;
				1275	}
				1276	int kind = (buf.ecx >> 8) & 0xff;
				1277	if (kind == 1) {
				1278	// SMT level
				1279	threadLevel = level;
				1280	coreLevel = -1;
				1281	pkgLevel = -1;
				1282	__kmp_nThreadsPerCore = buf.ebx & 0xffff;
				1283	if (__kmp_nThreadsPerCore == 0) {
				1284	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1285	return -1;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1286	}
				1287	} else if (kind == 2) {
				1288	// core level
				1289	coreLevel = level;
				1290	pkgLevel = -1;
				1291	nCoresPerPkg = buf.ebx & 0xffff;
				1292	if (nCoresPerPkg == 0) {
				1293	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1294	return -1;
				1295	}
				1296	} else {
				1297	if (level <= 0) {
				1298	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1299	return -1;
				1300	}
				1301	if (pkgLevel >= 0) {
				1302	continue;
				1303	}
				1304	pkgLevel = level;
				1305	nPackages = buf.ebx & 0xffff;
				1306	if (nPackages == 0) {
				1307	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1308	return -1;
				1309	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1310	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1311	}
				1312	int depth = level;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1313
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1314	// In the above loop, "level" was counted from the finest level (usually
				1315	// thread) to the coarsest. The caller expects that we will place the labels
				1316	// in (*address2os)[].first.labels[] in the inverse order, so we need to
				1317	// invert the vars saying which level means what.
				1318	if (threadLevel >= 0) {
				1319	threadLevel = depth - threadLevel - 1;
				1320	}
				1321	if (coreLevel >= 0) {
				1322	coreLevel = depth - coreLevel - 1;
				1323	}
				1324	KMP_DEBUG_ASSERT(pkgLevel >= 0);
				1325	pkgLevel = depth - pkgLevel - 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1326
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1327	// The algorithm used starts by setting the affinity to each available thread
				1328	// and retrieving info from the cpuid instruction, so if we are not capable of
				1329	// calling __kmp_get_system_affinity() and __kmp_set_system_affinity(), then we
				1330	// need to do something else - use the defaults that we calculated from
				1331	// issuing cpuid without binding to each proc.
				1332	if (!KMP_AFFINITY_CAPABLE()) {
				1333	// Hack to try and infer the machine topology using only the data
				1334	// available from cpuid on the current thread, and __kmp_xproc.
				1335	KMP_ASSERT(__kmp_affinity_type == affinity_none);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1336
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1337	__kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
				1338	nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1339	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1340	KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
				1341	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1342	if (__kmp_affinity_uniform_topology()) {
				1343	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1344	} else {
				1345	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				1346	}
				1347	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1348	__kmp_nThreadsPerCore, __kmp_ncores);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1349	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1350	return 0;
				1351	}
				1352
				1353	// From here on, we can assume that it is safe to call
				1354	// __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
				1355	// __kmp_affinity_type = affinity_none.
				1356
				1357	// Save the affinity mask for the current thread.
				1358	kmp_affin_mask_t *oldMask;
				1359	KMP_CPU_ALLOC(oldMask);
				1360	__kmp_get_system_affinity(oldMask, TRUE);
				1361
				1362	// Allocate the data structure to be returned.
				1363	AddrUnsPair *retval =
				1364	(AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair) __kmp_avail_proc);
				1365
				1366	// Run through each of the available contexts, binding the current thread
				1367	// to it, and obtaining the pertinent information using the cpuid instr.
				1368	unsigned int proc;
				1369	int nApics = 0;
				1370	KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
				1371	// Skip this proc if it is not included in the machine model.
				1372	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
				1373	continue;
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	1374	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1375	KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
				1376
				1377	__kmp_affinity_dispatch->bind_thread(proc);
				1378
				1379	// Extract labels for each level in the machine topology map from Apic ID.
				1380	Address addr(depth);
				1381	int prev_shift = 0;
				1382
				1383	for (level = 0; level < depth; level++) {
				1384	__kmp_x86_cpuid(11, level, &buf);
				1385	unsigned apicId = buf.edx;
				1386	if (buf.ebx == 0) {
				1387	if (level != depth - 1) {
				1388	KMP_CPU_FREE(oldMask);
				1389	*msg_id = kmp_i18n_str_InconsistentCpuidInfo;
				1390	return -1;
				1391	}
				1392	addr.labels[depth - level - 1] = apicId >> prev_shift;
				1393	level++;
				1394	break;
				1395	}
				1396	int shift = buf.eax & 0x1f;
				1397	int mask = (1 << shift) - 1;
				1398	addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
				1399	prev_shift = shift;
				1400	}
				1401	if (level != depth) {
				1402	KMP_CPU_FREE(oldMask);
				1403	*msg_id = kmp_i18n_str_InconsistentCpuidInfo;
				1404	return -1;
				1405	}
				1406
				1407	retval[nApics] = AddrUnsPair(addr, proc);
				1408	nApics++;
				1409	}
				1410
				1411	// We've collected all the info we need.
				1412	// Restore the old affinity mask for this thread.
				1413	__kmp_set_system_affinity(oldMask, TRUE);
				1414
				1415	// If there's only one thread context to bind to, return now.
				1416	KMP_ASSERT(nApics > 0);
				1417	if (nApics == 1) {
				1418	__kmp_ncores = nPackages = 1;
				1419	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
				1420	if (__kmp_affinity_verbose) {
				1421	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				1422	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1423
				1424	KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
				1425	if (__kmp_affinity_respect_mask) {
				1426	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				1427	} else {
				1428	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				1429	}
				1430	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1431	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1432	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1433	__kmp_nThreadsPerCore, __kmp_ncores);
				1434	}
				1435
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1436	if (__kmp_affinity_type == affinity_none) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1437	__kmp_free(retval);
				1438	KMP_CPU_FREE(oldMask);
				1439	return 0;
				1440	}
				1441
				1442	// Form an Address object which only includes the package level.
				1443	Address addr(1);
				1444	addr.labels[0] = retval[0].first.labels[pkgLevel];
				1445	retval[0].first = addr;
				1446
				1447	if (__kmp_affinity_gran_levels < 0) {
				1448	__kmp_affinity_gran_levels = 0;
				1449	}
				1450
				1451	if (__kmp_affinity_verbose) {
				1452	__kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
				1453	}
				1454
				1455	*address2os = retval;
				1456	KMP_CPU_FREE(oldMask);
				1457	return 1;
				1458	}
				1459
				1460	// Sort the table by physical Id.
				1461	qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
				1462
				1463	// Find the radix at each of the levels.
				1464	unsigned totals = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1465	unsigned counts = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1466	unsigned maxCt = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1467	unsigned last = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1468	for (level = 0; level < depth; level++) {
				1469	totals[level] = 1;
				1470	maxCt[level] = 1;
				1471	counts[level] = 1;
				1472	last[level] = retval[0].first.labels[level];
				1473	}
				1474
				1475	// From here on, the iteration variable "level" runs from the finest level to
				1476	// the coarsest, i.e. we iterate forward through
				1477	// (*address2os)[].first.labels[] - in the previous loops, we iterated
				1478	// backwards.
				1479	for (proc = 1; (int)proc < nApics; proc++) {
				1480	int level;
				1481	for (level = 0; level < depth; level++) {
				1482	if (retval[proc].first.labels[level] != last[level]) {
				1483	int j;
				1484	for (j = level + 1; j < depth; j++) {
				1485	totals[j]++;
				1486	counts[j] = 1;
				1487	// The line below causes printing incorrect topology information in
				1488	// case the max value for some level (maxCt[level]) is encountered
				1489	// earlier than some less value while going through the array. For
				1490	// example, let pkg0 has 4 cores and pkg1 has 2 cores. Then
				1491	// maxCt[1] == 2
				1492	// whereas it must be 4.
				1493	// TODO!!! Check if it can be commented safely
				1494	// maxCt[j] = 1;
				1495	last[j] = retval[proc].first.labels[j];
				1496	}
				1497	totals[level]++;
				1498	counts[level]++;
				1499	if (counts[level] > maxCt[level]) {
				1500	maxCt[level] = counts[level];
				1501	}
				1502	last[level] = retval[proc].first.labels[level];
				1503	break;
				1504	} else if (level == depth - 1) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1505	__kmp_free(last);
				1506	__kmp_free(maxCt);
				1507	__kmp_free(counts);
				1508	__kmp_free(totals);
				1509	__kmp_free(retval);
				1510	KMP_CPU_FREE(oldMask);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1511	*msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
				1512	return -1;
				1513	}
				1514	}
				1515	}
				1516
				1517	// When affinity is off, this routine will still be called to set
				1518	// __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
				1519	// Make sure all these vars are set correctly, and return if affinity is not
				1520	// enabled.
				1521	if (threadLevel >= 0) {
				1522	__kmp_nThreadsPerCore = maxCt[threadLevel];
				1523	} else {
				1524	__kmp_nThreadsPerCore = 1;
				1525	}
				1526	nPackages = totals[pkgLevel];
				1527
				1528	if (coreLevel >= 0) {
				1529	__kmp_ncores = totals[coreLevel];
				1530	nCoresPerPkg = maxCt[coreLevel];
				1531	} else {
				1532	__kmp_ncores = nPackages;
				1533	nCoresPerPkg = 1;
				1534	}
				1535
				1536	// Check to see if the machine topology is uniform
				1537	unsigned prod = maxCt[0];
				1538	for (level = 1; level < depth; level++) {
				1539	prod *= maxCt[level];
				1540	}
				1541	bool uniform = (prod == totals[level - 1]);
				1542
				1543	// Print the machine topology summary.
				1544	if (__kmp_affinity_verbose) {
				1545	char mask[KMP_AFFIN_MASK_PRINT_LEN];
				1546	__kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1547
				1548	KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
				1549	if (__kmp_affinity_respect_mask) {
				1550	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
				1551	} else {
				1552	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
				1553	}
				1554	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1555	if (uniform) {
				1556	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1557	} else {
				1558	KMP_INFORM(NonUniform, "KMP_AFFINITY");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1559	}
				1560
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1561	kmp_str_buf_t buf;
				1562	__kmp_str_buf_init(&buf);
				1563
				1564	__kmp_str_buf_print(&buf, "%d", totals[0]);
				1565	for (level = 1; level <= pkgLevel; level++) {
				1566	__kmp_str_buf_print(&buf, " x %d", maxCt[level]);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1567	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1568	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
				1569	__kmp_nThreadsPerCore, __kmp_ncores);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1570
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1571	__kmp_str_buf_free(&buf);
				1572	}
				1573	KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
				1574	KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
				1575	__kmp_pu_os_idx = (int )__kmp_allocate(sizeof(int) __kmp_avail_proc);
				1576	for (proc = 0; (int)proc < nApics; ++proc) {
				1577	__kmp_pu_os_idx[proc] = retval[proc].second;
				1578	}
				1579	if (__kmp_affinity_type == affinity_none) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1580	__kmp_free(last);
				1581	__kmp_free(maxCt);
				1582	__kmp_free(counts);
				1583	__kmp_free(totals);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1584	__kmp_free(retval);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1585	KMP_CPU_FREE(oldMask);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1586	return 0;
				1587	}
				1588
				1589	// Find any levels with radix 1, and remove them from the map
				1590	// (except for the package level).
				1591	int new_depth = 0;
				1592	for (level = 0; level < depth; level++) {
				1593	if ((maxCt[level] == 1) && (level != pkgLevel)) {
				1594	continue;
				1595	}
				1596	new_depth++;
				1597	}
				1598
				1599	// If we are removing any levels, allocate a new vector to return,
				1600	// and copy the relevant information to it.
				1601	if (new_depth != depth) {
				1602	AddrUnsPair *new_retval =
				1603	(AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair) nApics);
				1604	for (proc = 0; (int)proc < nApics; proc++) {
				1605	Address addr(new_depth);
				1606	new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
				1607	}
				1608	int new_level = 0;
				1609	int newPkgLevel = -1;
				1610	int newCoreLevel = -1;
				1611	int newThreadLevel = -1;
				1612	int i;
				1613	for (level = 0; level < depth; level++) {
				1614	if ((maxCt[level] == 1) && (level != pkgLevel)) {
				1615	// Remove this level. Never remove the package level
				1616	continue;
				1617	}
				1618	if (level == pkgLevel) {
				1619	newPkgLevel = level;
				1620	}
				1621	if (level == coreLevel) {
				1622	newCoreLevel = level;
				1623	}
				1624	if (level == threadLevel) {
				1625	newThreadLevel = level;
				1626	}
				1627	for (proc = 0; (int)proc < nApics; proc++) {
				1628	new_retval[proc].first.labels[new_level] =
				1629	retval[proc].first.labels[level];
				1630	}
				1631	new_level++;
				1632	}
				1633
				1634	__kmp_free(retval);
				1635	retval = new_retval;
				1636	depth = new_depth;
				1637	pkgLevel = newPkgLevel;
				1638	coreLevel = newCoreLevel;
				1639	threadLevel = newThreadLevel;
				1640	}
				1641
				1642	if (__kmp_affinity_gran_levels < 0) {
				1643	// Set the granularity level based on what levels are modeled
				1644	// in the machine topology map.
				1645	__kmp_affinity_gran_levels = 0;
				1646	if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
				1647	__kmp_affinity_gran_levels++;
				1648	}
				1649	if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
				1650	__kmp_affinity_gran_levels++;
				1651	}
				1652	if (__kmp_affinity_gran > affinity_gran_package) {
				1653	__kmp_affinity_gran_levels++;
				1654	}
				1655	}
				1656
				1657	if (__kmp_affinity_verbose) {
				1658	__kmp_affinity_print_topology(retval, nApics, depth, pkgLevel, coreLevel,
				1659	threadLevel);
				1660	}
				1661
				1662	__kmp_free(last);
				1663	__kmp_free(maxCt);
				1664	__kmp_free(counts);
				1665	__kmp_free(totals);
				1666	KMP_CPU_FREE(oldMask);
				1667	*address2os = retval;
				1668	return depth;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1669	}
				1670
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1671	#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1672
// Field indices into one processor record (a flat array of unsigned values)
// filled in while parsing /proc/cpuinfo. Slots from nodeIdIndex upward hold
// the "node_<n> id" fields, one slot per node level n.
#define osIdIndex 0 // "processor" field (OS proc id)
#define threadIdIndex 1 // "thread id" field
#define coreIdIndex 2 // "core id" field
#define pkgIdIndex 3 // "physical id" field
#define nodeIdIndex 4 // first "node_<n> id" slot (n == 0)

typedef unsigned *ProcCpuInfo;

// Highest valid index in a processor record. Starts at the package slot and is
// raised by the cpuinfo parser when "node_<n> id" fields are present.
static unsigned maxIndex = pkgIdIndex;

// qsort-style comparator ordering records by ascending OS proc id.
// Returns -1, 0 or 1.
// NOTE(review): a and b are read as pointers to the record data itself,
// whereas the phys_id comparator below expects pointers to `unsigned *`
// elements -- confirm this matches the array layout at the call site.
static int __kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b) {
  const unsigned *aa = static_cast<const unsigned *>(a);
  const unsigned *bb = static_cast<const unsigned *>(b);
  if (aa[osIdIndex] < bb[osIdIndex])
    return -1;
  if (aa[osIdIndex] > bb[osIdIndex])
    return 1;
  return 0;
}

// qsort comparator for an array of record pointers (each element is an
// `unsigned *`). Fields are compared from maxIndex down to osIdIndex, so the
// highest-indexed field is the most significant sort key and the OS proc id is
// the final tie-breaker. Returns -1, 0 or 1.
static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a,
                                                  const void *b) {
  const unsigned *aa = *static_cast<const unsigned *const *>(a);
  const unsigned *bb = *static_cast<const unsigned *const *>(b);
  // i is unsigned, so the loop must exit through the explicit osIdIndex check;
  // an "i >= 0" loop condition would never become false.
  for (unsigned i = maxIndex;; i--) {
    if (aa[i] < bb[i])
      return -1;
    if (aa[i] > bb[i])
      return 1;
    if (i == osIdIndex)
      break;
  }
  return 0;
}
				1707
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1708	// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
				1709	// affinity map.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1710	static int __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os,
				1711	int *line,
				1712	kmp_i18n_id_t *const msg_id,
				1713	FILE *f) {
				1714	*address2os = NULL;
				1715	*msg_id = kmp_i18n_null;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1716
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1717	// Scan of the file, and count the number of "processor" (osId) fields,
				1718	// and find the highest value of <n> for a node_<n> field.
				1719	char buf[256];
				1720	unsigned num_records = 0;
				1721	while (!feof(f)) {
				1722	buf[sizeof(buf) - 1] = 1;
				1723	if (!fgets(buf, sizeof(buf), f)) {
				1724	// Read errors presumably because of EOF
				1725	break;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1726	}
				1727
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1728	char s1[] = "processor";
				1729	if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
				1730	num_records++;
				1731	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1732	}
				1733
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1734	// FIXME - this will match "node_<n> <garbage>"
				1735	unsigned level;
				1736	if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
				1737	if (nodeIdIndex + level >= maxIndex) {
				1738	maxIndex = nodeIdIndex + level;
				1739	}
				1740	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1741	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1742	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1743
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1744	// Check for empty file / no valid processor records, or too many. The number
				1745	// of records can't exceed the number of valid bits in the affinity mask.
				1746	if (num_records == 0) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1747	*line = 0;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1748	*msg_id = kmp_i18n_str_NoProcRecords;
				1749	return -1;
				1750	}
				1751	if (num_records > (unsigned)__kmp_xproc) {
				1752	*line = 0;
				1753	*msg_id = kmp_i18n_str_TooManyProcRecords;
				1754	return -1;
				1755	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1756
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1757	// Set the file pointer back to the beginning, so that we can scan the file
				1758	// again, this time performing a full parse of the data. Allocate a vector of
				1759	// ProcCpuInfo object, where we will place the data. Adding an extra element
				1760	// at the end allows us to remove a lot of extra checks for termination
				1761	// conditions.
				1762	if (fseek(f, 0, SEEK_SET) != 0) {
				1763	*line = 0;
				1764	*msg_id = kmp_i18n_str_CantRewindCpuinfo;
				1765	return -1;
				1766	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1767
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1768	// Allocate the array of records to store the proc info in. The dummy
				1769	// element at the end makes the logic in filling them out easier to code.
				1770	unsigned **threadInfo =
				1771	(unsigned *)__kmp_allocate((num_records + 1) sizeof(unsigned *));
				1772	unsigned i;
				1773	for (i = 0; i <= num_records; i++) {
				1774	threadInfo[i] =
				1775	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned));
				1776	}
				1777
				1778	#define CLEANUP_THREAD_INFO \
				1779	for (i = 0; i <= num_records; i++) { \
				1780	__kmp_free(threadInfo[i]); \
				1781	} \
				1782	__kmp_free(threadInfo);
				1783
				1784	// A value of UINT_MAX means that we didn't find the field
				1785	unsigned __index;
				1786
				1787	#define INIT_PROC_INFO(p) \
				1788	for (__index = 0; __index <= maxIndex; __index++) { \
				1789	(p)[__index] = UINT_MAX; \
				1790	}
				1791
				1792	for (i = 0; i <= num_records; i++) {
				1793	INIT_PROC_INFO(threadInfo[i]);
				1794	}
				1795
				1796	unsigned num_avail = 0;
				1797	*line = 0;
				1798	while (!feof(f)) {
				1799	// Create an inner scoping level, so that all the goto targets at the end of
				1800	// the loop appear in an outer scoping level. This avoids warnings about
				1801	// jumping past an initialization to a target in the same block.
				1802	{
				1803	buf[sizeof(buf) - 1] = 1;
				1804	bool long_line = false;
				1805	if (!fgets(buf, sizeof(buf), f)) {
				1806	// Read errors presumably because of EOF
				1807	// If there is valid data in threadInfo[num_avail], then fake
				1808	// a blank line to ensure that the last address gets parsed.
				1809	bool valid = false;
				1810	for (i = 0; i <= maxIndex; i++) {
				1811	if (threadInfo[num_avail][i] != UINT_MAX) {
				1812	valid = true;
				1813	}
				1814	}
				1815	if (!valid) {
				1816	break;
				1817	}
				1818	buf[0] = 0;
				1819	} else if (!buf[sizeof(buf) - 1]) {
				1820	// The line is longer than the buffer. Set a flag and don't
				1821	// emit an error if we were going to ignore the line, anyway.
				1822	long_line = true;
				1823
				1824	#define CHECK_LINE \
				1825	if (long_line) { \
				1826	CLEANUP_THREAD_INFO; \
				1827	*msg_id = kmp_i18n_str_LongLineCpuinfo; \
				1828	return -1; \
				1829	}
				1830	}
				1831	(*line)++;
				1832
				1833	char s1[] = "processor";
				1834	if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
				1835	CHECK_LINE;
				1836	char *p = strchr(buf + sizeof(s1) - 1, ':');
				1837	unsigned val;
				1838	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
				1839	goto no_val;
				1840	if (threadInfo[num_avail][osIdIndex] != UINT_MAX)
				1841	goto dup_field;
				1842	threadInfo[num_avail][osIdIndex] = val;
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	1843	#if KMP_OS_LINUX && USE_SYSFS_INFO
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1844	char path[256];
				1845	KMP_SNPRINTF(
				1846	path, sizeof(path),
				1847	"/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
				1848	threadInfo[num_avail][osIdIndex]);
				1849	__kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	1850
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1851	KMP_SNPRINTF(path, sizeof(path),
				1852	"/sys/devices/system/cpu/cpu%u/topology/core_id",
				1853	threadInfo[num_avail][osIdIndex]);
				1854	__kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
				1855	continue;
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	1856	#else
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1857	}
				1858	char s2[] = "physical id";
				1859	if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
				1860	CHECK_LINE;
				1861	char *p = strchr(buf + sizeof(s2) - 1, ':');
				1862	unsigned val;
				1863	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
				1864	goto no_val;
				1865	if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX)
				1866	goto dup_field;
				1867	threadInfo[num_avail][pkgIdIndex] = val;
				1868	continue;
				1869	}
				1870	char s3[] = "core id";
				1871	if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
				1872	CHECK_LINE;
				1873	char *p = strchr(buf + sizeof(s3) - 1, ':');
				1874	unsigned val;
				1875	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
				1876	goto no_val;
				1877	if (threadInfo[num_avail][coreIdIndex] != UINT_MAX)
				1878	goto dup_field;
				1879	threadInfo[num_avail][coreIdIndex] = val;
				1880	continue;
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	1881	#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1882	}
				1883	char s4[] = "thread id";
				1884	if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
				1885	CHECK_LINE;
				1886	char *p = strchr(buf + sizeof(s4) - 1, ':');
				1887	unsigned val;
				1888	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
				1889	goto no_val;
				1890	if (threadInfo[num_avail][threadIdIndex] != UINT_MAX)
				1891	goto dup_field;
				1892	threadInfo[num_avail][threadIdIndex] = val;
				1893	continue;
				1894	}
				1895	unsigned level;
				1896	if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
				1897	CHECK_LINE;
				1898	char *p = strchr(buf + sizeof(s4) - 1, ':');
				1899	unsigned val;
				1900	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
				1901	goto no_val;
				1902	KMP_ASSERT(nodeIdIndex + level <= maxIndex);
				1903	if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX)
				1904	goto dup_field;
				1905	threadInfo[num_avail][nodeIdIndex + level] = val;
				1906	continue;
				1907	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1908
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1909	// We didn't recognize the leading token on the line. There are lots of
				1910	// leading tokens that we don't recognize - if the line isn't empty, go on
				1911	// to the next line.
				1912	if ((buf != 0) && (buf != '\n')) {
				1913	// If the line is longer than the buffer, read characters
				1914	// until we find a newline.
				1915	if (long_line) {
				1916	int ch;
				1917	while (((ch = fgetc(f)) != EOF) && (ch != '\n'))
				1918	;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1919	}
				1920	continue;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1921	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1922
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1923	// A newline has signalled the end of the processor record.
				1924	// Check that there aren't too many procs specified.
				1925	if ((int)num_avail == __kmp_xproc) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1926	CLEANUP_THREAD_INFO;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1927	*msg_id = kmp_i18n_str_TooManyEntries;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1928	return -1;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1929	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1930
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1931	// Check for missing fields. The osId field must be there, and we
				1932	// currently require that the physical id field is specified, also.
				1933	if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1934	CLEANUP_THREAD_INFO;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1935	*msg_id = kmp_i18n_str_MissingProcField;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1936	return -1;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1937	}
				1938	if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1939	CLEANUP_THREAD_INFO;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1940	*msg_id = kmp_i18n_str_MissingPhysicalIDField;
				1941	return -1;
				1942	}
				1943
				1944	// Skip this proc if it is not included in the machine model.
				1945	if (!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex],
				1946	__kmp_affin_fullMask)) {
				1947	INIT_PROC_INFO(threadInfo[num_avail]);
				1948	continue;
				1949	}
				1950
				1951	// We have a successful parse of this proc's info.
				1952	// Increment the counter, and prepare for the next proc.
				1953	num_avail++;
				1954	KMP_ASSERT(num_avail <= num_records);
				1955	INIT_PROC_INFO(threadInfo[num_avail]);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1956	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1957	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1958
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1959	no_val:
				1960	CLEANUP_THREAD_INFO;
				1961	*msg_id = kmp_i18n_str_MissingValCpuinfo;
				1962	return -1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1963
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1964	dup_field:
				1965	CLEANUP_THREAD_INFO;
				1966	*msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
				1967	return -1;
				1968	}
				1969	*line = 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1970
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1971	#if KMP_MIC && REDUCE_TEAM_SIZE
				1972	unsigned teamSize = 0;
				1973	#endif // KMP_MIC && REDUCE_TEAM_SIZE
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1974
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1975	// check for num_records == __kmp_xproc ???
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1976
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1977	// If there's only one thread context to bind to, form an Address object with
				1978	// depth 1 and return immediately (or, if affinity is off, set address2os to
				1979	// NULL and return).
				1980	//
				1981	// If it is configured to omit the package level when there is only a single
				1982	// package, the logic at the end of this routine won't work if there is only a
				1983	// single thread - it would try to form an Address object with depth 0.
				1984	KMP_ASSERT(num_avail > 0);
				1985	KMP_ASSERT(num_avail <= num_records);
				1986	if (num_avail == 1) {
				1987	__kmp_ncores = 1;
				1988	__kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1989	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1990	if (!KMP_AFFINITY_CAPABLE()) {
				1991	KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
				1992	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1993	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1994	} else {
				1995	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				1996	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				1997	__kmp_affin_fullMask);
				1998	KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
				1999	if (__kmp_affinity_respect_mask) {
				2000	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				2001	} else {
				2002	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2003	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2004	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				2005	KMP_INFORM(Uniform, "KMP_AFFINITY");
				2006	}
				2007	int index;
				2008	kmp_str_buf_t buf;
				2009	__kmp_str_buf_init(&buf);
				2010	__kmp_str_buf_print(&buf, "1");
				2011	for (index = maxIndex - 1; index > pkgIdIndex; index--) {
				2012	__kmp_str_buf_print(&buf, " x 1");
				2013	}
				2014	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
				2015	__kmp_str_buf_free(&buf);
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	2016	}
				2017
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2018	if (__kmp_affinity_type == affinity_none) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2019	CLEANUP_THREAD_INFO;
				2020	return 0;
				2021	}
				2022
				2023	address2os = (AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair));
				2024	Address addr(1);
				2025	addr.labels[0] = threadInfo[0][pkgIdIndex];
				2026	(*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
				2027
				2028	if (__kmp_affinity_gran_levels < 0) {
				2029	__kmp_affinity_gran_levels = 0;
				2030	}
				2031
				2032	if (__kmp_affinity_verbose) {
				2033	__kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
				2034	}
				2035
				2036	CLEANUP_THREAD_INFO;
				2037	return 1;
				2038	}
				2039
				2040	// Sort the threadInfo table by physical Id.
				2041	qsort(threadInfo, num_avail, sizeof(*threadInfo),
				2042	__kmp_affinity_cmp_ProcCpuInfo_phys_id);
				2043
				2044	// The table is now sorted by pkgId / coreId / threadId, but we really don't
				2045	// know the radix of any of the fields. pkgId's may be sparsely assigned among
				2046	// the chips on a system. Although coreId's are usually assigned
				2047	// [0 .. coresPerPkg-1] and threadId's are usually assigned
				2048	// [0..threadsPerCore-1], we don't want to make any such assumptions.
				2049	//
				2050	// For that matter, we don't know what coresPerPkg and threadsPerCore (or the
				2051	// total # packages) are at this point - we want to determine that now. We
				2052	// only have an upper bound on the first two figures.
				2053	unsigned *counts =
				2054	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned));
				2055	unsigned *maxCt =
				2056	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned));
				2057	unsigned *totals =
				2058	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned));
				2059	unsigned *lastId =
				2060	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned));
				2061
				2062	bool assign_thread_ids = false;
				2063	unsigned threadIdCt;
				2064	unsigned index;
				2065
				2066	restart_radix_check:
				2067	threadIdCt = 0;
				2068
				2069	// Initialize the counter arrays with data from threadInfo[0].
				2070	if (assign_thread_ids) {
				2071	if (threadInfo[0][threadIdIndex] == UINT_MAX) {
				2072	threadInfo[0][threadIdIndex] = threadIdCt++;
				2073	} else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
				2074	threadIdCt = threadInfo[0][threadIdIndex] + 1;
				2075	}
				2076	}
				2077	for (index = 0; index <= maxIndex; index++) {
				2078	counts[index] = 1;
				2079	maxCt[index] = 1;
				2080	totals[index] = 1;
				2081	lastId[index] = threadInfo[0][index];
				2082	;
				2083	}
				2084
				2085	// Run through the rest of the OS procs.
				2086	for (i = 1; i < num_avail; i++) {
				2087	// Find the most significant index whose id differs from the id for the
				2088	// previous OS proc.
				2089	for (index = maxIndex; index >= threadIdIndex; index--) {
				2090	if (assign_thread_ids && (index == threadIdIndex)) {
				2091	// Auto-assign the thread id field if it wasn't specified.
				2092	if (threadInfo[i][threadIdIndex] == UINT_MAX) {
				2093	threadInfo[i][threadIdIndex] = threadIdCt++;
				2094	}
Jonathan Peyton	642688b	2017-06-01 16:46:36 +0000	[diff] [blame]	2095	// Apparently the thread id field was specified for some entries and not
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2096	// others. Start the thread id counter off at the next higher thread id.
				2097	else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
				2098	threadIdCt = threadInfo[i][threadIdIndex] + 1;
				2099	}
				2100	}
				2101	if (threadInfo[i][index] != lastId[index]) {
				2102	// Run through all indices which are less significant, and reset the
				2103	// counts to 1. At all levels up to and including index, we need to
				2104	// increment the totals and record the last id.
				2105	unsigned index2;
				2106	for (index2 = threadIdIndex; index2 < index; index2++) {
				2107	totals[index2]++;
				2108	if (counts[index2] > maxCt[index2]) {
				2109	maxCt[index2] = counts[index2];
				2110	}
				2111	counts[index2] = 1;
				2112	lastId[index2] = threadInfo[i][index2];
				2113	}
				2114	counts[index]++;
				2115	totals[index]++;
				2116	lastId[index] = threadInfo[i][index];
				2117
				2118	if (assign_thread_ids && (index > threadIdIndex)) {
				2119
				2120	#if KMP_MIC && REDUCE_TEAM_SIZE
				2121	// The default team size is the total #threads in the machine
				2122	// minus 1 thread for every core that has 3 or more threads.
				2123	teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
				2124	#endif // KMP_MIC && REDUCE_TEAM_SIZE
				2125
				2126	// Restart the thread counter, as we are on a new core.
				2127	threadIdCt = 0;
				2128
				2129	// Auto-assign the thread id field if it wasn't specified.
				2130	if (threadInfo[i][threadIdIndex] == UINT_MAX) {
				2131	threadInfo[i][threadIdIndex] = threadIdCt++;
				2132	}
				2133
				2134	// Apparently the thread id field was specified for some entries and
				2135	// not others. Start the thread id counter off at the next higher
				2136	// thread id.
				2137	else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
				2138	threadIdCt = threadInfo[i][threadIdIndex] + 1;
				2139	}
				2140	}
				2141	break;
				2142	}
				2143	}
				2144	if (index < threadIdIndex) {
				2145	// If thread ids were specified, it is an error if they are not unique.
				2146	// Also, check that we haven't already restarted the loop (to be safe -
				2147	// shouldn't need to).
				2148	if ((threadInfo[i][threadIdIndex] != UINT_MAX) \|\| assign_thread_ids) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2149	__kmp_free(lastId);
				2150	__kmp_free(totals);
				2151	__kmp_free(maxCt);
				2152	__kmp_free(counts);
				2153	CLEANUP_THREAD_INFO;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2154	*msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
				2155	return -1;
				2156	}
				2157
				2158	// If the thread ids were not specified and we see entries that
				2159	// are duplicates, start the loop over and assign the thread ids manually.
				2160	assign_thread_ids = true;
				2161	goto restart_radix_check;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2162	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2163	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2164
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2165	#if KMP_MIC && REDUCE_TEAM_SIZE
				2166	// The default team size is the total #threads in the machine
				2167	// minus 1 thread for every core that has 3 or more threads.
				2168	teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
				2169	#endif // KMP_MIC && REDUCE_TEAM_SIZE
				2170
				2171	for (index = threadIdIndex; index <= maxIndex; index++) {
				2172	if (counts[index] > maxCt[index]) {
				2173	maxCt[index] = counts[index];
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2174	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2175	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2176
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2177	__kmp_nThreadsPerCore = maxCt[threadIdIndex];
				2178	nCoresPerPkg = maxCt[coreIdIndex];
				2179	nPackages = totals[pkgIdIndex];
				2180
				2181	// Check to see if the machine topology is uniform
				2182	unsigned prod = totals[maxIndex];
				2183	for (index = threadIdIndex; index < maxIndex; index++) {
				2184	prod *= maxCt[index];
				2185	}
				2186	bool uniform = (prod == totals[threadIdIndex]);
				2187
				2188	// When affinity is off, this routine will still be called to set
				2189	// __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
				2190	// Make sure all these vars are set correctly, and return now if affinity is
				2191	// not enabled.
				2192	__kmp_ncores = totals[coreIdIndex];
				2193
				2194	if (__kmp_affinity_verbose) {
				2195	if (!KMP_AFFINITY_CAPABLE()) {
				2196	KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
				2197	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				2198	if (uniform) {
				2199	KMP_INFORM(Uniform, "KMP_AFFINITY");
				2200	} else {
				2201	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				2202	}
				2203	} else {
				2204	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				2205	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				2206	__kmp_affin_fullMask);
				2207	KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
				2208	if (__kmp_affinity_respect_mask) {
				2209	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				2210	} else {
				2211	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				2212	}
				2213	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				2214	if (uniform) {
				2215	KMP_INFORM(Uniform, "KMP_AFFINITY");
				2216	} else {
				2217	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				2218	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2219	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2220	kmp_str_buf_t buf;
				2221	__kmp_str_buf_init(&buf);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2222
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2223	__kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
				2224	for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
				2225	__kmp_str_buf_print(&buf, " x %d", maxCt[index]);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2226	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2227	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
				2228	maxCt[threadIdIndex], __kmp_ncores);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2229
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2230	__kmp_str_buf_free(&buf);
				2231	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2232
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2233	#if KMP_MIC && REDUCE_TEAM_SIZE
				2234	// Set the default team size.
				2235	if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
				2236	__kmp_dflt_team_nth = teamSize;
				2237	KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting "
				2238	"__kmp_dflt_team_nth = %d\n",
				2239	__kmp_dflt_team_nth));
				2240	}
				2241	#endif // KMP_MIC && REDUCE_TEAM_SIZE
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2242
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2243	KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
				2244	KMP_DEBUG_ASSERT(num_avail == __kmp_avail_proc);
				2245	__kmp_pu_os_idx = (int )__kmp_allocate(sizeof(int) __kmp_avail_proc);
				2246	for (i = 0; i < num_avail; ++i) { // fill the os indices
				2247	__kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
				2248	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2249
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2250	if (__kmp_affinity_type == affinity_none) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2251	__kmp_free(lastId);
				2252	__kmp_free(totals);
				2253	__kmp_free(maxCt);
				2254	__kmp_free(counts);
				2255	CLEANUP_THREAD_INFO;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2256	return 0;
				2257	}
				2258
				2259	// Count the number of levels which have more nodes at that level than at the
				2260	// parent's level (with there being an implicit root node of the top level).
				2261	// This is equivalent to saying that there is at least one node at this level
				2262	// which has a sibling. These levels are in the map, and the package level is
				2263	// always in the map.
				2264	bool inMap = (bool )__kmp_allocate((maxIndex + 1) * sizeof(bool));
				2265	int level = 0;
				2266	for (index = threadIdIndex; index < maxIndex; index++) {
				2267	KMP_ASSERT(totals[index] >= totals[index + 1]);
				2268	inMap[index] = (totals[index] > totals[index + 1]);
				2269	}
				2270	inMap[maxIndex] = (totals[maxIndex] > 1);
				2271	inMap[pkgIdIndex] = true;
				2272
				2273	int depth = 0;
				2274	for (index = threadIdIndex; index <= maxIndex; index++) {
				2275	if (inMap[index]) {
				2276	depth++;
				2277	}
				2278	}
				2279	KMP_ASSERT(depth > 0);
				2280
				2281	// Construct the data structure that is to be returned.
				2282	address2os = (AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair) * num_avail);
				2283	int pkgLevel = -1;
				2284	int coreLevel = -1;
				2285	int threadLevel = -1;
				2286
				2287	for (i = 0; i < num_avail; ++i) {
				2288	Address addr(depth);
				2289	unsigned os = threadInfo[i][osIdIndex];
				2290	int src_index;
				2291	int dst_index = 0;
				2292
				2293	for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
				2294	if (!inMap[src_index]) {
				2295	continue;
				2296	}
				2297	addr.labels[dst_index] = threadInfo[i][src_index];
				2298	if (src_index == pkgIdIndex) {
				2299	pkgLevel = dst_index;
				2300	} else if (src_index == coreIdIndex) {
				2301	coreLevel = dst_index;
				2302	} else if (src_index == threadIdIndex) {
				2303	threadLevel = dst_index;
				2304	}
				2305	dst_index++;
				2306	}
				2307	(*address2os)[i] = AddrUnsPair(addr, os);
				2308	}
				2309
				2310	if (__kmp_affinity_gran_levels < 0) {
				2311	// Set the granularity level based on what levels are modeled
				2312	// in the machine topology map.
				2313	unsigned src_index;
				2314	__kmp_affinity_gran_levels = 0;
				2315	for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
				2316	if (!inMap[src_index]) {
				2317	continue;
				2318	}
				2319	switch (src_index) {
				2320	case threadIdIndex:
				2321	if (__kmp_affinity_gran > affinity_gran_thread) {
				2322	__kmp_affinity_gran_levels++;
				2323	}
				2324
				2325	break;
				2326	case coreIdIndex:
				2327	if (__kmp_affinity_gran > affinity_gran_core) {
				2328	__kmp_affinity_gran_levels++;
				2329	}
				2330	break;
				2331
				2332	case pkgIdIndex:
				2333	if (__kmp_affinity_gran > affinity_gran_package) {
				2334	__kmp_affinity_gran_levels++;
				2335	}
				2336	break;
				2337	}
				2338	}
				2339	}
				2340
				2341	if (__kmp_affinity_verbose) {
				2342	__kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
				2343	coreLevel, threadLevel);
				2344	}
				2345
				2346	__kmp_free(inMap);
				2347	__kmp_free(lastId);
				2348	__kmp_free(totals);
				2349	__kmp_free(maxCt);
				2350	__kmp_free(counts);
				2351	CLEANUP_THREAD_INFO;
				2352	return depth;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2353	}
				2354
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2355	// Create and return a table of affinity masks, indexed by OS thread ID.
				2356	// This routine handles OR'ing together all the affinity masks of threads
				2357	// that are sufficiently close, if granularity > fine.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2358	static kmp_affin_mask_t __kmp_create_masks(unsigned maxIndex,
				2359	unsigned *numUnique,
				2360	AddrUnsPair *address2os,
				2361	unsigned numAddrs) {
				2362	// First form a table of affinity masks in order of OS thread id.
				2363	unsigned depth;
				2364	unsigned maxOsId;
				2365	unsigned i;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2366
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2367	KMP_ASSERT(numAddrs > 0);
				2368	depth = address2os[0].first.depth;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2369
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2370	maxOsId = 0;
				2371	for (i = 0; i < numAddrs; i++) {
				2372	unsigned osId = address2os[i].second;
				2373	if (osId > maxOsId) {
				2374	maxOsId = osId;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2375	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2376	}
				2377	kmp_affin_mask_t *osId2Mask;
				2378	KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId + 1));
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2379
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2380	// Sort the address2os table according to physical order. Doing so will put
				2381	// all threads on the same core/package/node in consecutive locations.
				2382	qsort(address2os, numAddrs, sizeof(*address2os),
				2383	__kmp_affinity_cmp_Address_labels);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2384
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2385	KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
				2386	if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
				2387	KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
				2388	}
				2389	if (__kmp_affinity_gran_levels >= (int)depth) {
				2390	if (__kmp_affinity_verbose \|\|
				2391	(__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
				2392	KMP_WARNING(AffThreadsMayMigrate);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2393	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2394	}
				2395
				2396	// Run through the table, forming the masks for all threads on each core.
				2397	// Threads on the same core will have identical "Address" objects, not
				2398	// considering the last level, which must be the thread id. All threads on a
				2399	// core will appear consecutively.
				2400	unsigned unique = 0;
				2401	unsigned j = 0; // index of 1st thread on core
				2402	unsigned leader = 0;
				2403	Address *leaderAddr = &(address2os[0].first);
				2404	kmp_affin_mask_t *sum;
				2405	KMP_CPU_ALLOC_ON_STACK(sum);
				2406	KMP_CPU_ZERO(sum);
				2407	KMP_CPU_SET(address2os[0].second, sum);
				2408	for (i = 1; i < numAddrs; i++) {
				2409	// If this thread is sufficiently close to the leader (within the
				2410	// granularity setting), then set the bit for this os thread in the
				2411	// affinity mask for this group, and go on to the next thread.
				2412	if (leaderAddr->isClose(address2os[i].first, __kmp_affinity_gran_levels)) {
				2413	KMP_CPU_SET(address2os[i].second, sum);
				2414	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2415	}
				2416
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2417	// For every thread in this group, copy the mask to the thread's entry in
				2418	// the osId2Mask table. Mark the first address as a leader.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2419	for (; j < i; j++) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2420	unsigned osId = address2os[j].second;
				2421	KMP_DEBUG_ASSERT(osId <= maxOsId);
				2422	kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
				2423	KMP_CPU_COPY(mask, sum);
				2424	address2os[j].first.leader = (j == leader);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2425	}
				2426	unique++;
				2427
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2428	// Start a new mask.
				2429	leader = i;
				2430	leaderAddr = &(address2os[i].first);
				2431	KMP_CPU_ZERO(sum);
				2432	KMP_CPU_SET(address2os[i].second, sum);
				2433	}
				2434
				2435	// For every thread in last group, copy the mask to the thread's
				2436	// entry in the osId2Mask table.
				2437	for (; j < i; j++) {
				2438	unsigned osId = address2os[j].second;
				2439	KMP_DEBUG_ASSERT(osId <= maxOsId);
				2440	kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
				2441	KMP_CPU_COPY(mask, sum);
				2442	address2os[j].first.leader = (j == leader);
				2443	}
				2444	unique++;
				2445	KMP_CPU_FREE_FROM_STACK(sum);
				2446
				2447	*maxIndex = maxOsId;
				2448	*numUnique = unique;
				2449	return osId2Mask;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2450	}
				2451
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2452	// Stuff for the affinity proclist parsers. It's easier to declare these vars
				2453	// as file-static than to try and pass them through the calling sequence of
				2454	// the recursive-descent OMP_PLACES parser.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2455	static kmp_affin_mask_t *newMasks;
				2456	static int numNewMasks;
				2457	static int nextNewMask;
				2458
// Append a copy of _mask to the file-static newMasks table. When the table is
// full its capacity is doubled first: a new array is allocated, the existing
// entries are copied across, and the old array is released.
#define ADD_MASK(_mask)                                                        \
  {                                                                            \
    if (nextNewMask >= numNewMasks) {                                          \
      int oldCount = numNewMasks;                                              \
      int idx;                                                                 \
      kmp_affin_mask_t *grown;                                                 \
      numNewMasks = oldCount * 2;                                              \
      KMP_CPU_INTERNAL_ALLOC_ARRAY(grown, numNewMasks);                        \
      for (idx = 0; idx < oldCount; idx++) {                                   \
        kmp_affin_mask_t *from = KMP_CPU_INDEX(newMasks, idx);                 \
        kmp_affin_mask_t *to = KMP_CPU_INDEX(grown, idx);                      \
        KMP_CPU_COPY(to, from);                                                \
      }                                                                        \
      KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, oldCount);                         \
      newMasks = grown;                                                        \
    }                                                                          \
    KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask));               \
    nextNewMask++;                                                             \
  }
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2477
// Append the mask of OS proc _osId (looked up in _osId2Mask) to newMasks.
// An id is rejected - with a warning when verbose/warnings are enabled - if it
// is greater than _maxOsId or if its own bit is not set in its table entry.
//
// Every macro parameter is parenthesized so that expression arguments expand
// with the intended precedence. Note that _osId is evaluated several times;
// do not pass an argument with side effects.
#define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId)                             \
  {                                                                            \
    if (((_osId) > (_maxOsId)) ||                                              \
        (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) {     \
      if (__kmp_affinity_verbose ||                                            \
          (__kmp_affinity_warnings &&                                          \
           (__kmp_affinity_type != affinity_none))) {                          \
        KMP_WARNING(AffIgnoreInvalidProcID, (_osId));                          \
      }                                                                        \
    } else {                                                                   \
      ADD_MASK(KMP_CPU_INDEX((_osId2Mask), (_osId)));                          \
    }                                                                          \
  }
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2491
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2492	// Re-parse the proclist (for the explicit affinity type), and form the list
				2493	// of affinity newMasks indexed by gtid.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2494	static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
				2495	unsigned int *out_numMasks,
				2496	const char *proclist,
				2497	kmp_affin_mask_t *osId2Mask,
				2498	int maxOsId) {
				2499	int i;
				2500	const char *scan = proclist;
				2501	const char *next = proclist;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2502
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2503	// We use malloc() for the temporary mask vector, so that we can use
				2504	// realloc() to extend it.
				2505	numNewMasks = 2;
				2506	KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
				2507	nextNewMask = 0;
				2508	kmp_affin_mask_t *sumMask;
				2509	KMP_CPU_ALLOC(sumMask);
				2510	int setSize = 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2511
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2512	for (;;) {
				2513	int start, end, stride;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2514
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2515	SKIP_WS(scan);
				2516	next = scan;
				2517	if (*next == '\0') {
				2518	break;
				2519	}
				2520
				2521	if (*next == '{') {
				2522	int num;
				2523	setSize = 0;
				2524	next++; // skip '{'
				2525	SKIP_WS(next);
				2526	scan = next;
				2527
				2528	// Read the first integer in the set.
				2529	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad proclist");
				2530	SKIP_DIGITS(next);
				2531	num = __kmp_str_to_int(scan, *next);
				2532	KMP_ASSERT2(num >= 0, "bad explicit proc list");
				2533
				2534	// Copy the mask for that osId to the sum (union) mask.
				2535	if ((num > maxOsId) \|\|
				2536	(!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
				2537	if (__kmp_affinity_verbose \|\|
				2538	(__kmp_affinity_warnings &&
				2539	(__kmp_affinity_type != affinity_none))) {
				2540	KMP_WARNING(AffIgnoreInvalidProcID, num);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2541	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2542	KMP_CPU_ZERO(sumMask);
				2543	} else {
				2544	KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
				2545	setSize = 1;
				2546	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2547
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2548	for (;;) {
				2549	// Check for end of set.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2550	SKIP_WS(next);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2551	if (*next == '}') {
				2552	next++; // skip '}'
				2553	break;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2554	}
				2555
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2556	// Skip optional comma.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2557	if (*next == ',') {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2558	next++;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2559	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2560	SKIP_WS(next);
				2561
				2562	// Read the next integer in the set.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2563	scan = next;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2564	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list");
				2565
				2566	SKIP_DIGITS(next);
				2567	num = __kmp_str_to_int(scan, *next);
				2568	KMP_ASSERT2(num >= 0, "bad explicit proc list");
				2569
				2570	// Add the mask for that osId to the sum mask.
				2571	if ((num > maxOsId) \|\|
				2572	(!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
				2573	if (__kmp_affinity_verbose \|\|
				2574	(__kmp_affinity_warnings &&
				2575	(__kmp_affinity_type != affinity_none))) {
				2576	KMP_WARNING(AffIgnoreInvalidProcID, num);
				2577	}
				2578	} else {
				2579	KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
				2580	setSize++;
				2581	}
				2582	}
				2583	if (setSize > 0) {
				2584	ADD_MASK(sumMask);
				2585	}
				2586
				2587	SKIP_WS(next);
				2588	if (*next == ',') {
				2589	next++;
				2590	}
				2591	scan = next;
				2592	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2593	}
				2594
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2595	// Read the first integer.
				2596	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list");
				2597	SKIP_DIGITS(next);
				2598	start = __kmp_str_to_int(scan, *next);
				2599	KMP_ASSERT2(start >= 0, "bad explicit proc list");
				2600	SKIP_WS(next);
				2601
				2602	// If this isn't a range, then add a mask to the list and go on.
				2603	if (*next != '-') {
				2604	ADD_MASK_OSID(start, osId2Mask, maxOsId);
				2605
				2606	// Skip optional comma.
				2607	if (*next == ',') {
				2608	next++;
				2609	}
				2610	scan = next;
				2611	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2612	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2613
				2614	// This is a range. Skip over the '-' and read in the 2nd int.
				2615	next++; // skip '-'
				2616	SKIP_WS(next);
				2617	scan = next;
				2618	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list");
				2619	SKIP_DIGITS(next);
				2620	end = __kmp_str_to_int(scan, *next);
				2621	KMP_ASSERT2(end >= 0, "bad explicit proc list");
				2622
				2623	// Check for a stride parameter
				2624	stride = 1;
				2625	SKIP_WS(next);
				2626	if (*next == ':') {
				2627	// A stride is specified. Skip over the ':" and read the 3rd int.
				2628	int sign = +1;
				2629	next++; // skip ':'
				2630	SKIP_WS(next);
				2631	scan = next;
				2632	if (*next == '-') {
				2633	sign = -1;
				2634	next++;
				2635	SKIP_WS(next);
				2636	scan = next;
				2637	}
				2638	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list");
				2639	SKIP_DIGITS(next);
				2640	stride = __kmp_str_to_int(scan, *next);
				2641	KMP_ASSERT2(stride >= 0, "bad explicit proc list");
				2642	stride *= sign;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	2643	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2644
				2645	// Do some range checks.
				2646	KMP_ASSERT2(stride != 0, "bad explicit proc list");
				2647	if (stride > 0) {
				2648	KMP_ASSERT2(start <= end, "bad explicit proc list");
				2649	} else {
				2650	KMP_ASSERT2(start >= end, "bad explicit proc list");
				2651	}
				2652	KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
				2653
				2654	// Add the mask for each OS proc # to the list.
				2655	if (stride > 0) {
				2656	do {
				2657	ADD_MASK_OSID(start, osId2Mask, maxOsId);
				2658	start += stride;
				2659	} while (start <= end);
				2660	} else {
				2661	do {
				2662	ADD_MASK_OSID(start, osId2Mask, maxOsId);
				2663	start += stride;
				2664	} while (start >= end);
				2665	}
				2666
				2667	// Skip optional comma.
				2668	SKIP_WS(next);
				2669	if (*next == ',') {
				2670	next++;
				2671	}
				2672	scan = next;
				2673	}
				2674
				2675	*out_numMasks = nextNewMask;
				2676	if (nextNewMask == 0) {
				2677	*out_masks = NULL;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	2678	KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2679	return;
				2680	}
				2681	KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
				2682	for (i = 0; i < nextNewMask; i++) {
				2683	kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
				2684	kmp_affin_mask_t dest = KMP_CPU_INDEX((out_masks), i);
				2685	KMP_CPU_COPY(dest, src);
				2686	}
				2687	KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
				2688	KMP_CPU_FREE(sumMask);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2689	}
				2690
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2691	#if OMP_40_ENABLED
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2692
				2693	/*-----------------------------------------------------------------------------
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2694	Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
				2695	places. Again, here is the grammar:
				2696
				2697	place_list := place
				2698	place_list := place , place_list
				2699	place := num
				2700	place := place : num
				2701	place := place : num : signed
				2702	place := { subplace_list }
				2703	place := ! place // (lowest priority)
				2704	subplace_list := subplace
				2705	subplace_list := subplace , subplace_list
				2706	subplace := num
				2707	subplace := num : num
				2708	subplace := num : num : signed
				2709	signed := num
				2710	signed := + signed
				2711	signed := - signed
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2712	-----------------------------------------------------------------------------*/
				2713
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2714	static void __kmp_process_subplace_list(const char **scan,
				2715	kmp_affin_mask_t *osId2Mask,
				2716	int maxOsId, kmp_affin_mask_t *tempMask,
				2717	int *setSize) {
				2718	const char *next;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2719
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2720	for (;;) {
				2721	int start, count, stride, i;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2722
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2723	// Read in the starting proc id
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2724	SKIP_WS(*scan);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2725	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list");
				2726	next = *scan;
				2727	SKIP_DIGITS(next);
				2728	start = __kmp_str_to_int(scan, next);
				2729	KMP_ASSERT(start >= 0);
				2730	*scan = next;
				2731
				2732	// valid follow sets are ',' ':' and '}'
				2733	SKIP_WS(*scan);
				2734	if (scan == '}' \|\| scan == ',') {
				2735	if ((start > maxOsId) \|\|
				2736	(!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
				2737	if (__kmp_affinity_verbose \|\|
				2738	(__kmp_affinity_warnings &&
				2739	(__kmp_affinity_type != affinity_none))) {
				2740	KMP_WARNING(AffIgnoreInvalidProcID, start);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2741	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2742	} else {
				2743	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
				2744	(*setSize)++;
				2745	}
				2746	if (**scan == '}') {
				2747	break;
				2748	}
				2749	(*scan)++; // skip ','
				2750	continue;
				2751	}
				2752	KMP_ASSERT2(**scan == ':', "bad explicit places list");
				2753	(*scan)++; // skip ':'
				2754
				2755	// Read count parameter
				2756	SKIP_WS(*scan);
				2757	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list");
				2758	next = *scan;
				2759	SKIP_DIGITS(next);
				2760	count = __kmp_str_to_int(scan, next);
				2761	KMP_ASSERT(count >= 0);
				2762	*scan = next;
				2763
				2764	// valid follow sets are ',' ':' and '}'
				2765	SKIP_WS(*scan);
				2766	if (scan == '}' \|\| scan == ',') {
				2767	for (i = 0; i < count; i++) {
				2768	if ((start > maxOsId) \|\|
				2769	(!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
				2770	if (__kmp_affinity_verbose \|\|
				2771	(__kmp_affinity_warnings &&
				2772	(__kmp_affinity_type != affinity_none))) {
				2773	KMP_WARNING(AffIgnoreInvalidProcID, start);
				2774	}
				2775	break; // don't proliferate warnings for large count
				2776	} else {
				2777	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
				2778	start++;
				2779	(*setSize)++;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2780	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2781	}
				2782	if (**scan == '}') {
				2783	break;
				2784	}
				2785	(*scan)++; // skip ','
				2786	continue;
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	2787	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2788	KMP_ASSERT2(**scan == ':', "bad explicit places list");
				2789	(*scan)++; // skip ':'
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2790
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2791	// Read stride parameter
				2792	int sign = +1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2793	for (;;) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2794	SKIP_WS(*scan);
				2795	if (**scan == '+') {
				2796	(*scan)++; // skip '+'
				2797	continue;
				2798	}
				2799	if (**scan == '-') {
				2800	sign *= -1;
				2801	(*scan)++; // skip '-'
				2802	continue;
				2803	}
				2804	break;
				2805	}
				2806	SKIP_WS(*scan);
				2807	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list");
				2808	next = *scan;
				2809	SKIP_DIGITS(next);
				2810	stride = __kmp_str_to_int(scan, next);
				2811	KMP_ASSERT(stride >= 0);
				2812	*scan = next;
				2813	stride *= sign;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2814
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2815	// valid follow sets are ',' and '}'
				2816	SKIP_WS(*scan);
				2817	if (scan == '}' \|\| scan == ',') {
				2818	for (i = 0; i < count; i++) {
				2819	if ((start > maxOsId) \|\|
				2820	(!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
				2821	if (__kmp_affinity_verbose \|\|
				2822	(__kmp_affinity_warnings &&
				2823	(__kmp_affinity_type != affinity_none))) {
				2824	KMP_WARNING(AffIgnoreInvalidProcID, start);
				2825	}
				2826	break; // don't proliferate warnings for large count
				2827	} else {
				2828	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
				2829	start += stride;
				2830	(*setSize)++;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2831	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2832	}
				2833	if (**scan == '}') {
				2834	break;
				2835	}
				2836	(*scan)++; // skip ','
				2837	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2838	}
				2839
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2840	KMP_ASSERT2(0, "bad explicit places list");
				2841	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2842	}
				2843
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2844	static void __kmp_process_place(const char *scan, kmp_affin_mask_t osId2Mask,
				2845	int maxOsId, kmp_affin_mask_t *tempMask,
				2846	int *setSize) {
				2847	const char *next;
				2848
				2849	// valid follow sets are '{' '!' and num
				2850	SKIP_WS(*scan);
				2851	if (**scan == '{') {
				2852	(*scan)++; // skip '{'
				2853	__kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask, setSize);
				2854	KMP_ASSERT2(**scan == '}', "bad explicit places list");
				2855	(*scan)++; // skip '}'
				2856	} else if (**scan == '!') {
				2857	(*scan)++; // skip '!'
				2858	__kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
				2859	KMP_CPU_COMPLEMENT(maxOsId, tempMask);
				2860	} else if ((scan >= '0') && (scan <= '9')) {
				2861	next = *scan;
				2862	SKIP_DIGITS(next);
				2863	int num = __kmp_str_to_int(scan, next);
				2864	KMP_ASSERT(num >= 0);
				2865	if ((num > maxOsId) \|\|
				2866	(!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
				2867	if (__kmp_affinity_verbose \|\|
				2868	(__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
				2869	KMP_WARNING(AffIgnoreInvalidProcID, num);
				2870	}
				2871	} else {
				2872	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
				2873	(*setSize)++;
				2874	}
				2875	*scan = next; // skip num
				2876	} else {
				2877	KMP_ASSERT2(0, "bad explicit places list");
				2878	}
				2879	}
				2880
				2881	// static void
				2882	void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
				2883	unsigned int *out_numMasks,
				2884	const char *placelist,
				2885	kmp_affin_mask_t *osId2Mask,
				2886	int maxOsId) {
				2887	int i, j, count, stride, sign;
				2888	const char *scan = placelist;
				2889	const char *next = placelist;
				2890
				2891	numNewMasks = 2;
				2892	KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
				2893	nextNewMask = 0;
				2894
				2895	// tempMask is modified based on the previous or initial
				2896	// place to form the current place
				2897	// previousMask contains the previous place
				2898	kmp_affin_mask_t *tempMask;
				2899	kmp_affin_mask_t *previousMask;
				2900	KMP_CPU_ALLOC(tempMask);
				2901	KMP_CPU_ZERO(tempMask);
				2902	KMP_CPU_ALLOC(previousMask);
				2903	KMP_CPU_ZERO(previousMask);
				2904	int setSize = 0;
				2905
				2906	for (;;) {
				2907	__kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
				2908
				2909	// valid follow sets are ',' ':' and EOL
				2910	SKIP_WS(scan);
				2911	if (scan == '\0' \|\| scan == ',') {
				2912	if (setSize > 0) {
				2913	ADD_MASK(tempMask);
				2914	}
				2915	KMP_CPU_ZERO(tempMask);
				2916	setSize = 0;
				2917	if (*scan == '\0') {
				2918	break;
				2919	}
				2920	scan++; // skip ','
				2921	continue;
				2922	}
				2923
				2924	KMP_ASSERT2(*scan == ':', "bad explicit places list");
				2925	scan++; // skip ':'
				2926
				2927	// Read count parameter
				2928	SKIP_WS(scan);
				2929	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list");
				2930	next = scan;
				2931	SKIP_DIGITS(next);
				2932	count = __kmp_str_to_int(scan, *next);
				2933	KMP_ASSERT(count >= 0);
				2934	scan = next;
				2935
				2936	// valid follow sets are ',' ':' and EOL
				2937	SKIP_WS(scan);
				2938	if (scan == '\0' \|\| scan == ',') {
				2939	stride = +1;
				2940	} else {
				2941	KMP_ASSERT2(*scan == ':', "bad explicit places list");
				2942	scan++; // skip ':'
				2943
				2944	// Read stride parameter
				2945	sign = +1;
				2946	for (;;) {
				2947	SKIP_WS(scan);
				2948	if (*scan == '+') {
				2949	scan++; // skip '+'
				2950	continue;
				2951	}
				2952	if (*scan == '-') {
				2953	sign *= -1;
				2954	scan++; // skip '-'
				2955	continue;
				2956	}
				2957	break;
				2958	}
				2959	SKIP_WS(scan);
				2960	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list");
				2961	next = scan;
				2962	SKIP_DIGITS(next);
				2963	stride = __kmp_str_to_int(scan, *next);
				2964	KMP_DEBUG_ASSERT(stride >= 0);
				2965	scan = next;
				2966	stride *= sign;
				2967	}
				2968
				2969	// Add places determined by initial_place : count : stride
				2970	for (i = 0; i < count; i++) {
				2971	if (setSize == 0) {
				2972	break;
				2973	}
				2974	// Add the current place, then build the next place (tempMask) from that
				2975	KMP_CPU_COPY(previousMask, tempMask);
				2976	ADD_MASK(previousMask);
				2977	KMP_CPU_ZERO(tempMask);
				2978	setSize = 0;
				2979	KMP_CPU_SET_ITERATE(j, previousMask) {
				2980	if (!KMP_CPU_ISSET(j, previousMask)) {
				2981	continue;
				2982	}
				2983	if ((j + stride > maxOsId) \|\| (j + stride < 0) \|\|
				2984	(!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) \|\|
				2985	(!KMP_CPU_ISSET(j + stride,
				2986	KMP_CPU_INDEX(osId2Mask, j + stride)))) {
				2987	if ((__kmp_affinity_verbose \|\|
				2988	(__kmp_affinity_warnings &&
				2989	(__kmp_affinity_type != affinity_none))) &&
				2990	i < count - 1) {
				2991	KMP_WARNING(AffIgnoreInvalidProcID, j + stride);
				2992	}
				2993	continue;
				2994	}
				2995	KMP_CPU_SET(j + stride, tempMask);
				2996	setSize++;
				2997	}
				2998	}
				2999	KMP_CPU_ZERO(tempMask);
				3000	setSize = 0;
				3001
				3002	// valid follow sets are ',' and EOL
				3003	SKIP_WS(scan);
				3004	if (*scan == '\0') {
				3005	break;
				3006	}
				3007	if (*scan == ',') {
				3008	scan++; // skip ','
				3009	continue;
				3010	}
				3011
				3012	KMP_ASSERT2(0, "bad explicit places list");
				3013	}
				3014
				3015	*out_numMasks = nextNewMask;
				3016	if (nextNewMask == 0) {
				3017	*out_masks = NULL;
				3018	KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
				3019	return;
				3020	}
				3021	KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
				3022	KMP_CPU_FREE(tempMask);
				3023	KMP_CPU_FREE(previousMask);
				3024	for (i = 0; i < nextNewMask; i++) {
				3025	kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
				3026	kmp_affin_mask_t dest = KMP_CPU_INDEX((out_masks), i);
				3027	KMP_CPU_COPY(dest, src);
				3028	}
				3029	KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
				3030	}
				3031
				3032	#endif /* OMP_40_ENABLED */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3033
				3034	#undef ADD_MASK
				3035	#undef ADD_MASK_OSID
				3036
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3037	#if KMP_USE_HWLOC
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3038	static int __kmp_hwloc_count_children_by_type(hwloc_topology_t t, hwloc_obj_t o,
				3039	hwloc_obj_type_t type,
				3040	hwloc_obj_t* f) {
				3041	if (!hwloc_compare_types(o->type, type)) {
				3042	if (*f == NULL)
				3043	*f = o; // output first descendant found
				3044	return 1;
				3045	}
				3046	int sum = 0;
				3047	for (unsigned i = 0; i < o->arity; i++)
				3048	sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f);
				3049	return sum; // will be 0 if no one found (as PU arity is 0)
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3050	}
				3051
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3052	static int __kmp_hwloc_count_children_by_depth(hwloc_topology_t t,
				3053	hwloc_obj_t o, unsigned depth,
				3054	hwloc_obj_t* f) {
				3055	if (o->depth == depth) {
				3056	if (*f == NULL)
				3057	*f = o; // output first descendant found
				3058	return 1;
				3059	}
				3060	int sum = 0;
				3061	for (unsigned i = 0; i < o->arity; i++)
				3062	sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f);
				3063	return sum; // will be 0 if no one found (as PU arity is 0)
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3064	}
				3065
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3066	static int __kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o) {
				3067	// skip PUs descendants of the object o
				3068	int skipped = 0;
				3069	hwloc_obj_t hT = NULL;
				3070	int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
				3071	for (int i = 0; i < N; ++i) {
				3072	KMP_DEBUG_ASSERT(hT);
				3073	unsigned idx = hT->os_index;
				3074	if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
				3075	KMP_CPU_CLR(idx, __kmp_affin_fullMask);
				3076	KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
				3077	++skipped;
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3078	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3079	hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
				3080	}
				3081	return skipped; // count number of skipped units
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3082	}
				3083
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3084	static int __kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o) {
				3085	// check if obj has PUs present in fullMask
				3086	hwloc_obj_t hT = NULL;
				3087	int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
				3088	for (int i = 0; i < N; ++i) {
				3089	KMP_DEBUG_ASSERT(hT);
				3090	unsigned idx = hT->os_index;
				3091	if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask))
				3092	return 1; // found PU
				3093	hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
				3094	}
				3095	return 0; // no PUs found
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3096	}
				3097	#endif // KMP_USE_HWLOC
				3098
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3099	static void __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) {
				3100	AddrUnsPair *newAddr;
				3101	if (__kmp_hws_requested == 0)
				3102	goto _exit; // no topology limiting actions requested, exit
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3103	#if KMP_USE_HWLOC
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3104	if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
				3105	// Number of subobjects calculated dynamically, this works fine for
				3106	// any non-uniform topology.
				3107	// L2 cache objects are determined by depth, other objects - by type.
				3108	hwloc_topology_t tp = __kmp_hwloc_topology;
				3109	int nS=0, nN=0, nL=0, nC=0, nT=0; // logical index including skipped
				3110	int nCr=0, nTr=0; // number of requested units
				3111	int nPkg=0, nCo=0, n_new=0, n_old = 0, nCpP=0, nTpC=0; // counters
				3112	hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
				3113	int L2depth, idx;
Jonathan Peyton	dd4aa9b	2015-10-08 17:55:54 +0000	[diff] [blame]	3114
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3115	// check support of extensions ----------------------------------
				3116	int numa_support = 0, tile_support = 0;
				3117	if (__kmp_pu_os_idx)
				3118	hT = hwloc_get_pu_obj_by_os_index(tp,
				3119	__kmp_pu_os_idx[__kmp_avail_proc - 1]);
				3120	else
				3121	hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1);
				3122	if (hT == NULL) { // something's gone wrong
				3123	KMP_WARNING(AffHWSubsetUnsupported);
				3124	goto _exit;
				3125	}
				3126	// check NUMA node
				3127	hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
				3128	hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
				3129	if (hN != NULL && hN->depth > hS->depth) {
				3130	numa_support = 1; // 1 in case socket includes node(s)
				3131	} else if (__kmp_hws_node.num > 0) {
				3132	// don't support sockets inside NUMA node (no such HW found for testing)
				3133	KMP_WARNING(AffHWSubsetUnsupported);
				3134	goto _exit;
				3135	}
				3136	// check L2 cahce, get object by depth because of multiple caches
				3137	L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
				3138	hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT);
				3139	if (hL != NULL && __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
				3140	&hC) > 1) {
				3141	tile_support = 1; // no sense to count L2 if it includes single core
				3142	} else if (__kmp_hws_tile.num > 0) {
				3143	if (__kmp_hws_core.num == 0) {
				3144	__kmp_hws_core = __kmp_hws_tile; // replace L2 with core
				3145	__kmp_hws_tile.num = 0;
				3146	} else {
				3147	// L2 and core are both requested, but represent same object
				3148	KMP_WARNING(AffHWSubsetInvalid);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3149	goto _exit;
				3150	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3151	}
				3152	// end of check of extensions -----------------------------------
				3153
				3154	// fill in unset items, validate settings -----------------------
				3155	if (__kmp_hws_socket.num == 0)
				3156	__kmp_hws_socket.num = nPackages; // use all available sockets
				3157	if (__kmp_hws_socket.offset >= nPackages) {
				3158	KMP_WARNING(AffHWSubsetManySockets);
				3159	goto _exit;
				3160	}
				3161	if (numa_support) {
				3162	int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE,
				3163	&hN); // num nodes in socket
				3164	if (__kmp_hws_node.num == 0)
				3165	__kmp_hws_node.num = NN; // use all available nodes
				3166	if (__kmp_hws_node.offset >= NN) {
				3167	KMP_WARNING(AffHWSubsetManyNodes);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3168	goto _exit;
				3169	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3170	if (tile_support) {
				3171	// get num tiles in node
				3172	int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
				3173	if (__kmp_hws_tile.num == 0) {
				3174	__kmp_hws_tile.num = NL + 1;
				3175	} // use all available tiles, some node may have more tiles, thus +1
				3176	if (__kmp_hws_tile.offset >= NL) {
				3177	KMP_WARNING(AffHWSubsetManyTiles);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3178	goto _exit;
				3179	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3180	int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
				3181	&hC); // num cores in tile
				3182	if (__kmp_hws_core.num == 0)
				3183	__kmp_hws_core.num = NC; // use all available cores
				3184	if (__kmp_hws_core.offset >= NC) {
				3185	KMP_WARNING(AffHWSubsetManyCores);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3186	goto _exit;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3187	}
				3188	} else { // tile_support
				3189	int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE,
				3190	&hC); // num cores in node
				3191	if (__kmp_hws_core.num == 0)
				3192	__kmp_hws_core.num = NC; // use all available cores
				3193	if (__kmp_hws_core.offset >= NC) {
				3194	KMP_WARNING(AffHWSubsetManyCores);
				3195	goto _exit;
				3196	}
				3197	} // tile_support
				3198	} else { // numa_support
				3199	if (tile_support) {
				3200	// get num tiles in socket
				3201	int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
				3202	if (__kmp_hws_tile.num == 0)
				3203	__kmp_hws_tile.num = NL; // use all available tiles
				3204	if (__kmp_hws_tile.offset >= NL) {
				3205	KMP_WARNING(AffHWSubsetManyTiles);
				3206	goto _exit;
				3207	}
				3208	int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
				3209	&hC); // num cores in tile
				3210	if (__kmp_hws_core.num == 0)
				3211	__kmp_hws_core.num = NC; // use all available cores
				3212	if (__kmp_hws_core.offset >= NC) {
				3213	KMP_WARNING(AffHWSubsetManyCores);
				3214	goto _exit;
				3215	}
				3216	} else { // tile_support
				3217	int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE,
				3218	&hC); // num cores in socket
				3219	if (__kmp_hws_core.num == 0)
				3220	__kmp_hws_core.num = NC; // use all available cores
				3221	if (__kmp_hws_core.offset >= NC) {
				3222	KMP_WARNING(AffHWSubsetManyCores);
				3223	goto _exit;
				3224	}
				3225	} // tile_support
				3226	}
				3227	if (__kmp_hws_proc.num == 0)
				3228	__kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all available procs
				3229	if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) {
				3230	KMP_WARNING(AffHWSubsetManyProcs);
				3231	goto _exit;
				3232	}
				3233	// end of validation --------------------------------------------
				3234
				3235	if (pAddr) // pAddr is NULL in case of affinity_none
				3236	newAddr = (AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair)
				3237	__kmp_avail_proc); // max size
				3238	// main loop to form HW subset ----------------------------------
				3239	hS = NULL;
				3240	int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE);
				3241	for (int s = 0; s < NP; ++s) {
				3242	// Check Socket -----------------------------------------------
				3243	hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS);
				3244	if (!__kmp_hwloc_obj_has_PUs(tp, hS))
				3245	continue; // skip socket if all PUs are out of fullMask
				3246	++nS; // only count objects those have PUs in affinity mask
				3247	if (nS <= __kmp_hws_socket.offset \|\|
				3248	nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) {
				3249	n_old += __kmp_hwloc_skip_PUs_obj(tp, hS); // skip socket
				3250	continue; // move to next socket
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3251	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3252	nCr = 0; // count number of cores per socket
				3253	// socket requested, go down the topology tree
				3254	// check 4 cases: (+NUMA+Tile), (+NUMA-Tile), (-NUMA+Tile), (-NUMA-Tile)
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3255	if (numa_support) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3256	nN = 0;
				3257	hN = NULL;
				3258	// num nodes in current socket
				3259	int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE,
				3260	&hN);
				3261	for (int n = 0; n < NN; ++n) {
				3262	// Check NUMA Node ----------------------------------------
				3263	if (!__kmp_hwloc_obj_has_PUs(tp, hN)) {
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3264	hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3265	continue; // skip node if all PUs are out of fullMask
				3266	}
				3267	++nN;
				3268	if (nN <= __kmp_hws_node.offset \|\|
				3269	nN > __kmp_hws_node.num + __kmp_hws_node.offset) {
				3270	// skip node as not requested
				3271	n_old += __kmp_hwloc_skip_PUs_obj(tp, hN); // skip node
				3272	hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
				3273	continue; // move to next node
				3274	}
				3275	// node requested, go down the topology tree
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3276	if (tile_support) {
				3277	nL = 0;
				3278	hL = NULL;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3279	int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3280	for (int l = 0; l < NL; ++l) {
				3281	// Check L2 (tile) ------------------------------------
				3282	if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
				3283	hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
				3284	continue; // skip tile if all PUs are out of fullMask
				3285	}
				3286	++nL;
				3287	if (nL <= __kmp_hws_tile.offset \|\|
				3288	nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
				3289	// skip tile as not requested
				3290	n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
				3291	hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
				3292	continue; // move to next tile
				3293	}
				3294	// tile requested, go down the topology tree
				3295	nC = 0;
				3296	hC = NULL;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3297	// num cores in current tile
				3298	int NC = __kmp_hwloc_count_children_by_type(tp, hL,
				3299	HWLOC_OBJ_CORE, &hC);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3300	for (int c = 0; c < NC; ++c) {
				3301	// Check Core ---------------------------------------
				3302	if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
				3303	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3304	continue; // skip core if all PUs are out of fullMask
				3305	}
				3306	++nC;
				3307	if (nC <= __kmp_hws_core.offset \|\|
				3308	nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
				3309	// skip node as not requested
				3310	n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
				3311	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3312	continue; // move to next node
				3313	}
				3314	// core requested, go down to PUs
				3315	nT = 0;
				3316	nTr = 0;
				3317	hT = NULL;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3318	// num procs in current core
				3319	int NT = __kmp_hwloc_count_children_by_type(tp, hC,
				3320	HWLOC_OBJ_PU, &hT);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3321	for (int t = 0; t < NT; ++t) {
				3322	// Check PU ---------------------------------------
				3323	idx = hT->os_index;
				3324	if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
				3325	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3326	continue; // skip PU if not in fullMask
				3327	}
				3328	++nT;
				3329	if (nT <= __kmp_hws_proc.offset \|\|
				3330	nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
				3331	// skip PU
				3332	KMP_CPU_CLR(idx, __kmp_affin_fullMask);
				3333	++n_old;
				3334	KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
				3335	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3336	continue; // move to next node
				3337	}
				3338	++nTr;
				3339	if (pAddr) // collect requested thread's data
				3340	newAddr[n_new] = (*pAddr)[n_old];
				3341	++n_new;
				3342	++n_old;
				3343	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3344	} // threads loop
				3345	if (nTr > 0) {
				3346	++nCr; // num cores per socket
				3347	++nCo; // total num cores
				3348	if (nTr > nTpC)
				3349	nTpC = nTr; // calc max threads per core
				3350	}
				3351	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3352	} // cores loop
				3353	hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
				3354	} // tiles loop
				3355	} else { // tile_support
				3356	// no tiles, check cores
				3357	nC = 0;
				3358	hC = NULL;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3359	// num cores in current node
				3360	int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE,
				3361	&hC);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3362	for (int c = 0; c < NC; ++c) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3363	// Check Core ---------------------------------------
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3364	if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
				3365	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3366	continue; // skip core if all PUs are out of fullMask
				3367	}
				3368	++nC;
				3369	if (nC <= __kmp_hws_core.offset \|\|
				3370	nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
				3371	// skip node as not requested
				3372	n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
				3373	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3374	continue; // move to next node
				3375	}
				3376	// core requested, go down to PUs
				3377	nT = 0;
				3378	nTr = 0;
				3379	hT = NULL;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3380	int NT = __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU,
				3381	&hT);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3382	for (int t = 0; t < NT; ++t) {
				3383	// Check PU ---------------------------------------
				3384	idx = hT->os_index;
				3385	if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
				3386	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3387	continue; // skip PU if not in fullMask
				3388	}
				3389	++nT;
				3390	if (nT <= __kmp_hws_proc.offset \|\|
				3391	nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
				3392	// skip PU
				3393	KMP_CPU_CLR(idx, __kmp_affin_fullMask);
				3394	++n_old;
				3395	KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
				3396	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3397	continue; // move to next node
				3398	}
				3399	++nTr;
				3400	if (pAddr) // collect requested thread's data
				3401	newAddr[n_new] = (*pAddr)[n_old];
				3402	++n_new;
				3403	++n_old;
				3404	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3405	} // threads loop
				3406	if (nTr > 0) {
				3407	++nCr; // num cores per socket
				3408	++nCo; // total num cores
				3409	if (nTr > nTpC)
				3410	nTpC = nTr; // calc max threads per core
				3411	}
				3412	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3413	} // cores loop
				3414	} // tiles support
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3415	hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
				3416	} // nodes loop
				3417	} else { // numa_support
				3418	// no NUMA support
				3419	if (tile_support) {
				3420	nL = 0;
				3421	hL = NULL;
				3422	// num tiles in current socket
				3423	int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
				3424	for (int l = 0; l < NL; ++l) {
				3425	// Check L2 (tile) ------------------------------------
				3426	if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
				3427	hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
				3428	continue; // skip tile if all PUs are out of fullMask
				3429	}
				3430	++nL;
				3431	if (nL <= __kmp_hws_tile.offset \|\|
				3432	nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
				3433	// skip tile as not requested
				3434	n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
				3435	hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
				3436	continue; // move to next tile
				3437	}
				3438	// tile requested, go down the topology tree
				3439	nC = 0;
				3440	hC = NULL;
				3441	// num cores per tile
				3442	int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
				3443	&hC);
				3444	for (int c = 0; c < NC; ++c) {
				3445	// Check Core ---------------------------------------
				3446	if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
				3447	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3448	continue; // skip core if all PUs are out of fullMask
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3449	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3450	++nC;
				3451	if (nC <= __kmp_hws_core.offset \|\|
				3452	nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
				3453	// skip node as not requested
				3454	n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
				3455	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3456	continue; // move to next node
				3457	}
				3458	// core requested, go down to PUs
				3459	nT = 0;
				3460	nTr = 0;
				3461	hT = NULL;
				3462	// num procs per core
				3463	int NT = __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU,
				3464	&hT);
				3465	for (int t = 0; t < NT; ++t) {
				3466	// Check PU ---------------------------------------
				3467	idx = hT->os_index;
				3468	if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
				3469	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3470	continue; // skip PU if not in fullMask
				3471	}
				3472	++nT;
				3473	if (nT <= __kmp_hws_proc.offset \|\|
				3474	nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
				3475	// skip PU
				3476	KMP_CPU_CLR(idx, __kmp_affin_fullMask);
				3477	++n_old;
				3478	KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
				3479	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3480	continue; // move to next node
				3481	}
				3482	++nTr;
				3483	if (pAddr) // collect requested thread's data
				3484	newAddr[n_new] = (*pAddr)[n_old];
				3485	++n_new;
				3486	++n_old;
				3487	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3488	} // threads loop
				3489	if (nTr > 0) {
				3490	++nCr; // num cores per socket
				3491	++nCo; // total num cores
				3492	if (nTr > nTpC)
				3493	nTpC = nTr; // calc max threads per core
				3494	}
				3495	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3496	} // cores loop
				3497	hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
				3498	} // tiles loop
				3499	} else { // tile_support
				3500	// no tiles, check cores
				3501	nC = 0;
				3502	hC = NULL;
				3503	// num cores in socket
				3504	int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE,
				3505	&hC);
				3506	for (int c = 0; c < NC; ++c) {
				3507	// Check Core -------------------------------------------
				3508	if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
				3509	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3510	continue; // skip core if all PUs are out of fullMask
				3511	}
				3512	++nC;
				3513	if (nC <= __kmp_hws_core.offset \|\|
				3514	nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
				3515	// skip node as not requested
				3516	n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
				3517	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3518	continue; // move to next node
				3519	}
				3520	// core requested, go down to PUs
				3521	nT = 0;
				3522	nTr = 0;
				3523	hT = NULL;
				3524	// num procs per core
				3525	int NT = __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU,
				3526	&hT);
				3527	for (int t = 0; t < NT; ++t) {
				3528	// Check PU ---------------------------------------
				3529	idx = hT->os_index;
				3530	if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
				3531	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3532	continue; // skip PU if not in fullMask
				3533	}
				3534	++nT;
				3535	if (nT <= __kmp_hws_proc.offset \|\|
				3536	nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
				3537	// skip PU
				3538	KMP_CPU_CLR(idx, __kmp_affin_fullMask);
				3539	++n_old;
				3540	KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
				3541	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3542	continue; // move to next node
				3543	}
				3544	++nTr;
				3545	if (pAddr) // collect requested thread's data
				3546	newAddr[n_new] = (*pAddr)[n_old];
				3547	++n_new;
				3548	++n_old;
				3549	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3550	} // threads loop
				3551	if (nTr > 0) {
				3552	++nCr; // num cores per socket
				3553	++nCo; // total num cores
				3554	if (nTr > nTpC)
				3555	nTpC = nTr; // calc max threads per core
				3556	}
				3557	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3558	} // cores loop
				3559	} // tiles support
				3560	} // numa_support
				3561	if (nCr > 0) { // found cores?
				3562	++nPkg; // num sockets
				3563	if (nCr > nCpP)
				3564	nCpP = nCr; // calc max cores per socket
				3565	}
				3566	} // sockets loop
				3567
				3568	// check the subset is valid
				3569	KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc);
				3570	KMP_DEBUG_ASSERT(nPkg > 0);
				3571	KMP_DEBUG_ASSERT(nCpP > 0);
				3572	KMP_DEBUG_ASSERT(nTpC > 0);
				3573	KMP_DEBUG_ASSERT(nCo > 0);
				3574	KMP_DEBUG_ASSERT(nPkg <= nPackages);
				3575	KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg);
				3576	KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore);
				3577	KMP_DEBUG_ASSERT(nCo <= __kmp_ncores);
				3578
				3579	nPackages = nPkg; // correct num sockets
				3580	nCoresPerPkg = nCpP; // correct num cores per socket
				3581	__kmp_nThreadsPerCore = nTpC; // correct num threads per core
				3582	__kmp_avail_proc = n_new; // correct num procs
				3583	__kmp_ncores = nCo; // correct num cores
				3584	// hwloc topology method end
				3585	} else
				3586	#endif // KMP_USE_HWLOC
				3587	{
				3588	int n_old = 0, n_new = 0, proc_num = 0;
				3589	if (__kmp_hws_node.num > 0 \|\| __kmp_hws_tile.num > 0) {
				3590	KMP_WARNING(AffHWSubsetNoHWLOC);
				3591	goto _exit;
				3592	}
				3593	if (__kmp_hws_socket.num == 0)
				3594	__kmp_hws_socket.num = nPackages; // use all available sockets
				3595	if (__kmp_hws_core.num == 0)
				3596	__kmp_hws_core.num = nCoresPerPkg; // use all available cores
				3597	if (__kmp_hws_proc.num == 0 \|\|
				3598	__kmp_hws_proc.num > __kmp_nThreadsPerCore)
				3599	__kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all HW contexts
				3600	if ( !__kmp_affinity_uniform_topology() ) {
				3601	KMP_WARNING( AffHWSubsetNonUniform );
				3602	goto _exit; // don't support non-uniform topology
				3603	}
				3604	if ( depth > 3 ) {
				3605	KMP_WARNING( AffHWSubsetNonThreeLevel );
				3606	goto _exit; // don't support not-3-level topology
				3607	}
				3608	if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) {
				3609	KMP_WARNING(AffHWSubsetManySockets);
				3610	goto _exit;
				3611	}
				3612	if ( __kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg ) {
				3613	KMP_WARNING( AffHWSubsetManyCores );
				3614	goto _exit;
				3615	}
				3616	// Form the requested subset
				3617	if (pAddr) // pAddr is NULL in case of affinity_none
				3618	newAddr = (AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair)
				3619	__kmp_hws_socket.num *
				3620	__kmp_hws_core.num *
				3621	__kmp_hws_proc.num);
				3622	for (int i = 0; i < nPackages; ++i) {
				3623	if (i < __kmp_hws_socket.offset \|\|
				3624	i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) {
				3625	// skip not-requested socket
				3626	n_old += nCoresPerPkg * __kmp_nThreadsPerCore;
				3627	if (__kmp_pu_os_idx != NULL) {
				3628	// walk through skipped socket
				3629	for (int j = 0; j < nCoresPerPkg; ++j) {
				3630	for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
				3631	KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
				3632	++proc_num;
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	3633	}
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3634	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3635	}
				3636	} else {
				3637	// walk through requested socket
				3638	for (int j = 0; j < nCoresPerPkg; ++j) {
				3639	if (j < __kmp_hws_core.offset \|\|
				3640	j >= __kmp_hws_core.offset + __kmp_hws_core.num)
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3641	{ // skip not-requested core
				3642	n_old += __kmp_nThreadsPerCore;
				3643	if (__kmp_pu_os_idx != NULL) {
				3644	for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
				3645	KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
				3646	++proc_num;
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	3647	}
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3648	}
				3649	} else {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3650	// walk through requested core
				3651	for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
				3652	if (k < __kmp_hws_proc.num) {
				3653	if (pAddr) // collect requested thread's data
				3654	newAddr[n_new] = (*pAddr)[n_old];
				3655	n_new++;
				3656	} else {
				3657	if (__kmp_pu_os_idx != NULL)
				3658	KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3659	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3660	n_old++;
				3661	++proc_num;
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	3662	}
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3663	}
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	3664	}
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3665	}
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3666	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3667	KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
				3668	KMP_DEBUG_ASSERT(n_new == __kmp_hws_socket.num * __kmp_hws_core.num *
				3669	__kmp_hws_proc.num);
				3670	nPackages = __kmp_hws_socket.num; // correct nPackages
				3671	nCoresPerPkg = __kmp_hws_core.num; // correct nCoresPerPkg
				3672	__kmp_nThreadsPerCore = __kmp_hws_proc.num; // correct __kmp_nThreadsPerCore
				3673	__kmp_avail_proc = n_new; // correct avail_proc
				3674	__kmp_ncores = nPackages * __kmp_hws_core.num; // correct ncores
				3675	} // non-hwloc topology method
				3676	if (pAddr) {
				3677	__kmp_free( *pAddr );
				3678	*pAddr = newAddr; // replace old topology with new one
				3679	}
				3680	if (__kmp_affinity_verbose) {
				3681	char m[KMP_AFFIN_MASK_PRINT_LEN];
				3682	__kmp_affinity_print_mask(m,KMP_AFFIN_MASK_PRINT_LEN,__kmp_affin_fullMask);
				3683	if (__kmp_affinity_respect_mask) {
				3684	KMP_INFORM(InitOSProcSetRespect, "KMP_HW_SUBSET", m);
Paul Osmialowski	ecbe2ea	2016-07-29 20:55:03 +0000	[diff] [blame]	3685	} else {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3686	KMP_INFORM(InitOSProcSetNotRespect, "KMP_HW_SUBSET", m);
Paul Osmialowski	ecbe2ea	2016-07-29 20:55:03 +0000	[diff] [blame]	3687	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3688	KMP_INFORM(AvailableOSProc, "KMP_HW_SUBSET", __kmp_avail_proc);
				3689	kmp_str_buf_t buf;
				3690	__kmp_str_buf_init(&buf);
				3691	__kmp_str_buf_print(&buf, "%d", nPackages);
				3692	KMP_INFORM(TopologyExtra, "KMP_HW_SUBSET", buf.str, nCoresPerPkg,
				3693	__kmp_nThreadsPerCore, __kmp_ncores);
				3694	__kmp_str_buf_free(&buf);
				3695	}
				3696	_exit:
				3697	if (__kmp_pu_os_idx != NULL) {
				3698	__kmp_free(__kmp_pu_os_idx);
				3699	__kmp_pu_os_idx = NULL;
				3700	}
				3701	}
				3702
				3703	// This function figures out the deepest level at which there is at least one
				3704	// cluster/core with more than one processing unit bound to it.
				3705	static int __kmp_affinity_find_core_level(const AddrUnsPair *address2os,
				3706	int nprocs, int bottom_level) {
				3707	int core_level = 0;
				3708
				3709	for (int i = 0; i < nprocs; i++) {
				3710	for (int j = bottom_level; j > 0; j--) {
				3711	if (address2os[i].first.labels[j] > 0) {
				3712	if (core_level < (j - 1)) {
				3713	core_level = j - 1;
				3714	}
				3715	}
				3716	}
				3717	}
				3718	return core_level;
				3719	}
				3720
				3721	// This function counts number of clusters/cores at given level.
				3722	static int __kmp_affinity_compute_ncores(const AddrUnsPair *address2os,
				3723	int nprocs, int bottom_level,
				3724	int core_level) {
				3725	int ncores = 0;
				3726	int i, j;
				3727
				3728	j = bottom_level;
				3729	for (i = 0; i < nprocs; i++) {
				3730	for (j = bottom_level; j > core_level; j--) {
				3731	if ((i + 1) < nprocs) {
				3732	if (address2os[i + 1].first.labels[j] > 0) {
				3733	break;
				3734	}
				3735	}
				3736	}
				3737	if (j == core_level) {
				3738	ncores++;
				3739	}
				3740	}
				3741	if (j > core_level) {
				3742	// In case of ( nprocs < __kmp_avail_proc ) we may end too deep and miss one
				3743	// core. May occur when called from __kmp_affinity_find_core().
				3744	ncores++;
				3745	}
				3746	return ncores;
				3747	}
				3748
				3749	// This function finds to which cluster/core given processing unit is bound.
				3750	static int __kmp_affinity_find_core(const AddrUnsPair *address2os, int proc,
				3751	int bottom_level, int core_level) {
				3752	return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level,
				3753	core_level) - 1;
				3754	}
				3755
				3756	// This function finds maximal number of processing units bound to a
				3757	// cluster/core at given level.
				3758	static int __kmp_affinity_max_proc_per_core(const AddrUnsPair *address2os,
				3759	int nprocs, int bottom_level,
				3760	int core_level) {
				3761	int maxprocpercore = 0;
				3762
				3763	if (core_level < bottom_level) {
				3764	for (int i = 0; i < nprocs; i++) {
				3765	int percore = address2os[i].first.labels[core_level + 1] + 1;
				3766
				3767	if (percore > maxprocpercore) {
				3768	maxprocpercore = percore;
				3769	}
				3770	}
				3771	} else {
				3772	maxprocpercore = 1;
				3773	}
				3774	return maxprocpercore;
Paul Osmialowski	ecbe2ea	2016-07-29 20:55:03 +0000	[diff] [blame]	3775	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3776
				3777	static AddrUnsPair *address2os = NULL;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3778	static int *procarr = NULL;
				3779	static int __kmp_aff_depth = 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3780
// Early-exit helper for the affinity initialization code: asserts that the
// affinity type is affinity_none and that no topology table (address2os) was
// built, calls __kmp_apply_thread_places(NULL, 0), and returns from the
// enclosing function (which must return void).
//
// The body is wrapped in do { ... } while (0) so the macro expands to exactly
// one statement. Use it as `KMP_EXIT_AFF_NONE;` -- it is then safe inside an
// unbraced if/else as well as inside a braced block.
#define KMP_EXIT_AFF_NONE                                                      \
  do {                                                                         \
    KMP_ASSERT(__kmp_affinity_type == affinity_none);                          \
    KMP_ASSERT(address2os == NULL);                                            \
    __kmp_apply_thread_places(NULL, 0);                                        \
    return;                                                                    \
  } while (0)
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	3786
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3787	static int __kmp_affinity_cmp_Address_child_num(const void a, const void b) {
				3788	const Address aa = (const Address )&(((AddrUnsPair *)a)->first);
				3789	const Address bb = (const Address )&(((AddrUnsPair *)b)->first);
				3790	unsigned depth = aa->depth;
				3791	unsigned i;
				3792	KMP_DEBUG_ASSERT(depth == bb->depth);
				3793	KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
				3794	KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
				3795	for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
				3796	int j = depth - i - 1;
				3797	if (aa->childNums[j] < bb->childNums[j])
				3798	return -1;
				3799	if (aa->childNums[j] > bb->childNums[j])
				3800	return 1;
				3801	}
				3802	for (; i < depth; i++) {
				3803	int j = i - __kmp_affinity_compact;
				3804	if (aa->childNums[j] < bb->childNums[j])
				3805	return -1;
				3806	if (aa->childNums[j] > bb->childNums[j])
				3807	return 1;
				3808	}
				3809	return 0;
Jonathan Peyton	e6abe52	2016-09-02 20:54:58 +0000	[diff] [blame]	3810	}
				3811
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3812	static void __kmp_aux_affinity_initialize(void) {
				3813	if (__kmp_affinity_masks != NULL) {
				3814	KMP_ASSERT(__kmp_affin_fullMask != NULL);
				3815	return;
				3816	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3817
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3818	// Create the "full" mask - this defines all of the processors that we
				3819	// consider to be in the machine model. If respect is set, then it is the
				3820	// initialization thread's affinity mask. Otherwise, it is all processors that
				3821	// we know about on the machine.
				3822	if (__kmp_affin_fullMask == NULL) {
				3823	KMP_CPU_ALLOC(__kmp_affin_fullMask);
				3824	}
				3825	if (KMP_AFFINITY_CAPABLE()) {
				3826	if (__kmp_affinity_respect_mask) {
				3827	__kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3828
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3829	// Count the number of available processors.
				3830	unsigned i;
				3831	__kmp_avail_proc = 0;
				3832	KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
				3833	if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
				3834	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3835	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3836	__kmp_avail_proc++;
				3837	}
				3838	if (__kmp_avail_proc > __kmp_xproc) {
				3839	if (__kmp_affinity_verbose \|\|
				3840	(__kmp_affinity_warnings &&
				3841	(__kmp_affinity_type != affinity_none))) {
				3842	KMP_WARNING(ErrorInitializeAffinity);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3843	}
				3844	__kmp_affinity_type = affinity_none;
Andrey Churbanov	1f037e4	2015-03-10 09:15:26 +0000	[diff] [blame]	3845	KMP_AFFINITY_DISABLE();
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3846	return;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3847	}
				3848	} else {
				3849	__kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
				3850	__kmp_avail_proc = __kmp_xproc;
				3851	}
				3852	}
				3853
				3854	int depth = -1;
				3855	kmp_i18n_id_t msg_id = kmp_i18n_null;
				3856
				3857	// For backward compatibility, setting KMP_CPUINFO_FILE =>
				3858	// KMP_TOPOLOGY_METHOD=cpuinfo
				3859	if ((__kmp_cpuinfo_file != NULL) &&
				3860	(__kmp_affinity_top_method == affinity_top_method_all)) {
				3861	__kmp_affinity_top_method = affinity_top_method_cpuinfo;
				3862	}
				3863
				3864	if (__kmp_affinity_top_method == affinity_top_method_all) {
				3865	// In the default code path, errors are not fatal - we just try using
				3866	// another method. We only emit a warning message if affinity is on, or the
				3867	// verbose flag is set, and the nowarnings flag was not set.
				3868	const char *file_name = NULL;
				3869	int line = 0;
				3870	#if KMP_USE_HWLOC
				3871	if (depth < 0 &&
				3872	__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
				3873	if (__kmp_affinity_verbose) {
				3874	KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
				3875	}
				3876	if (!__kmp_hwloc_error) {
				3877	depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
				3878	if (depth == 0) {
				3879	KMP_EXIT_AFF_NONE;
				3880	} else if (depth < 0 && __kmp_affinity_verbose) {
				3881	KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
				3882	}
				3883	} else if (__kmp_affinity_verbose) {
				3884	KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
				3885	}
				3886	}
				3887	#endif
				3888
				3889	#if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
				3890
				3891	if (depth < 0) {
				3892	if (__kmp_affinity_verbose) {
				3893	KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
				3894	}
				3895
				3896	file_name = NULL;
				3897	depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
				3898	if (depth == 0) {
				3899	KMP_EXIT_AFF_NONE;
				3900	}
				3901
				3902	if (depth < 0) {
				3903	if (__kmp_affinity_verbose) {
				3904	if (msg_id != kmp_i18n_null) {
				3905	KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY",
				3906	__kmp_i18n_catgets(msg_id),
				3907	KMP_I18N_STR(DecodingLegacyAPIC));
				3908	} else {
				3909	KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
				3910	KMP_I18N_STR(DecodingLegacyAPIC));
				3911	}
				3912	}
				3913
				3914	file_name = NULL;
				3915	depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
				3916	if (depth == 0) {
				3917	KMP_EXIT_AFF_NONE;
				3918	}
				3919	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3920	}
				3921
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3922	#endif /* KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3923
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3924	#if KMP_OS_LINUX
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3925
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3926	if (depth < 0) {
				3927	if (__kmp_affinity_verbose) {
				3928	if (msg_id != kmp_i18n_null) {
				3929	KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY",
				3930	__kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3931	} else {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3932	KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3933	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3934	}
				3935
				3936	FILE *f = fopen("/proc/cpuinfo", "r");
				3937	if (f == NULL) {
				3938	msg_id = kmp_i18n_str_CantOpenCpuinfo;
				3939	} else {
				3940	file_name = "/proc/cpuinfo";
				3941	depth =
				3942	__kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
				3943	fclose(f);
				3944	if (depth == 0) {
				3945	KMP_EXIT_AFF_NONE;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3946	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3947	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3948	}
				3949
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3950	#endif /* KMP_OS_LINUX */
				3951
				3952	#if KMP_GROUP_AFFINITY
				3953
				3954	if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
				3955	if (__kmp_affinity_verbose) {
				3956	KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
				3957	}
				3958
				3959	depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
				3960	KMP_ASSERT(depth != 0);
				3961	}
				3962
				3963	#endif /* KMP_GROUP_AFFINITY */
				3964
				3965	if (depth < 0) {
				3966	if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
				3967	if (file_name == NULL) {
				3968	KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
				3969	} else if (line == 0) {
				3970	KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
				3971	} else {
				3972	KMP_INFORM(UsingFlatOSFileLine, file_name, line,
				3973	__kmp_i18n_catgets(msg_id));
				3974	}
				3975	}
				3976	// FIXME - print msg if msg_id = kmp_i18n_null ???
				3977
				3978	file_name = "";
				3979	depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
				3980	if (depth == 0) {
				3981	KMP_EXIT_AFF_NONE;
				3982	}
				3983	KMP_ASSERT(depth > 0);
				3984	KMP_ASSERT(address2os != NULL);
				3985	}
				3986	}
				3987
				3988	// If the user has specified that a particular topology discovery method is to be
				3989	// used, then we abort if that method fails. The exception is group affinity,
				3990	// which might have been implicitly set.
				3991
				3992	#if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
				3993
				3994	else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
				3995	if (__kmp_affinity_verbose) {
				3996	KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
				3997	}
				3998
				3999	depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
				4000	if (depth == 0) {
				4001	KMP_EXIT_AFF_NONE;
				4002	}
				4003	if (depth < 0) {
				4004	KMP_ASSERT(msg_id != kmp_i18n_null);
				4005	KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
				4006	}
				4007	} else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
				4008	if (__kmp_affinity_verbose) {
				4009	KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
				4010	}
				4011
				4012	depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
				4013	if (depth == 0) {
				4014	KMP_EXIT_AFF_NONE;
				4015	}
				4016	if (depth < 0) {
				4017	KMP_ASSERT(msg_id != kmp_i18n_null);
				4018	KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
				4019	}
				4020	}
				4021
				4022	#endif /* KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 */
				4023
				4024	else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
				4025	const char *filename;
				4026	if (__kmp_cpuinfo_file != NULL) {
				4027	filename = __kmp_cpuinfo_file;
				4028	} else {
				4029	filename = "/proc/cpuinfo";
				4030	}
				4031
				4032	if (__kmp_affinity_verbose) {
				4033	KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
				4034	}
				4035
				4036	FILE *f = fopen(filename, "r");
				4037	if (f == NULL) {
				4038	int code = errno;
				4039	if (__kmp_cpuinfo_file != NULL) {
				4040	__kmp_msg(kmp_ms_fatal, KMP_MSG(CantOpenFileForReading, filename),
				4041	KMP_ERR(code), KMP_HNT(NameComesFrom_CPUINFO_FILE),
				4042	__kmp_msg_null);
				4043	} else {
				4044	__kmp_msg(kmp_ms_fatal, KMP_MSG(CantOpenFileForReading, filename),
				4045	KMP_ERR(code), __kmp_msg_null);
				4046	}
				4047	}
				4048	int line = 0;
				4049	depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
				4050	fclose(f);
				4051	if (depth < 0) {
				4052	KMP_ASSERT(msg_id != kmp_i18n_null);
				4053	if (line > 0) {
				4054	KMP_FATAL(FileLineMsgExiting, filename, line,
				4055	__kmp_i18n_catgets(msg_id));
				4056	} else {
				4057	KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
				4058	}
				4059	}
				4060	if (__kmp_affinity_type == affinity_none) {
				4061	KMP_ASSERT(depth == 0);
				4062	KMP_EXIT_AFF_NONE;
				4063	}
				4064	}
				4065
				4066	#if KMP_GROUP_AFFINITY
				4067
				4068	else if (__kmp_affinity_top_method == affinity_top_method_group) {
				4069	if (__kmp_affinity_verbose) {
				4070	KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
				4071	}
				4072
				4073	depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
				4074	KMP_ASSERT(depth != 0);
				4075	if (depth < 0) {
				4076	KMP_ASSERT(msg_id != kmp_i18n_null);
				4077	KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
				4078	}
				4079	}
				4080
				4081	#endif /* KMP_GROUP_AFFINITY */
				4082
				4083	else if (__kmp_affinity_top_method == affinity_top_method_flat) {
				4084	if (__kmp_affinity_verbose) {
				4085	KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
				4086	}
				4087
				4088	depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
				4089	if (depth == 0) {
				4090	KMP_EXIT_AFF_NONE;
				4091	}
				4092	// should not fail
				4093	KMP_ASSERT(depth > 0);
				4094	KMP_ASSERT(address2os != NULL);
				4095	}
				4096
				4097	#if KMP_USE_HWLOC
				4098	else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
				4099	KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
				4100	if (__kmp_affinity_verbose) {
				4101	KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
				4102	}
				4103	depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
				4104	if (depth == 0) {
				4105	KMP_EXIT_AFF_NONE;
				4106	}
				4107	}
				4108	#endif // KMP_USE_HWLOC
				4109
				4110	if (address2os == NULL) {
				4111	if (KMP_AFFINITY_CAPABLE() &&
				4112	(__kmp_affinity_verbose \|\|
				4113	(__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none)))) {
				4114	KMP_WARNING(ErrorInitializeAffinity);
				4115	}
				4116	__kmp_affinity_type = affinity_none;
				4117	KMP_AFFINITY_DISABLE();
				4118	return;
				4119	}
				4120
				4121	__kmp_apply_thread_places(&address2os, depth);
				4122
				4123	// Create the table of masks, indexed by thread Id.
				4124	unsigned maxIndex;
				4125	unsigned numUnique;
				4126	kmp_affin_mask_t *osId2Mask =
				4127	__kmp_create_masks(&maxIndex, &numUnique, address2os, __kmp_avail_proc);
				4128	if (__kmp_affinity_gran_levels == 0) {
				4129	KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
				4130	}
				4131
				4132	// Set the childNums vector in all Address objects. This must be done before
				4133	// we can sort using __kmp_affinity_cmp_Address_child_num(), which takes into
				4134	// account the setting of __kmp_affinity_compact.
				4135	__kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
				4136
				4137	switch (__kmp_affinity_type) {
				4138
				4139	case affinity_explicit:
				4140	KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
				4141	#if OMP_40_ENABLED
				4142	if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
				4143	#endif
				4144	{
				4145	__kmp_affinity_process_proclist(
				4146	&__kmp_affinity_masks, &__kmp_affinity_num_masks,
				4147	__kmp_affinity_proclist, osId2Mask, maxIndex);
				4148	}
				4149	#if OMP_40_ENABLED
				4150	else {
				4151	__kmp_affinity_process_placelist(
				4152	&__kmp_affinity_masks, &__kmp_affinity_num_masks,
				4153	__kmp_affinity_proclist, osId2Mask, maxIndex);
				4154	}
				4155	#endif
				4156	if (__kmp_affinity_num_masks == 0) {
				4157	if (__kmp_affinity_verbose \|\|
				4158	(__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
				4159	KMP_WARNING(AffNoValidProcID);
				4160	}
				4161	__kmp_affinity_type = affinity_none;
				4162	return;
				4163	}
				4164	break;
				4165
				4166	// The other affinity types rely on sorting the Addresses according to some
				4167	// permutation of the machine topology tree. Set __kmp_affinity_compact and
				4168	// __kmp_affinity_offset appropriately, then jump to a common code fragment
				4169	// to do the sort and create the array of affinity masks.
				4170
				4171	case affinity_logical:
				4172	__kmp_affinity_compact = 0;
				4173	if (__kmp_affinity_offset) {
				4174	__kmp_affinity_offset =
				4175	__kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
				4176	}
				4177	goto sortAddresses;
				4178
				4179	case affinity_physical:
				4180	if (__kmp_nThreadsPerCore > 1) {
				4181	__kmp_affinity_compact = 1;
				4182	if (__kmp_affinity_compact >= depth) {
				4183	__kmp_affinity_compact = 0;
				4184	}
				4185	} else {
				4186	__kmp_affinity_compact = 0;
				4187	}
				4188	if (__kmp_affinity_offset) {
				4189	__kmp_affinity_offset =
				4190	__kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
				4191	}
				4192	goto sortAddresses;
				4193
				4194	case affinity_scatter:
				4195	if (__kmp_affinity_compact >= depth) {
				4196	__kmp_affinity_compact = 0;
				4197	} else {
				4198	__kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
				4199	}
				4200	goto sortAddresses;
				4201
				4202	case affinity_compact:
				4203	if (__kmp_affinity_compact >= depth) {
				4204	__kmp_affinity_compact = depth - 1;
				4205	}
				4206	goto sortAddresses;
				4207
				4208	case affinity_balanced:
				4209	if (depth <= 1) {
				4210	if (__kmp_affinity_verbose \|\| __kmp_affinity_warnings) {
				4211	KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
				4212	}
				4213	__kmp_affinity_type = affinity_none;
				4214	return;
				4215	} else if (__kmp_affinity_uniform_topology()) {
				4216	break;
				4217	} else { // Non-uniform topology
				4218
				4219	// Save the depth for further usage
				4220	__kmp_aff_depth = depth;
				4221
				4222	int core_level = __kmp_affinity_find_core_level(
				4223	address2os, __kmp_avail_proc, depth - 1);
				4224	int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
				4225	depth - 1, core_level);
				4226	int maxprocpercore = __kmp_affinity_max_proc_per_core(
				4227	address2os, __kmp_avail_proc, depth - 1, core_level);
				4228
				4229	int nproc = ncores * maxprocpercore;
				4230	if ((nproc < 2) \|\| (nproc < __kmp_avail_proc)) {
				4231	if (__kmp_affinity_verbose \|\| __kmp_affinity_warnings) {
				4232	KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
				4233	}
				4234	__kmp_affinity_type = affinity_none;
				4235	return;
				4236	}
				4237
				4238	procarr = (int )__kmp_allocate(sizeof(int) nproc);
				4239	for (int i = 0; i < nproc; i++) {
				4240	procarr[i] = -1;
				4241	}
				4242
				4243	int lastcore = -1;
				4244	int inlastcore = 0;
				4245	for (int i = 0; i < __kmp_avail_proc; i++) {
				4246	int proc = address2os[i].second;
				4247	int core =
				4248	__kmp_affinity_find_core(address2os, i, depth - 1, core_level);
				4249
				4250	if (core == lastcore) {
				4251	inlastcore++;
				4252	} else {
				4253	inlastcore = 0;
				4254	}
				4255	lastcore = core;
				4256
				4257	procarr[core * maxprocpercore + inlastcore] = proc;
				4258	}
				4259
				4260	break;
				4261	}
				4262
				4263	sortAddresses:
				4264	// Allocate the gtid->affinity mask table.
				4265	if (__kmp_affinity_dups) {
				4266	__kmp_affinity_num_masks = __kmp_avail_proc;
				4267	} else {
				4268	__kmp_affinity_num_masks = numUnique;
				4269	}
				4270
				4271	#if OMP_40_ENABLED
				4272	if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
				4273	(__kmp_affinity_num_places > 0) &&
				4274	((unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks)) {
				4275	__kmp_affinity_num_masks = __kmp_affinity_num_places;
				4276	}
				4277	#endif
				4278
				4279	KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
				4280
				4281	// Sort the address2os table according to the current setting of
				4282	// __kmp_affinity_compact, then fill out __kmp_affinity_masks.
				4283	qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
				4284	__kmp_affinity_cmp_Address_child_num);
				4285	{
				4286	int i;
				4287	unsigned j;
				4288	for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
				4289	if ((!__kmp_affinity_dups) && (!address2os[i].first.leader)) {
				4290	continue;
				4291	}
				4292	unsigned osId = address2os[i].second;
				4293	kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
				4294	kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j);
				4295	KMP_ASSERT(KMP_CPU_ISSET(osId, src));
				4296	KMP_CPU_COPY(dest, src);
				4297	if (++j >= __kmp_affinity_num_masks) {
				4298	break;
				4299	}
				4300	}
				4301	KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
				4302	}
				4303	break;
				4304
				4305	default:
				4306	KMP_ASSERT2(0, "Unexpected affinity setting");
				4307	}
				4308
				4309	KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex + 1);
				4310	machine_hierarchy.init(address2os, __kmp_avail_proc);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4311	}
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	4312	#undef KMP_EXIT_AFF_NONE
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4313
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4314	void __kmp_affinity_initialize(void) {
				4315	// Much of the code above was written assumming that if a machine was not
				4316	// affinity capable, then __kmp_affinity_type == affinity_none. We now
				4317	// explicitly represent this as __kmp_affinity_type == affinity_disabled.
				4318	// There are too many checks for __kmp_affinity_type == affinity_none
				4319	// in this code. Instead of trying to change them all, check if
				4320	// __kmp_affinity_type == affinity_disabled, and if so, slam it with
				4321	// affinity_none, call the real initialization routine, then restore
				4322	// __kmp_affinity_type to affinity_disabled.
				4323	int disabled = (__kmp_affinity_type == affinity_disabled);
				4324	if (!KMP_AFFINITY_CAPABLE()) {
				4325	KMP_ASSERT(disabled);
				4326	}
				4327	if (disabled) {
				4328	__kmp_affinity_type = affinity_none;
				4329	}
				4330	__kmp_aux_affinity_initialize();
				4331	if (disabled) {
				4332	__kmp_affinity_type = affinity_disabled;
				4333	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4334	}
				4335
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4336	void __kmp_affinity_uninitialize(void) {
				4337	if (__kmp_affinity_masks != NULL) {
				4338	KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
				4339	__kmp_affinity_masks = NULL;
				4340	}
				4341	if (__kmp_affin_fullMask != NULL) {
				4342	KMP_CPU_FREE(__kmp_affin_fullMask);
				4343	__kmp_affin_fullMask = NULL;
				4344	}
				4345	__kmp_affinity_num_masks = 0;
				4346	__kmp_affinity_type = affinity_default;
				4347	#if OMP_40_ENABLED
				4348	__kmp_affinity_num_places = 0;
				4349	#endif
				4350	if (__kmp_affinity_proclist != NULL) {
				4351	__kmp_free(__kmp_affinity_proclist);
				4352	__kmp_affinity_proclist = NULL;
				4353	}
				4354	if (address2os != NULL) {
				4355	__kmp_free(address2os);
				4356	address2os = NULL;
				4357	}
				4358	if (procarr != NULL) {
				4359	__kmp_free(procarr);
				4360	procarr = NULL;
				4361	}
				4362	#if KMP_USE_HWLOC
				4363	if (__kmp_hwloc_topology != NULL) {
				4364	hwloc_topology_destroy(__kmp_hwloc_topology);
				4365	__kmp_hwloc_topology = NULL;
				4366	}
				4367	#endif
				4368	KMPAffinity::destroy_api();
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4369	}
				4370
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4371	void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
				4372	if (!KMP_AFFINITY_CAPABLE()) {
				4373	return;
				4374	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4375
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4376	kmp_info_t th = (kmp_info_t )TCR_SYNC_PTR(__kmp_threads[gtid]);
				4377	if (th->th.th_affin_mask == NULL) {
				4378	KMP_CPU_ALLOC(th->th.th_affin_mask);
				4379	} else {
				4380	KMP_CPU_ZERO(th->th.th_affin_mask);
				4381	}
				4382
				4383	// Copy the thread mask to the kmp_info_t strucuture. If
				4384	// __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one that
				4385	// has all of the OS proc ids set, or if __kmp_affinity_respect_mask is set,
				4386	// then the full mask is the same as the mask of the initialization thread.
				4387	kmp_affin_mask_t *mask;
				4388	int i;
				4389
				4390	#if OMP_40_ENABLED
				4391	if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
				4392	#endif
				4393	{
				4394	if ((__kmp_affinity_type == affinity_none) \|\|
				4395	(__kmp_affinity_type == affinity_balanced)) {
				4396	#if KMP_GROUP_AFFINITY
				4397	if (__kmp_num_proc_groups > 1) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4398	return;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4399	}
				4400	#endif
				4401	KMP_ASSERT(__kmp_affin_fullMask != NULL);
				4402	i = KMP_PLACE_ALL;
				4403	mask = __kmp_affin_fullMask;
				4404	} else {
				4405	KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
				4406	i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
				4407	mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4408	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4409	}
				4410	#if OMP_40_ENABLED
				4411	else {
				4412	if ((!isa_root) \|\|
				4413	(__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
				4414	#if KMP_GROUP_AFFINITY
				4415	if (__kmp_num_proc_groups > 1) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4416	return;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4417	}
				4418	#endif
				4419	KMP_ASSERT(__kmp_affin_fullMask != NULL);
				4420	i = KMP_PLACE_ALL;
				4421	mask = __kmp_affin_fullMask;
				4422	} else {
				4423	// int i = some hash function or just a counter that doesn't
				4424	// always start at 0. Use gtid for now.
				4425	KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
				4426	i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
				4427	mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4428	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4429	}
				4430	#endif
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4431
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4432	#if OMP_40_ENABLED
				4433	th->th.th_current_place = i;
				4434	if (isa_root) {
				4435	th->th.th_new_place = i;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4436	th->th.th_first_place = 0;
				4437	th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4438	}
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	4439
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4440	if (i == KMP_PLACE_ALL) {
				4441	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
				4442	gtid));
				4443	} else {
				4444	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
				4445	gtid, i));
				4446	}
				4447	#else
				4448	if (i == -1) {
				4449	KA_TRACE(
				4450	100,
				4451	("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n",
				4452	gtid));
				4453	} else {
				4454	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
				4455	gtid, i));
				4456	}
				4457	#endif /* OMP_40_ENABLED */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4458
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4459	KMP_CPU_COPY(th->th.th_affin_mask, mask);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4460
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4461	if (__kmp_affinity_verbose) {
				4462	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4463	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4464	th->th.th_affin_mask);
				4465	KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
				4466	__kmp_gettid(), gtid, buf);
				4467	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4468
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4469	#if KMP_OS_WINDOWS
				4470	// On Windows* OS, the process affinity mask might have changed. If the user
				4471	// didn't request affinity and this call fails, just continue silently.
				4472	// See CQ171393.
				4473	if (__kmp_affinity_type == affinity_none) {
				4474	__kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
				4475	} else
Jonathan Peyton	7c465a5	2016-09-12 19:02:53 +0000	[diff] [blame]	4476	#endif
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4477	__kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
Jonathan Peyton	7c465a5	2016-09-12 19:02:53 +0000	[diff] [blame]	4478	}
				4479
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4480	#if OMP_40_ENABLED
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4481
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4482	void __kmp_affinity_set_place(int gtid) {
				4483	int retval;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4484
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4485	if (!KMP_AFFINITY_CAPABLE()) {
				4486	return;
				4487	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4488
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4489	kmp_info_t th = (kmp_info_t )TCR_SYNC_PTR(__kmp_threads[gtid]);
				4490
				4491	KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current "
				4492	"place = %d)\n",
				4493	gtid, th->th.th_new_place, th->th.th_current_place));
				4494
				4495	// Check that the new place is within this thread's partition.
				4496	KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
				4497	KMP_ASSERT(th->th.th_new_place >= 0);
				4498	KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
				4499	if (th->th.th_first_place <= th->th.th_last_place) {
				4500	KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&
				4501	(th->th.th_new_place <= th->th.th_last_place));
				4502	} else {
				4503	KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) \|\|
				4504	(th->th.th_new_place >= th->th.th_last_place));
				4505	}
				4506
				4507	// Copy the thread mask to the kmp_info_t strucuture,
				4508	// and set this thread's affinity.
				4509	kmp_affin_mask_t *mask =
				4510	KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place);
				4511	KMP_CPU_COPY(th->th.th_affin_mask, mask);
				4512	th->th.th_current_place = th->th.th_new_place;
				4513
				4514	if (__kmp_affinity_verbose) {
				4515	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4516	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4517	th->th.th_affin_mask);
				4518	KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
				4519	__kmp_gettid(), gtid, buf);
				4520	}
				4521	__kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
				4522	}
				4523
				4524	#endif /* OMP_40_ENABLED */
				4525
				4526	int __kmp_aux_set_affinity(void **mask) {
				4527	int gtid;
				4528	kmp_info_t *th;
				4529	int retval;
				4530
				4531	if (!KMP_AFFINITY_CAPABLE()) {
				4532	return -1;
				4533	}
				4534
				4535	gtid = __kmp_entry_gtid();
				4536	KA_TRACE(1000, ; {
				4537	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4538	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4539	(kmp_affin_mask_t )(mask));
				4540	__kmp_debug_printf(
				4541	"kmp_set_affinity: setting affinity mask for thread %d = %s\n", gtid,
				4542	buf);
				4543	});
				4544
				4545	if (__kmp_env_consistency_check) {
				4546	if ((mask == NULL) \|\| (*mask == NULL)) {
				4547	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
				4548	} else {
				4549	unsigned proc;
				4550	int num_procs = 0;
				4551
				4552	KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t )(mask))) {
				4553	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
				4554	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4555	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4556	if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t )(mask))) {
				4557	continue;
				4558	}
				4559	num_procs++;
				4560	}
				4561	if (num_procs == 0) {
				4562	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
				4563	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4564
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4565	#if KMP_GROUP_AFFINITY
				4566	if (__kmp_get_proc_group((kmp_affin_mask_t )(mask)) < 0) {
				4567	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
				4568	}
				4569	#endif /* KMP_GROUP_AFFINITY */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4570	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4571	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4572
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4573	th = __kmp_threads[gtid];
				4574	KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
				4575	retval = __kmp_set_system_affinity((kmp_affin_mask_t )(mask), FALSE);
				4576	if (retval == 0) {
				4577	KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t )(mask));
				4578	}
				4579
				4580	#if OMP_40_ENABLED
				4581	th->th.th_current_place = KMP_PLACE_UNDEFINED;
				4582	th->th.th_new_place = KMP_PLACE_UNDEFINED;
				4583	th->th.th_first_place = 0;
				4584	th->th.th_last_place = __kmp_affinity_num_masks - 1;
				4585
				4586	// Turn off 4.0 affinity for the current tread at this parallel level.
				4587	th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
				4588	#endif
				4589
				4590	return retval;
				4591	}
				4592
				4593	int __kmp_aux_get_affinity(void **mask) {
				4594	int gtid;
				4595	int retval;
				4596	kmp_info_t *th;
				4597
				4598	if (!KMP_AFFINITY_CAPABLE()) {
				4599	return -1;
				4600	}
				4601
				4602	gtid = __kmp_entry_gtid();
				4603	th = __kmp_threads[gtid];
				4604	KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
				4605
				4606	KA_TRACE(1000, ; {
				4607	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4608	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4609	th->th.th_affin_mask);
				4610	__kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n",
				4611	gtid, buf);
				4612	});
				4613
				4614	if (__kmp_env_consistency_check) {
				4615	if ((mask == NULL) \|\| (*mask == NULL)) {
				4616	KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
				4617	}
				4618	}
				4619
				4620	#if !KMP_OS_WINDOWS
				4621
				4622	retval = __kmp_get_system_affinity((kmp_affin_mask_t )(mask), FALSE);
				4623	KA_TRACE(1000, ; {
				4624	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4625	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4626	(kmp_affin_mask_t )(mask));
				4627	__kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n",
				4628	gtid, buf);
				4629	});
				4630	return retval;
				4631
				4632	#else
				4633
				4634	KMP_CPU_COPY((kmp_affin_mask_t )(mask), th->th.th_affin_mask);
				4635	return 0;
				4636
				4637	#endif /* KMP_OS_WINDOWS */
				4638	}
				4639
				4640	int __kmp_aux_get_affinity_max_proc() {
				4641	if (!KMP_AFFINITY_CAPABLE()) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4642	return 0;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4643	}
				4644	#if KMP_GROUP_AFFINITY
				4645	if (__kmp_num_proc_groups > 1) {
				4646	return (int)(__kmp_num_proc_groups * sizeof(DWORD_PTR) * CHAR_BIT);
				4647	}
				4648	#endif
				4649	return __kmp_xproc;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4650	}
				4651
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4652	int __kmp_aux_set_affinity_mask_proc(int proc, void **mask) {
				4653	int retval;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4654
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4655	if (!KMP_AFFINITY_CAPABLE()) {
				4656	return -1;
				4657	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4658
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4659	KA_TRACE(1000, ; {
				4660	int gtid = __kmp_entry_gtid();
				4661	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4662	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4663	(kmp_affin_mask_t )(mask));
				4664	__kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in "
				4665	"affinity mask for thread %d = %s\n",
				4666	proc, gtid, buf);
				4667	});
				4668
				4669	if (__kmp_env_consistency_check) {
				4670	if ((mask == NULL) \|\| (*mask == NULL)) {
				4671	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4672	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4673	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4674
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4675	if ((proc < 0) \|\| (proc >= __kmp_aux_get_affinity_max_proc())) {
				4676	return -1;
				4677	}
				4678	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
				4679	return -2;
				4680	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4681
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4682	KMP_CPU_SET(proc, (kmp_affin_mask_t )(mask));
				4683	return 0;
				4684	}
				4685
				4686	int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask) {
				4687	int retval;
				4688
				4689	if (!KMP_AFFINITY_CAPABLE()) {
				4690	return -1;
				4691	}
				4692
				4693	KA_TRACE(1000, ; {
				4694	int gtid = __kmp_entry_gtid();
				4695	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4696	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4697	(kmp_affin_mask_t )(mask));
				4698	__kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in "
				4699	"affinity mask for thread %d = %s\n",
				4700	proc, gtid, buf);
				4701	});
				4702
				4703	if (__kmp_env_consistency_check) {
				4704	if ((mask == NULL) \|\| (*mask == NULL)) {
				4705	KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4706	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4707	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4708
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4709	if ((proc < 0) \|\| (proc >= __kmp_aux_get_affinity_max_proc())) {
				4710	return -1;
				4711	}
				4712	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
				4713	return -2;
				4714	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4715
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4716	KMP_CPU_CLR(proc, (kmp_affin_mask_t )(mask));
				4717	return 0;
				4718	}
				4719
				4720	int __kmp_aux_get_affinity_mask_proc(int proc, void **mask) {
				4721	int retval;
				4722
				4723	if (!KMP_AFFINITY_CAPABLE()) {
				4724	return -1;
				4725	}
				4726
				4727	KA_TRACE(1000, ; {
				4728	int gtid = __kmp_entry_gtid();
				4729	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4730	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4731	(kmp_affin_mask_t )(mask));
				4732	__kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in "
				4733	"affinity mask for thread %d = %s\n",
				4734	proc, gtid, buf);
				4735	});
				4736
				4737	if (__kmp_env_consistency_check) {
				4738	if ((mask == NULL) \|\| (*mask == NULL)) {
				4739	KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
				4740	}
				4741	}
				4742
				4743	if ((proc < 0) \|\| (proc >= __kmp_aux_get_affinity_max_proc())) {
				4744	return -1;
				4745	}
				4746	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4747	return 0;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4748	}
				4749
				4750	return KMP_CPU_ISSET(proc, (kmp_affin_mask_t )(mask));
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4751	}
				4752
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4753	// Dynamic affinity settings - Affinity balanced
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4754	void __kmp_balanced_affinity(int tid, int nthreads) {
				4755	bool fine_gran = true;
Paul Osmialowski	ecbe2ea	2016-07-29 20:55:03 +0000	[diff] [blame]	4756
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4757	switch (__kmp_affinity_gran) {
				4758	case affinity_gran_fine:
				4759	case affinity_gran_thread:
				4760	break;
				4761	case affinity_gran_core:
				4762	if (__kmp_nThreadsPerCore > 1) {
				4763	fine_gran = false;
				4764	}
				4765	break;
				4766	case affinity_gran_package:
				4767	if (nCoresPerPkg > 1) {
				4768	fine_gran = false;
				4769	}
				4770	break;
				4771	default:
				4772	fine_gran = false;
				4773	}
				4774
				4775	if (__kmp_affinity_uniform_topology()) {
				4776	int coreID;
				4777	int threadID;
				4778	// Number of hyper threads per core in HT machine
				4779	int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
				4780	// Number of cores
				4781	int ncores = __kmp_ncores;
				4782	if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) {
				4783	__kmp_nth_per_core = __kmp_avail_proc / nPackages;
				4784	ncores = nPackages;
				4785	}
				4786	// How many threads will be bound to each core
				4787	int chunk = nthreads / ncores;
				4788	// How many cores will have an additional thread bound to it - "big cores"
				4789	int big_cores = nthreads % ncores;
				4790	// Number of threads on the big cores
				4791	int big_nth = (chunk + 1) * big_cores;
				4792	if (tid < big_nth) {
				4793	coreID = tid / (chunk + 1);
				4794	threadID = (tid % (chunk + 1)) % __kmp_nth_per_core;
				4795	} else { // tid >= big_nth
				4796	coreID = (tid - big_cores) / chunk;
				4797	threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core;
Paul Osmialowski	ecbe2ea	2016-07-29 20:55:03 +0000	[diff] [blame]	4798	}
				4799
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4800	KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
				4801	"Illegal set affinity operation when not capable");
				4802
				4803	kmp_affin_mask_t *mask;
				4804	KMP_CPU_ALLOC_ON_STACK(mask);
				4805	KMP_CPU_ZERO(mask);
				4806
				4807	if (fine_gran) {
				4808	int osID = address2os[coreID * __kmp_nth_per_core + threadID].second;
				4809	KMP_CPU_SET(osID, mask);
				4810	} else {
				4811	for (int i = 0; i < __kmp_nth_per_core; i++) {
				4812	int osID;
				4813	osID = address2os[coreID * __kmp_nth_per_core + i].second;
				4814	KMP_CPU_SET(osID, mask);
				4815	}
				4816	}
				4817	if (__kmp_affinity_verbose) {
				4818	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4819	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
				4820	KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
				4821	__kmp_gettid(), tid, buf);
				4822	}
				4823	__kmp_set_system_affinity(mask, TRUE);
				4824	KMP_CPU_FREE_FROM_STACK(mask);
				4825	} else { // Non-uniform topology
				4826
				4827	kmp_affin_mask_t *mask;
				4828	KMP_CPU_ALLOC_ON_STACK(mask);
				4829	KMP_CPU_ZERO(mask);
				4830
				4831	int core_level = __kmp_affinity_find_core_level(
				4832	address2os, __kmp_avail_proc, __kmp_aff_depth - 1);
				4833	int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
				4834	__kmp_aff_depth - 1, core_level);
				4835	int nth_per_core = __kmp_affinity_max_proc_per_core(
				4836	address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level);
				4837
				4838	// For performance gain consider the special case nthreads ==
				4839	// __kmp_avail_proc
				4840	if (nthreads == __kmp_avail_proc) {
				4841	if (fine_gran) {
				4842	int osID = address2os[tid].second;
				4843	KMP_CPU_SET(osID, mask);
				4844	} else {
				4845	int core = __kmp_affinity_find_core(address2os, tid,
				4846	__kmp_aff_depth - 1, core_level);
				4847	for (int i = 0; i < __kmp_avail_proc; i++) {
				4848	int osID = address2os[i].second;
				4849	if (__kmp_affinity_find_core(address2os, i, __kmp_aff_depth - 1,
				4850	core_level) == core) {
				4851	KMP_CPU_SET(osID, mask);
				4852	}
Paul Osmialowski	ecbe2ea	2016-07-29 20:55:03 +0000	[diff] [blame]	4853	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4854	}
				4855	} else if (nthreads <= ncores) {
				4856
				4857	int core = 0;
				4858	for (int i = 0; i < ncores; i++) {
				4859	// Check if this core from procarr[] is in the mask
				4860	int in_mask = 0;
				4861	for (int j = 0; j < nth_per_core; j++) {
				4862	if (procarr[i * nth_per_core + j] != -1) {
				4863	in_mask = 1;
				4864	break;
				4865	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4866	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4867	if (in_mask) {
				4868	if (tid == core) {
				4869	for (int j = 0; j < nth_per_core; j++) {
				4870	int osID = procarr[i * nth_per_core + j];
				4871	if (osID != -1) {
				4872	KMP_CPU_SET(osID, mask);
				4873	// For fine granularity it is enough to set the first available
				4874	// osID for this core
				4875	if (fine_gran) {
				4876	break;
				4877	}
				4878	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4879	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4880	break;
				4881	} else {
				4882	core++;
				4883	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4884	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4885	}
				4886	} else { // nthreads > ncores
				4887	// Array to save the number of processors at each core
				4888	int nproc_at_core = (int )KMP_ALLOCA(sizeof(int) * ncores);
				4889	// Array to save the number of cores with "x" available processors;
				4890	int *ncores_with_x_procs =
				4891	(int )KMP_ALLOCA(sizeof(int) (nth_per_core + 1));
				4892	// Array to save the number of cores with # procs from x to nth_per_core
				4893	int *ncores_with_x_to_max_procs =
				4894	(int )KMP_ALLOCA(sizeof(int) (nth_per_core + 1));
				4895
				4896	for (int i = 0; i <= nth_per_core; i++) {
				4897	ncores_with_x_procs[i] = 0;
				4898	ncores_with_x_to_max_procs[i] = 0;
				4899	}
				4900
				4901	for (int i = 0; i < ncores; i++) {
				4902	int cnt = 0;
				4903	for (int j = 0; j < nth_per_core; j++) {
				4904	if (procarr[i * nth_per_core + j] != -1) {
				4905	cnt++;
				4906	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4907	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4908	nproc_at_core[i] = cnt;
				4909	ncores_with_x_procs[cnt]++;
				4910	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4911
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4912	for (int i = 0; i <= nth_per_core; i++) {
				4913	for (int j = i; j <= nth_per_core; j++) {
				4914	ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j];
				4915	}
				4916	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4917
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4918	// Max number of processors
				4919	int nproc = nth_per_core * ncores;
				4920	// An array to keep number of threads per each context
				4921	int newarr = (int )__kmp_allocate(sizeof(int) * nproc);
				4922	for (int i = 0; i < nproc; i++) {
				4923	newarr[i] = 0;
				4924	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4925
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4926	int nth = nthreads;
				4927	int flag = 0;
				4928	while (nth > 0) {
				4929	for (int j = 1; j <= nth_per_core; j++) {
				4930	int cnt = ncores_with_x_to_max_procs[j];
				4931	for (int i = 0; i < ncores; i++) {
				4932	// Skip the core with 0 processors
				4933	if (nproc_at_core[i] == 0) {
				4934	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4935	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4936	for (int k = 0; k < nth_per_core; k++) {
				4937	if (procarr[i * nth_per_core + k] != -1) {
				4938	if (newarr[i * nth_per_core + k] == 0) {
				4939	newarr[i * nth_per_core + k] = 1;
				4940	cnt--;
				4941	nth--;
				4942	break;
				4943	} else {
				4944	if (flag != 0) {
				4945	newarr[i * nth_per_core + k]++;
				4946	cnt--;
				4947	nth--;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4948	break;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4949	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4950	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4951	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4952	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4953	if (cnt == 0 \|\| nth == 0) {
				4954	break;
				4955	}
				4956	}
				4957	if (nth == 0) {
				4958	break;
				4959	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4960	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4961	flag = 1;
				4962	}
				4963	int sum = 0;
				4964	for (int i = 0; i < nproc; i++) {
				4965	sum += newarr[i];
				4966	if (sum > tid) {
				4967	if (fine_gran) {
				4968	int osID = procarr[i];
				4969	KMP_CPU_SET(osID, mask);
				4970	} else {
				4971	int coreID = i / nth_per_core;
				4972	for (int ii = 0; ii < nth_per_core; ii++) {
				4973	int osID = procarr[coreID * nth_per_core + ii];
				4974	if (osID != -1) {
				4975	KMP_CPU_SET(osID, mask);
				4976	}
				4977	}
				4978	}
				4979	break;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4980	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4981	}
				4982	__kmp_free(newarr);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4983	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4984
				4985	if (__kmp_affinity_verbose) {
				4986	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4987	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
				4988	KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
				4989	__kmp_gettid(), tid, buf);
				4990	}
				4991	__kmp_set_system_affinity(mask, TRUE);
				4992	KMP_CPU_FREE_FROM_STACK(mask);
				4993	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4994	}
				4995
Jonathan Peyton	3076fa4	2016-01-12 17:21:55 +0000	[diff] [blame]	4996	#if KMP_OS_LINUX
				4997	// We don't need this entry for Windows because
				4998	// there is GetProcessAffinityMask() api
				4999	//
				5000	// The intended usage is indicated by these steps:
				5001	// 1) The user gets the current affinity mask
				5002	// 2) Then sets the affinity by calling this function
				5003	// 3) Error check the return value
				5004	// 4) Use non-OpenMP parallelization
				5005	// 5) Reset the affinity to what was stored in step 1)
				5006	#ifdef __cplusplus
				5007	extern "C"
				5008	#endif
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5009	int
				5010	kmp_set_thread_affinity_mask_initial()
Jonathan Peyton	3076fa4	2016-01-12 17:21:55 +0000	[diff] [blame]	5011	// the function returns 0 on success,
				5012	// -1 if we cannot bind thread
				5013	// >0 (errno) if an error happened during binding
				5014	{
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5015	int gtid = __kmp_get_gtid();
				5016	if (gtid < 0) {
				5017	// Do not touch non-omp threads
				5018	KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
				5019	"non-omp thread, returning\n"));
				5020	return -1;
				5021	}
				5022	if (!KMP_AFFINITY_CAPABLE() \|\| !__kmp_init_middle) {
				5023	KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
				5024	"affinity not initialized, returning\n"));
				5025	return -1;
				5026	}
				5027	KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
				5028	"set full mask for thread %d\n",
				5029	gtid));
				5030	KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
				5031	return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
Jonathan Peyton	3076fa4	2016-01-12 17:21:55 +0000	[diff] [blame]	5032	}
				5033	#endif
				5034
Alp Toker	763b939	2014-02-28 09:42:41 +0000	[diff] [blame]	5035	#endif // KMP_AFFINITY_SUPPORTED