Blame - openmp/runtime/src/kmp_affinity.cpp - toolchain/llvm-project

blob: b58a3d6e461a0c9466326a94737b529c99e716e2 [file] [log] [blame]

Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1	/*
				2	* kmp_affinity.cpp -- affinity management
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3	*/
				4
				5
				6	//===----------------------------------------------------------------------===//
				7	//
				8	// The LLVM Compiler Infrastructure
				9	//
				10	// This file is dual licensed under the MIT and the University of Illinois Open
				11	// Source Licenses. See LICENSE.txt for details.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15
				16	#include "kmp.h"
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	17	#include "kmp_affinity.h"
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	18	#include "kmp_i18n.h"
				19	#include "kmp_io.h"
				20	#include "kmp_str.h"
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	21	#include "kmp_wrapper_getpid.h"
Jonathan Peyton	1707836	2015-09-10 19:22:07 +0000	[diff] [blame]	22
				23	// Store the real or imagined machine hierarchy here
				24	static hierarchy_info machine_hierarchy;
				25
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	26	void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }
				27
Jonathan Peyton	1707836	2015-09-10 19:22:07 +0000	[diff] [blame]	28
				29	void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	30	kmp_uint32 depth;
				31	// The test below is true if affinity is available, but set to "none". Need to
				32	// init on first use of hierarchical barrier.
				33	if (TCR_1(machine_hierarchy.uninitialized))
				34	machine_hierarchy.init(NULL, nproc);
Jonathan Peyton	1707836	2015-09-10 19:22:07 +0000	[diff] [blame]	35
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	36	// Adjust the hierarchy in case num threads exceeds original
				37	if (nproc > machine_hierarchy.base_num_threads)
				38	machine_hierarchy.resize(nproc);
Jonathan Peyton	7dee82e	2015-11-09 16:24:53 +0000	[diff] [blame]	39
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	40	depth = machine_hierarchy.depth;
				41	KMP_DEBUG_ASSERT(depth > 0);
Jonathan Peyton	1707836	2015-09-10 19:22:07 +0000	[diff] [blame]	42
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	43	thr_bar->depth = depth;
				44	thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1;
				45	thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
Jonathan Peyton	1707836	2015-09-10 19:22:07 +0000	[diff] [blame]	46	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	47
Alp Toker	763b939	2014-02-28 09:42:41 +0000	[diff] [blame]	48	#if KMP_AFFINITY_SUPPORTED
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	49
Jonathan Peyton	1cdd87a	2016-11-14 21:08:35 +0000	[diff] [blame]	50	bool KMPAffinity::picked_api = false;
				51
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	52	void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
				53	void *KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); }
				54	void KMPAffinity::Mask::operator delete(void *p) { __kmp_free(p); }
				55	void KMPAffinity::Mask::operator delete[](void *p) { __kmp_free(p); }
				56	void *KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); }
				57	void KMPAffinity::operator delete(void *p) { __kmp_free(p); }
Jonathan Peyton	1cdd87a	2016-11-14 21:08:35 +0000	[diff] [blame]	58
				59	void KMPAffinity::pick_api() {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	60	KMPAffinity *affinity_dispatch;
				61	if (picked_api)
				62	return;
Jonathan Peyton	1cdd87a	2016-11-14 21:08:35 +0000	[diff] [blame]	63	#if KMP_USE_HWLOC
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	64	if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
				65	affinity_dispatch = new KMPHwlocAffinity();
				66	} else
Jonathan Peyton	1cdd87a	2016-11-14 21:08:35 +0000	[diff] [blame]	67	#endif
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	68	{
				69	affinity_dispatch = new KMPNativeAffinity();
				70	}
				71	__kmp_affinity_dispatch = affinity_dispatch;
				72	picked_api = true;
Jonathan Peyton	1cdd87a	2016-11-14 21:08:35 +0000	[diff] [blame]	73	}
				74
				75	void KMPAffinity::destroy_api() {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	76	if (__kmp_affinity_dispatch != NULL) {
				77	delete __kmp_affinity_dispatch;
				78	__kmp_affinity_dispatch = NULL;
				79	picked_api = false;
				80	}
Jonathan Peyton	1cdd87a	2016-11-14 21:08:35 +0000	[diff] [blame]	81	}
				82
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	83	// Print the affinity mask to the character array in a pretty format.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	84	char __kmp_affinity_print_mask(char buf, int buf_len,
				85	kmp_affin_mask_t *mask) {
				86	KMP_ASSERT(buf_len >= 40);
				87	char *scan = buf;
				88	char *end = buf + buf_len - 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	89
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	90	// Find first element / check for empty set.
				91	size_t i;
				92	i = mask->begin();
				93	if (i == mask->end()) {
				94	KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}");
				95	while (*scan != '\0')
				96	scan++;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	97	KMP_ASSERT(scan <= end);
				98	return buf;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	99	}
				100
				101	KMP_SNPRINTF(scan, end - scan + 1, "{%ld", (long)i);
				102	while (*scan != '\0')
				103	scan++;
				104	i++;
				105	for (; i != mask->end(); i = mask->next(i)) {
				106	if (!KMP_CPU_ISSET(i, mask)) {
				107	continue;
				108	}
				109
				110	// Check for buffer overflow. A string of the form ",<n>" will have at most
				111	// 10 characters, plus we want to leave room to print ",...}" if the set is
				112	// too large to print for a total of 15 characters. We already left room for
				113	// '\0' in setting end.
				114	if (end - scan < 15) {
				115	break;
				116	}
				117	KMP_SNPRINTF(scan, end - scan + 1, ",%-ld", (long)i);
				118	while (*scan != '\0')
				119	scan++;
				120	}
				121	if (i != mask->end()) {
				122	KMP_SNPRINTF(scan, end - scan + 1, ",...");
				123	while (*scan != '\0')
				124	scan++;
				125	}
				126	KMP_SNPRINTF(scan, end - scan + 1, "}");
				127	while (*scan != '\0')
				128	scan++;
				129	KMP_ASSERT(scan <= end);
				130	return buf;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	131	}
				132
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	133	void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
				134	KMP_CPU_ZERO(mask);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	135
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	136	#if KMP_GROUP_AFFINITY
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	137
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	138	if (__kmp_num_proc_groups > 1) {
				139	int group;
				140	KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
				141	for (group = 0; group < __kmp_num_proc_groups; group++) {
				142	int i;
				143	int num = __kmp_GetActiveProcessorCount(group);
				144	for (i = 0; i < num; i++) {
				145	KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
				146	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	147	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	148	} else
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	149
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	150	#endif /* KMP_GROUP_AFFINITY */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	151
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	152	{
				153	int proc;
				154	for (proc = 0; proc < __kmp_xproc; proc++) {
				155	KMP_CPU_SET(proc, mask);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	156	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	157	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	158	}
				159
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	160	// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
				161	// called to renumber the labels from [0..n] and place them into the child_num
				162	// vector of the address object. This is done in case the labels used for
Alp Toker	8f2d3f0	2014-02-24 10:40:15 +0000	[diff] [blame]	163	// the children at one node of the hierarchy differ from those used for
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	164	// another node at the same level. Example: suppose the machine has 2 nodes
				165	// with 2 packages each. The first node contains packages 601 and 602, and
				166	// second node contains packages 603 and 604. If we try to sort the table
				167	// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
				168	// because we are paying attention to the labels themselves, not the ordinal
				169	// child numbers. By using the child numbers in the sort, the result is
				170	// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	171	static void __kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
				172	int numAddrs) {
				173	KMP_DEBUG_ASSERT(numAddrs > 0);
				174	int depth = address2os->first.depth;
				175	unsigned counts = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				176	unsigned lastLabel = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				177	int labCt;
				178	for (labCt = 0; labCt < depth; labCt++) {
				179	address2os[0].first.childNums[labCt] = counts[labCt] = 0;
				180	lastLabel[labCt] = address2os[0].first.labels[labCt];
				181	}
				182	int i;
				183	for (i = 1; i < numAddrs; i++) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	184	for (labCt = 0; labCt < depth; labCt++) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	185	if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
				186	int labCt2;
				187	for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
				188	counts[labCt2] = 0;
				189	lastLabel[labCt2] = address2os[i].first.labels[labCt2];
				190	}
				191	counts[labCt]++;
				192	lastLabel[labCt] = address2os[i].first.labels[labCt];
				193	break;
				194	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	195	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	196	for (labCt = 0; labCt < depth; labCt++) {
				197	address2os[i].first.childNums[labCt] = counts[labCt];
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	198	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	199	for (; labCt < (int)Address::maxDepth; labCt++) {
				200	address2os[i].first.childNums[labCt] = 0;
				201	}
				202	}
				203	__kmp_free(lastLabel);
				204	__kmp_free(counts);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	205	}
				206
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	207	// All of the __kmp_affinity_create_*_map() routines should set
				208	// __kmp_affinity_masks to a vector of affinity mask objects of length
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	209	// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and return
				210	// the number of levels in the machine topology tree (zero if
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	211	// __kmp_affinity_type == affinity_none).
				212	//
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	213	// All of the __kmp_affinity_create_*_map() routines should set
				214	// *__kmp_affin_fullMask to the affinity mask for the initialization thread.
				215	// They need to save and restore the mask, and it could be needed later, so
				216	// saving it is just an optimization to avoid calling kmp_get_system_affinity()
				217	// again.
Jonathan Peyton	c5304aa	2016-06-13 21:28:03 +0000	[diff] [blame]	218	kmp_affin_mask_t *__kmp_affin_fullMask = NULL;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	219
				220	static int nCoresPerPkg, nPackages;
Andrey Churbanov	f696c82	2015-01-27 16:55:43 +0000	[diff] [blame]	221	static int __kmp_nThreadsPerCore;
				222	#ifndef KMP_DFLT_NTH_CORES
				223	static int __kmp_ncores;
				224	#endif
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	225	static int *__kmp_pu_os_idx = NULL;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	226
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	227	// __kmp_affinity_uniform_topology() doesn't work when called from
				228	// places which support arbitrarily many levels in the machine topology
				229	// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
				230	// __kmp_affinity_create_x2apicid_map().
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	231	inline static bool __kmp_affinity_uniform_topology() {
				232	return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	233	}
				234
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	235	// Print out the detailed machine topology map, i.e. the physical locations
				236	// of each OS proc.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	237	static void __kmp_affinity_print_topology(AddrUnsPair *address2os, int len,
				238	int depth, int pkgLevel,
				239	int coreLevel, int threadLevel) {
				240	int proc;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	241
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	242	KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
				243	for (proc = 0; proc < len; proc++) {
				244	int level;
				245	kmp_str_buf_t buf;
				246	__kmp_str_buf_init(&buf);
				247	for (level = 0; level < depth; level++) {
				248	if (level == threadLevel) {
				249	__kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
				250	} else if (level == coreLevel) {
				251	__kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
				252	} else if (level == pkgLevel) {
				253	__kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
				254	} else if (level > pkgLevel) {
				255	__kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
				256	level - pkgLevel - 1);
				257	} else {
				258	__kmp_str_buf_print(&buf, "L%d ", level);
				259	}
				260	__kmp_str_buf_print(&buf, "%d ", address2os[proc].first.labels[level]);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	261	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	262	KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
				263	buf.str);
				264	__kmp_str_buf_free(&buf);
				265	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	266	}
				267
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	268	#if KMP_USE_HWLOC
Jonathan Peyton	202a24d	2016-06-13 17:30:08 +0000	[diff] [blame]	269
				270	// This function removes the topology levels that are radix 1 and don't offer
				271	// further information about the topology. The most common example is when you
				272	// have one thread context per core, we don't want the extra thread context
				273	// level if it offers no unique labels. So they are removed.
				274	// return value: the new depth of address2os
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	275	static int __kmp_affinity_remove_radix_one_levels(AddrUnsPair *address2os,
				276	int nActiveThreads, int depth,
				277	int pkgLevel, int coreLevel,
				278	int *threadLevel) {
				279	int level;
				280	int i;
				281	int radix1_detected;
Jonathan Peyton	202a24d	2016-06-13 17:30:08 +0000	[diff] [blame]	282
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	283	for (level = depth - 1; level >= 0; --level) {
				284	// Always keep the package level
				285	if (level == *pkgLevel)
				286	continue;
				287	// Detect if this level is radix 1
				288	radix1_detected = 1;
				289	for (i = 1; i < nActiveThreads; ++i) {
				290	if (address2os[0].first.labels[level] !=
				291	address2os[i].first.labels[level]) {
				292	// There are differing label values for this level so it stays
				293	radix1_detected = 0;
				294	break;
				295	}
Jonathan Peyton	202a24d	2016-06-13 17:30:08 +0000	[diff] [blame]	296	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	297	if (!radix1_detected)
				298	continue;
				299	// Radix 1 was detected
				300	if (level == *threadLevel) {
				301	// If only one thread per core, then just decrement
				302	// the depth which removes the threadlevel from address2os
				303	for (i = 0; i < nActiveThreads; ++i) {
				304	address2os[i].first.depth--;
				305	}
				306	*threadLevel = -1;
				307	} else if (level == *coreLevel) {
				308	// For core level, we move the thread labels over if they are still
				309	// valid (*threadLevel != -1), and also reduce the depth another level
				310	for (i = 0; i < nActiveThreads; ++i) {
				311	if (*threadLevel != -1) {
				312	address2os[i].first.labels[*coreLevel] =
				313	address2os[i].first.labels[*threadLevel];
				314	}
				315	address2os[i].first.depth--;
				316	}
				317	*coreLevel = -1;
				318	}
				319	}
				320	return address2os[0].first.depth;
Jonathan Peyton	202a24d	2016-06-13 17:30:08 +0000	[diff] [blame]	321	}
				322
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	323	// Returns the number of objects of type 'type' below 'obj' within the topology
				324	// tree structure. e.g., if obj is a HWLOC_OBJ_PACKAGE object, and type is
				325	// HWLOC_OBJ_PU, then this will return the number of PU's under the SOCKET
				326	// object.
				327	static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj,
				328	hwloc_obj_type_t type) {
				329	int retval = 0;
				330	hwloc_obj_t first;
				331	for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type,
				332	obj->logical_index, type, 0);
				333	first != NULL &&
				334	hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) ==
				335	obj;
				336	first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type,
				337	first)) {
				338	++retval;
				339	}
				340	return retval;
Jonathan Peyton	202a24d	2016-06-13 17:30:08 +0000	[diff] [blame]	341	}
				342
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	343	static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
				344	kmp_i18n_id_t *const msg_id) {
				345	*address2os = NULL;
				346	*msg_id = kmp_i18n_null;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	347
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	348	// Save the affinity mask for the current thread.
				349	kmp_affin_mask_t *oldMask;
				350	KMP_CPU_ALLOC(oldMask);
				351	__kmp_get_system_affinity(oldMask, TRUE);
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	352
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	353	int depth = 3;
				354	int pkgLevel = 0;
				355	int coreLevel = 1;
				356	int threadLevel = 2;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	357
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	358	if (!KMP_AFFINITY_CAPABLE()) {
				359	// Hack to try and infer the machine topology using only the data
				360	// available from cpuid on the current thread, and __kmp_xproc.
				361	KMP_ASSERT(__kmp_affinity_type == affinity_none);
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	362
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	363	nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(
				364	hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, 0),
				365	HWLOC_OBJ_CORE);
				366	__kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(
				367	hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0),
				368	HWLOC_OBJ_PU);
				369	__kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
				370	nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	371	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	372	KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
				373	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				374	if (__kmp_affinity_uniform_topology()) {
				375	KMP_INFORM(Uniform, "KMP_AFFINITY");
				376	} else {
				377	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				378	}
				379	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				380	__kmp_nThreadsPerCore, __kmp_ncores);
				381	}
				382	KMP_CPU_FREE(oldMask);
				383	return 0;
				384	}
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	385
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	386	// Allocate the data structure to be returned.
				387	AddrUnsPair *retval =
				388	(AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair) __kmp_avail_proc);
				389	__kmp_pu_os_idx = (int )__kmp_allocate(sizeof(int) __kmp_avail_proc);
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	390
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	391	// When affinity is off, this routine will still be called to set
				392	// __kmp_ncores, as well as __kmp_nThreadsPerCore,
				393	// nCoresPerPkg, & nPackages. Make sure all these vars are set
				394	// correctly, and return if affinity is not enabled.
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	395
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	396	hwloc_obj_t pu;
				397	hwloc_obj_t core;
				398	hwloc_obj_t socket;
				399	int nActiveThreads = 0;
				400	int socket_identifier = 0;
				401	// re-calculate globals to count only accessible resources
				402	__kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
				403	for (socket =
				404	hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, 0);
				405	socket != NULL;
				406	socket = hwloc_get_next_obj_by_type(__kmp_hwloc_topology,
				407	HWLOC_OBJ_PACKAGE, socket),
				408	socket_identifier++) {
				409	int core_identifier = 0;
				410	int num_active_cores = 0;
				411	for (core = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, socket->type,
				412	socket->logical_index,
				413	HWLOC_OBJ_CORE, 0);
				414	core != NULL &&
				415	hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, socket->type,
				416	core) == socket;
				417	core = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE,
				418	core),
				419	core_identifier++) {
				420	int pu_identifier = 0;
				421	int num_active_threads = 0;
				422	for (pu = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, core->type,
				423	core->logical_index, HWLOC_OBJ_PU,
				424	0);
				425	pu != NULL &&
				426	hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, core->type,
				427	pu) == core;
				428	pu = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU,
				429	pu),
				430	pu_identifier++) {
				431	Address addr(3);
				432	if(!KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
				433	continue; // skip inactive (inaccessible) unit
				434	KA_TRACE(20,
				435	("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
				436	socket->os_index, socket->logical_index, core->os_index,
				437	core->logical_index, pu->os_index,pu->logical_index));
				438	addr.labels[0] = socket_identifier; // package
				439	addr.labels[1] = core_identifier; // core
				440	addr.labels[2] = pu_identifier; // pu
				441	retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
				442	__kmp_pu_os_idx[nActiveThreads] =
				443	pu->os_index; // keep os index for each active pu
				444	nActiveThreads++;
				445	++num_active_threads; // count active threads per core
				446	}
				447	if (num_active_threads) { // were there any active threads on the core?
				448	++__kmp_ncores; // count total active cores
				449	++num_active_cores; // count active cores per socket
				450	if (num_active_threads > __kmp_nThreadsPerCore)
				451	__kmp_nThreadsPerCore = num_active_threads; // calc maximum
				452	}
				453	}
				454	if (num_active_cores) { // were there any active cores on the socket?
				455	++nPackages; // count total active packages
				456	if (num_active_cores > nCoresPerPkg)
				457	nCoresPerPkg = num_active_cores; // calc maximum
				458	}
				459	}
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	460
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	461	// If there's only one thread context to bind to, return now.
				462	KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
				463	KMP_ASSERT(nActiveThreads > 0);
				464	if (nActiveThreads == 1) {
				465	__kmp_ncores = nPackages = 1;
				466	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
				467	if (__kmp_affinity_verbose) {
				468	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				469	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				470
				471	KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
				472	if (__kmp_affinity_respect_mask) {
				473	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				474	} else {
				475	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				476	}
				477	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				478	KMP_INFORM(Uniform, "KMP_AFFINITY");
				479	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				480	__kmp_nThreadsPerCore, __kmp_ncores);
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	481	}
				482
				483	if (__kmp_affinity_type == affinity_none) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	484	__kmp_free(retval);
				485	KMP_CPU_FREE(oldMask);
				486	return 0;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	487	}
				488
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	489	// Form an Address object which only includes the package level.
				490	Address addr(1);
				491	addr.labels[0] = retval[0].first.labels[pkgLevel];
				492	retval[0].first = addr;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	493
				494	if (__kmp_affinity_gran_levels < 0) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	495	__kmp_affinity_gran_levels = 0;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	496	}
				497
				498	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	499	__kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	500	}
				501
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	502	*address2os = retval;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	503	KMP_CPU_FREE(oldMask);
				504	return 1;
				505	}
				506
				507	// Sort the table by physical Id.
				508	qsort(retval, nActiveThreads, sizeof(*retval),
				509	__kmp_affinity_cmp_Address_labels);
				510
				511	// Check to see if the machine topology is uniform
				512	unsigned uniform =
				513	(nPackages * nCoresPerPkg * __kmp_nThreadsPerCore == nActiveThreads);
				514
				515	// Print the machine topology summary.
				516	if (__kmp_affinity_verbose) {
				517	char mask[KMP_AFFIN_MASK_PRINT_LEN];
				518	__kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				519
				520	KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
				521	if (__kmp_affinity_respect_mask) {
				522	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
				523	} else {
				524	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
				525	}
				526	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				527	if (uniform) {
				528	KMP_INFORM(Uniform, "KMP_AFFINITY");
				529	} else {
				530	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				531	}
				532
				533	kmp_str_buf_t buf;
				534	__kmp_str_buf_init(&buf);
				535
				536	__kmp_str_buf_print(&buf, "%d", nPackages);
				537	// for (level = 1; level <= pkgLevel; level++) {
				538	// __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
				539	// }
				540	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
				541	__kmp_nThreadsPerCore, __kmp_ncores);
				542
				543	__kmp_str_buf_free(&buf);
				544	}
				545
				546	if (__kmp_affinity_type == affinity_none) {
				547	__kmp_free(retval);
				548	KMP_CPU_FREE(oldMask);
				549	return 0;
				550	}
				551
				552	// Find any levels with radiix 1, and remove them from the map
				553	// (except for the package level).
				554	depth = __kmp_affinity_remove_radix_one_levels(
				555	retval, nActiveThreads, depth, &pkgLevel, &coreLevel, &threadLevel);
				556
				557	if (__kmp_affinity_gran_levels < 0) {
				558	// Set the granularity level based on what levels are modeled
				559	// in the machine topology map.
				560	__kmp_affinity_gran_levels = 0;
				561	if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
				562	__kmp_affinity_gran_levels++;
				563	}
				564	if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
				565	__kmp_affinity_gran_levels++;
				566	}
				567	if (__kmp_affinity_gran > affinity_gran_package) {
				568	__kmp_affinity_gran_levels++;
				569	}
				570	}
				571
				572	if (__kmp_affinity_verbose) {
				573	__kmp_affinity_print_topology(retval, nActiveThreads, depth, pkgLevel,
				574	coreLevel, threadLevel);
				575	}
				576
				577	KMP_CPU_FREE(oldMask);
				578	*address2os = retval;
				579	return depth;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	580	}
				581	#endif // KMP_USE_HWLOC
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	582
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	583	// If we don't know how to retrieve the machine's processor topology, or
				584	// encounter an error in doing so, this routine is called to form a "flat"
				585	// mapping of os thread id's <-> processor id's.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	586	static int __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
				587	kmp_i18n_id_t *const msg_id) {
				588	*address2os = NULL;
				589	*msg_id = kmp_i18n_null;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	590
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	591	// Even if __kmp_affinity_type == affinity_none, this routine might still
				592	// called to set __kmp_ncores, as well as
				593	// __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
				594	if (!KMP_AFFINITY_CAPABLE()) {
				595	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				596	__kmp_ncores = nPackages = __kmp_xproc;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	597	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	598	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	599	KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
				600	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				601	KMP_INFORM(Uniform, "KMP_AFFINITY");
				602	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				603	__kmp_nThreadsPerCore, __kmp_ncores);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	604	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	605	return 0;
				606	}
				607
				608	// When affinity is off, this routine will still be called to set
				609	// __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
				610	// Make sure all these vars are set correctly, and return now if affinity is
				611	// not enabled.
				612	__kmp_ncores = nPackages = __kmp_avail_proc;
				613	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
				614	if (__kmp_affinity_verbose) {
				615	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				616	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				617	__kmp_affin_fullMask);
				618
				619	KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
				620	if (__kmp_affinity_respect_mask) {
				621	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				622	} else {
				623	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	624	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	625	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				626	KMP_INFORM(Uniform, "KMP_AFFINITY");
				627	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				628	__kmp_nThreadsPerCore, __kmp_ncores);
				629	}
				630	KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
				631	__kmp_pu_os_idx = (int )__kmp_allocate(sizeof(int) __kmp_avail_proc);
				632	if (__kmp_affinity_type == affinity_none) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	633	int avail_ct = 0;
				634	int i;
Jonathan Peyton	c5304aa	2016-06-13 21:28:03 +0000	[diff] [blame]	635	KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	636	if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask))
				637	continue;
				638	__kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	639	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	640	return 0;
				641	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	642
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	643	// Contruct the data structure to be returned.
				644	*address2os =
				645	(AddrUnsPair )__kmp_allocate(sizeof(address2os) __kmp_avail_proc);
				646	int avail_ct = 0;
				647	unsigned int i;
				648	KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
				649	// Skip this proc if it is not included in the machine model.
				650	if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
				651	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	652	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	653	__kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
				654	Address addr(1);
				655	addr.labels[0] = i;
				656	(*address2os)[avail_ct++] = AddrUnsPair(addr, i);
				657	}
				658	if (__kmp_affinity_verbose) {
				659	KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
				660	}
				661
				662	if (__kmp_affinity_gran_levels < 0) {
				663	// Only the package level is modeled in the machine topology map,
				664	// so the #levels of granularity is either 0 or 1.
				665	if (__kmp_affinity_gran > affinity_gran_package) {
				666	__kmp_affinity_gran_levels = 1;
				667	} else {
				668	__kmp_affinity_gran_levels = 0;
				669	}
				670	}
				671	return 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	672	}
				673
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	674	#if KMP_GROUP_AFFINITY
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	675
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	676	// If multiple Windows* OS processor groups exist, we can create a 2-level
				677	// topology map with the groups at level 0 and the individual procs at level 1.
				678	// This facilitates letting the threads float among all procs in a group,
				679	// if granularity=group (the default when there are multiple groups).
				680	static int __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
				681	kmp_i18n_id_t *const msg_id) {
				682	*address2os = NULL;
				683	*msg_id = kmp_i18n_null;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	684
Jonathan Peyton	5868499	2017-05-15 19:05:59 +0000	[diff] [blame^]	685	// If we aren't affinity capable, then return now.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	686	// The flat mapping will be used.
Jonathan Peyton	5868499	2017-05-15 19:05:59 +0000	[diff] [blame^]	687	if (!KMP_AFFINITY_CAPABLE()) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	688	// FIXME set *msg_id
				689	return -1;
				690	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	691
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	692	// Contruct the data structure to be returned.
				693	*address2os =
				694	(AddrUnsPair )__kmp_allocate(sizeof(address2os) __kmp_avail_proc);
				695	KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
				696	__kmp_pu_os_idx = (int )__kmp_allocate(sizeof(int) __kmp_avail_proc);
				697	int avail_ct = 0;
				698	int i;
				699	KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
				700	// Skip this proc if it is not included in the machine model.
				701	if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
				702	continue;
				703	}
				704	__kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
				705	Address addr(2);
				706	addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
				707	addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
				708	(*address2os)[avail_ct++] = AddrUnsPair(addr, i);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	709
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	710	if (__kmp_affinity_verbose) {
				711	KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
				712	addr.labels[1]);
				713	}
				714	}
				715
				716	if (__kmp_affinity_gran_levels < 0) {
				717	if (__kmp_affinity_gran == affinity_gran_group) {
				718	__kmp_affinity_gran_levels = 1;
				719	} else if ((__kmp_affinity_gran == affinity_gran_fine) \|\|
				720	(__kmp_affinity_gran == affinity_gran_thread)) {
				721	__kmp_affinity_gran_levels = 0;
				722	} else {
				723	const char *gran_str = NULL;
				724	if (__kmp_affinity_gran == affinity_gran_core) {
				725	gran_str = "core";
				726	} else if (__kmp_affinity_gran == affinity_gran_package) {
				727	gran_str = "package";
				728	} else if (__kmp_affinity_gran == affinity_gran_node) {
				729	gran_str = "node";
				730	} else {
				731	KMP_ASSERT(0);
				732	}
				733
				734	// Warning: can't use affinity granularity \"gran\" with group topology
				735	// method, using "thread"
				736	__kmp_affinity_gran_levels = 0;
				737	}
				738	}
				739	return 2;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	740	}
				741
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	742	#endif /* KMP_GROUP_AFFINITY */
				743
				744	#if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
				745
				746	static int __kmp_cpuid_mask_width(int count) {
				747	int r = 0;
				748
				749	while ((1 << r) < count)
				750	++r;
				751	return r;
				752	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	753
				754	class apicThreadInfo {
				755	public:
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	756	unsigned osId; // param to __kmp_affinity_bind_thread
				757	unsigned apicId; // from cpuid after binding
				758	unsigned maxCoresPerPkg; // ""
				759	unsigned maxThreadsPerPkg; // ""
				760	unsigned pkgId; // inferred from above values
				761	unsigned coreId; // ""
				762	unsigned threadId; // ""
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	763	};
				764
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	765	static int __kmp_affinity_cmp_apicThreadInfo_os_id(const void *a,
				766	const void *b) {
				767	const apicThreadInfo aa = (const apicThreadInfo )a;
				768	const apicThreadInfo bb = (const apicThreadInfo )b;
				769	if (aa->osId < bb->osId)
				770	return -1;
				771	if (aa->osId > bb->osId)
				772	return 1;
				773	return 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	774	}
				775
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	776	static int __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a,
				777	const void *b) {
				778	const apicThreadInfo aa = (const apicThreadInfo )a;
				779	const apicThreadInfo bb = (const apicThreadInfo )b;
				780	if (aa->pkgId < bb->pkgId)
				781	return -1;
				782	if (aa->pkgId > bb->pkgId)
				783	return 1;
				784	if (aa->coreId < bb->coreId)
				785	return -1;
				786	if (aa->coreId > bb->coreId)
				787	return 1;
				788	if (aa->threadId < bb->threadId)
				789	return -1;
				790	if (aa->threadId > bb->threadId)
				791	return 1;
				792	return 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	793	}
				794
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	795	// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
				796	// an algorithm which cycles through the available os threads, setting
				797	// the current thread's affinity mask to that thread, and then retrieves
				798	// the Apic Id for each thread context using the cpuid instruction.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	799	static int __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
				800	kmp_i18n_id_t *const msg_id) {
				801	kmp_cpuid buf;
				802	int rc;
				803	*address2os = NULL;
				804	*msg_id = kmp_i18n_null;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	805
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	806	// Check if cpuid leaf 4 is supported.
				807	__kmp_x86_cpuid(0, 0, &buf);
				808	if (buf.eax < 4) {
				809	*msg_id = kmp_i18n_str_NoLeaf4Support;
				810	return -1;
				811	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	812
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	813	// The algorithm used starts by setting the affinity to each available thread
				814	// and retrieving info from the cpuid instruction, so if we are not capable of
				815	// calling __kmp_get_system_affinity() and _kmp_get_system_affinity(), then we
				816	// need to do something else - use the defaults that we calculated from
				817	// issuing cpuid without binding to each proc.
				818	if (!KMP_AFFINITY_CAPABLE()) {
				819	// Hack to try and infer the machine topology using only the data
				820	// available from cpuid on the current thread, and __kmp_xproc.
				821	KMP_ASSERT(__kmp_affinity_type == affinity_none);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	822
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	823	// Get an upper bound on the number of threads per package using cpuid(1).
				824	// On some OS/chps combinations where HT is supported by the chip but is
				825	// disabled, this value will be 2 on a single core chip. Usually, it will be
				826	// 2 if HT is enabled and 1 if HT is disabled.
				827	__kmp_x86_cpuid(1, 0, &buf);
				828	int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
				829	if (maxThreadsPerPkg == 0) {
				830	maxThreadsPerPkg = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	831	}
				832
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	833	// The num cores per pkg comes from cpuid(4). 1 must be added to the encoded
				834	// value.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	835	//
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	836	// The author of cpu_count.cpp treated this only an upper bound on the
				837	// number of cores, but I haven't seen any cases where it was greater than
				838	// the actual number of cores, so we will treat it as exact in this block of
				839	// code.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	840	//
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	841	// First, we need to check if cpuid(4) is supported on this chip. To see if
				842	// cpuid(n) is supported, issue cpuid(0) and check if eax has the value n or
				843	// greater.
				844	__kmp_x86_cpuid(0, 0, &buf);
				845	if (buf.eax >= 4) {
				846	__kmp_x86_cpuid(4, 0, &buf);
				847	nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
				848	} else {
				849	nCoresPerPkg = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	850	}
				851
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	852	// There is no way to reliably tell if HT is enabled without issuing the
				853	// cpuid instruction from every thread, can correlating the cpuid info, so
				854	// if the machine is not affinity capable, we assume that HT is off. We have
				855	// seen quite a few machines where maxThreadsPerPkg is 2, yet the machine
				856	// does not support HT.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	857	//
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	858	// - Older OSes are usually found on machines with older chips, which do not
				859	// support HT.
				860	// - The performance penalty for mistakenly identifying a machine as HT when
				861	// it isn't (which results in blocktime being incorrecly set to 0) is
				862	// greater than the penalty when for mistakenly identifying a machine as
				863	// being 1 thread/core when it is really HT enabled (which results in
				864	// blocktime being incorrectly set to a positive value).
				865	__kmp_ncores = __kmp_xproc;
				866	nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	867	__kmp_nThreadsPerCore = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	868	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	869	KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
				870	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				871	if (__kmp_affinity_uniform_topology()) {
				872	KMP_INFORM(Uniform, "KMP_AFFINITY");
				873	} else {
				874	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				875	}
				876	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				877	__kmp_nThreadsPerCore, __kmp_ncores);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	878	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	879	return 0;
				880	}
				881
				882	// From here on, we can assume that it is safe to call
				883	// __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
				884	// __kmp_affinity_type = affinity_none.
				885
				886	// Save the affinity mask for the current thread.
				887	kmp_affin_mask_t *oldMask;
				888	KMP_CPU_ALLOC(oldMask);
				889	KMP_ASSERT(oldMask != NULL);
				890	__kmp_get_system_affinity(oldMask, TRUE);
				891
				892	// Run through each of the available contexts, binding the current thread
				893	// to it, and obtaining the pertinent information using the cpuid instr.
				894	//
				895	// The relevant information is:
				896	// - Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
				897	// has a uniqie Apic Id, which is of the form pkg# : core# : thread#.
				898	// - Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The value
				899	// of this field determines the width of the core# + thread# fields in the
				900	// Apic Id. It is also an upper bound on the number of threads per
				901	// package, but it has been verified that situations happen were it is not
				902	// exact. In particular, on certain OS/chip combinations where Intel(R)
				903	// Hyper-Threading Technology is supported by the chip but has been
				904	// disabled, the value of this field will be 2 (for a single core chip).
				905	// On other OS/chip combinations supporting Intel(R) Hyper-Threading
				906	// Technology, the value of this field will be 1 when Intel(R)
				907	// Hyper-Threading Technology is disabled and 2 when it is enabled.
				908	// - Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The value
				909	// of this field (+1) determines the width of the core# field in the Apic
				910	// Id. The comments in "cpucount.cpp" say that this value is an upper
				911	// bound, but the IA-32 architecture manual says that it is exactly the
				912	// number of cores per package, and I haven't seen any case where it
				913	// wasn't.
				914	//
				915	// From this information, deduce the package Id, core Id, and thread Id,
				916	// and set the corresponding fields in the apicThreadInfo struct.
				917	unsigned i;
				918	apicThreadInfo threadInfo = (apicThreadInfo )__kmp_allocate(
				919	__kmp_avail_proc * sizeof(apicThreadInfo));
				920	unsigned nApics = 0;
				921	KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
				922	// Skip this proc if it is not included in the machine model.
				923	if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
				924	continue;
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	925	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	926	KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
				927
				928	__kmp_affinity_dispatch->bind_thread(i);
				929	threadInfo[nApics].osId = i;
				930
				931	// The apic id and max threads per pkg come from cpuid(1).
				932	__kmp_x86_cpuid(1, 0, &buf);
				933	if (((buf.edx >> 9) & 1) == 0) {
				934	__kmp_set_system_affinity(oldMask, TRUE);
				935	__kmp_free(threadInfo);
				936	KMP_CPU_FREE(oldMask);
				937	*msg_id = kmp_i18n_str_ApicNotPresent;
				938	return -1;
				939	}
				940	threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
				941	threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
				942	if (threadInfo[nApics].maxThreadsPerPkg == 0) {
				943	threadInfo[nApics].maxThreadsPerPkg = 1;
				944	}
				945
				946	// Max cores per pkg comes from cpuid(4). 1 must be added to the encoded
				947	// value.
				948	//
				949	// First, we need to check if cpuid(4) is supported on this chip. To see if
				950	// cpuid(n) is supported, issue cpuid(0) and check if eax has the value n
				951	// or greater.
				952	__kmp_x86_cpuid(0, 0, &buf);
				953	if (buf.eax >= 4) {
				954	__kmp_x86_cpuid(4, 0, &buf);
				955	threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
				956	} else {
				957	threadInfo[nApics].maxCoresPerPkg = 1;
				958	}
				959
				960	// Infer the pkgId / coreId / threadId using only the info obtained locally.
				961	int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg);
				962	threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
				963
				964	int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg);
				965	int widthT = widthCT - widthC;
				966	if (widthT < 0) {
				967	// I've never seen this one happen, but I suppose it could, if the cpuid
				968	// instruction on a chip was really screwed up. Make sure to restore the
				969	// affinity mask before the tail call.
				970	__kmp_set_system_affinity(oldMask, TRUE);
				971	__kmp_free(threadInfo);
				972	KMP_CPU_FREE(oldMask);
				973	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				974	return -1;
				975	}
				976
				977	int maskC = (1 << widthC) - 1;
				978	threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC;
				979
				980	int maskT = (1 << widthT) - 1;
				981	threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;
				982
				983	nApics++;
				984	}
				985
				986	// We've collected all the info we need.
				987	// Restore the old affinity mask for this thread.
				988	__kmp_set_system_affinity(oldMask, TRUE);
				989
				990	// If there's only one thread context to bind to, form an Address object
				991	// with depth 1 and return immediately (or, if affinity is off, set
				992	// address2os to NULL and return).
				993	//
				994	// If it is configured to omit the package level when there is only a single
				995	// package, the logic at the end of this routine won't work if there is only
				996	// a single thread - it would try to form an Address object with depth 0.
				997	KMP_ASSERT(nApics > 0);
				998	if (nApics == 1) {
				999	__kmp_ncores = nPackages = 1;
				1000	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
				1001	if (__kmp_affinity_verbose) {
				1002	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				1003	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1004
				1005	KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
				1006	if (__kmp_affinity_respect_mask) {
				1007	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				1008	} else {
				1009	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				1010	}
				1011	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1012	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1013	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1014	__kmp_nThreadsPerCore, __kmp_ncores);
				1015	}
				1016
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1017	if (__kmp_affinity_type == affinity_none) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1018	__kmp_free(threadInfo);
				1019	KMP_CPU_FREE(oldMask);
				1020	return 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1021	}
				1022
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1023	address2os = (AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair));
				1024	Address addr(1);
				1025	addr.labels[0] = threadInfo[0].pkgId;
				1026	(*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1027
				1028	if (__kmp_affinity_gran_levels < 0) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1029	__kmp_affinity_gran_levels = 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1030	}
				1031
				1032	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1033	__kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1034	}
				1035
				1036	__kmp_free(threadInfo);
				1037	KMP_CPU_FREE(oldMask);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1038	return 1;
				1039	}
				1040
				1041	// Sort the threadInfo table by physical Id.
				1042	qsort(threadInfo, nApics, sizeof(*threadInfo),
				1043	__kmp_affinity_cmp_apicThreadInfo_phys_id);
				1044
				1045	// The table is now sorted by pkgId / coreId / threadId, but we really don't
				1046	// know the radix of any of the fields. pkgId's may be sparsely assigned among
				1047	// the chips on a system. Although coreId's are usually assigned
				1048	// [0 .. coresPerPkg-1] and threadId's are usually assigned
				1049	// [0..threadsPerCore-1], we don't want to make any such assumptions.
				1050	//
				1051	// For that matter, we don't know what coresPerPkg and threadsPerCore (or the
				1052	// total # packages) are at this point - we want to determine that now. We
				1053	// only have an upper bound on the first two figures.
				1054	//
				1055	// We also perform a consistency check at this point: the values returned by
				1056	// the cpuid instruction for any thread bound to a given package had better
				1057	// return the same info for maxThreadsPerPkg and maxCoresPerPkg.
				1058	nPackages = 1;
				1059	nCoresPerPkg = 1;
				1060	__kmp_nThreadsPerCore = 1;
				1061	unsigned nCores = 1;
				1062
				1063	unsigned pkgCt = 1; // to determine radii
				1064	unsigned lastPkgId = threadInfo[0].pkgId;
				1065	unsigned coreCt = 1;
				1066	unsigned lastCoreId = threadInfo[0].coreId;
				1067	unsigned threadCt = 1;
				1068	unsigned lastThreadId = threadInfo[0].threadId;
				1069
				1070	// intra-pkg consist checks
				1071	unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
				1072	unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
				1073
				1074	for (i = 1; i < nApics; i++) {
				1075	if (threadInfo[i].pkgId != lastPkgId) {
				1076	nCores++;
				1077	pkgCt++;
				1078	lastPkgId = threadInfo[i].pkgId;
				1079	if ((int)coreCt > nCoresPerPkg)
				1080	nCoresPerPkg = coreCt;
				1081	coreCt = 1;
				1082	lastCoreId = threadInfo[i].coreId;
				1083	if ((int)threadCt > __kmp_nThreadsPerCore)
				1084	__kmp_nThreadsPerCore = threadCt;
				1085	threadCt = 1;
				1086	lastThreadId = threadInfo[i].threadId;
				1087
				1088	// This is a different package, so go on to the next iteration without
				1089	// doing any consistency checks. Reset the consistency check vars, though.
				1090	prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
				1091	prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
				1092	continue;
				1093	}
				1094
				1095	if (threadInfo[i].coreId != lastCoreId) {
				1096	nCores++;
				1097	coreCt++;
				1098	lastCoreId = threadInfo[i].coreId;
				1099	if ((int)threadCt > __kmp_nThreadsPerCore)
				1100	__kmp_nThreadsPerCore = threadCt;
				1101	threadCt = 1;
				1102	lastThreadId = threadInfo[i].threadId;
				1103	} else if (threadInfo[i].threadId != lastThreadId) {
				1104	threadCt++;
				1105	lastThreadId = threadInfo[i].threadId;
				1106	} else {
				1107	__kmp_free(threadInfo);
				1108	KMP_CPU_FREE(oldMask);
				1109	*msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
				1110	return -1;
				1111	}
				1112
				1113	// Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
				1114	// fields agree between all the threads bounds to a given package.
				1115	if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) \|\|
				1116	(prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
				1117	__kmp_free(threadInfo);
				1118	KMP_CPU_FREE(oldMask);
				1119	*msg_id = kmp_i18n_str_InconsistentCpuidInfo;
				1120	return -1;
				1121	}
				1122	}
				1123	nPackages = pkgCt;
				1124	if ((int)coreCt > nCoresPerPkg)
				1125	nCoresPerPkg = coreCt;
				1126	if ((int)threadCt > __kmp_nThreadsPerCore)
				1127	__kmp_nThreadsPerCore = threadCt;
				1128
				1129	// When affinity is off, this routine will still be called to set
				1130	// __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
				1131	// Make sure all these vars are set correctly, and return now if affinity is
				1132	// not enabled.
				1133	__kmp_ncores = nCores;
				1134	if (__kmp_affinity_verbose) {
				1135	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				1136	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1137
				1138	KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
				1139	if (__kmp_affinity_respect_mask) {
				1140	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				1141	} else {
				1142	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				1143	}
				1144	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1145	if (__kmp_affinity_uniform_topology()) {
				1146	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1147	} else {
				1148	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				1149	}
				1150	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1151	__kmp_nThreadsPerCore, __kmp_ncores);
				1152	}
				1153	KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
				1154	KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
				1155	__kmp_pu_os_idx = (int )__kmp_allocate(sizeof(int) __kmp_avail_proc);
				1156	for (i = 0; i < nApics; ++i) {
				1157	__kmp_pu_os_idx[i] = threadInfo[i].osId;
				1158	}
				1159	if (__kmp_affinity_type == affinity_none) {
				1160	__kmp_free(threadInfo);
				1161	KMP_CPU_FREE(oldMask);
				1162	return 0;
				1163	}
				1164
				1165	// Now that we've determined the number of packages, the number of cores per
				1166	// package, and the number of threads per core, we can construct the data
				1167	// structure that is to be returned.
				1168	int pkgLevel = 0;
				1169	int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
				1170	int threadLevel =
				1171	(__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
				1172	unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
				1173
				1174	KMP_ASSERT(depth > 0);
				1175	address2os = (AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair) * nApics);
				1176
				1177	for (i = 0; i < nApics; ++i) {
				1178	Address addr(depth);
				1179	unsigned os = threadInfo[i].osId;
				1180	int d = 0;
				1181
				1182	if (pkgLevel >= 0) {
				1183	addr.labels[d++] = threadInfo[i].pkgId;
				1184	}
				1185	if (coreLevel >= 0) {
				1186	addr.labels[d++] = threadInfo[i].coreId;
				1187	}
				1188	if (threadLevel >= 0) {
				1189	addr.labels[d++] = threadInfo[i].threadId;
				1190	}
				1191	(*address2os)[i] = AddrUnsPair(addr, os);
				1192	}
				1193
				1194	if (__kmp_affinity_gran_levels < 0) {
				1195	// Set the granularity level based on what levels are modeled in the machine
				1196	// topology map.
				1197	__kmp_affinity_gran_levels = 0;
				1198	if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
				1199	__kmp_affinity_gran_levels++;
				1200	}
				1201	if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
				1202	__kmp_affinity_gran_levels++;
				1203	}
				1204	if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
				1205	__kmp_affinity_gran_levels++;
				1206	}
				1207	}
				1208
				1209	if (__kmp_affinity_verbose) {
				1210	__kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
				1211	coreLevel, threadLevel);
				1212	}
				1213
				1214	__kmp_free(threadInfo);
				1215	KMP_CPU_FREE(oldMask);
				1216	return depth;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1217	}
				1218
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1219	// Intel(R) microarchitecture code name Nehalem, Dunnington and later
				1220	// architectures support a newer interface for specifying the x2APIC Ids,
				1221	// based on cpuid leaf 11.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1222	static int __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
				1223	kmp_i18n_id_t *const msg_id) {
				1224	kmp_cpuid buf;
				1225	*address2os = NULL;
				1226	*msg_id = kmp_i18n_null;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1227
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1228	// Check to see if cpuid leaf 11 is supported.
				1229	__kmp_x86_cpuid(0, 0, &buf);
				1230	if (buf.eax < 11) {
				1231	*msg_id = kmp_i18n_str_NoLeaf11Support;
				1232	return -1;
				1233	}
				1234	__kmp_x86_cpuid(11, 0, &buf);
				1235	if (buf.ebx == 0) {
				1236	*msg_id = kmp_i18n_str_NoLeaf11Support;
				1237	return -1;
				1238	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1239
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1240	// Find the number of levels in the machine topology. While we're at it, get
				1241	// the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will try to
				1242	// get more accurate values later by explicitly counting them, but get
				1243	// reasonable defaults now, in case we return early.
				1244	int level;
				1245	int threadLevel = -1;
				1246	int coreLevel = -1;
				1247	int pkgLevel = -1;
				1248	__kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
				1249
				1250	for (level = 0;; level++) {
				1251	if (level > 31) {
				1252	// FIXME: Hack for DPD200163180
				1253	//
				1254	// If level is big then something went wrong -> exiting
				1255	//
				1256	// There could actually be 32 valid levels in the machine topology, but so
				1257	// far, the only machine we have seen which does not exit this loop before
				1258	// iteration 32 has fubar x2APIC settings.
				1259	//
				1260	// For now, just reject this case based upon loop trip count.
				1261	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1262	return -1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1263	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1264	__kmp_x86_cpuid(11, level, &buf);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1265	if (buf.ebx == 0) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1266	if (pkgLevel < 0) {
				1267	// Will infer nPackages from __kmp_xproc
				1268	pkgLevel = level;
				1269	level++;
				1270	}
				1271	break;
				1272	}
				1273	int kind = (buf.ecx >> 8) & 0xff;
				1274	if (kind == 1) {
				1275	// SMT level
				1276	threadLevel = level;
				1277	coreLevel = -1;
				1278	pkgLevel = -1;
				1279	__kmp_nThreadsPerCore = buf.ebx & 0xffff;
				1280	if (__kmp_nThreadsPerCore == 0) {
				1281	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1282	return -1;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1283	}
				1284	} else if (kind == 2) {
				1285	// core level
				1286	coreLevel = level;
				1287	pkgLevel = -1;
				1288	nCoresPerPkg = buf.ebx & 0xffff;
				1289	if (nCoresPerPkg == 0) {
				1290	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1291	return -1;
				1292	}
				1293	} else {
				1294	if (level <= 0) {
				1295	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1296	return -1;
				1297	}
				1298	if (pkgLevel >= 0) {
				1299	continue;
				1300	}
				1301	pkgLevel = level;
				1302	nPackages = buf.ebx & 0xffff;
				1303	if (nPackages == 0) {
				1304	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1305	return -1;
				1306	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1307	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1308	}
				1309	int depth = level;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1310
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1311	// In the above loop, "level" was counted from the finest level (usually
				1312	// thread) to the coarsest. The caller expects that we will place the labels
				1313	// in (*address2os)[].first.labels[] in the inverse order, so we need to
				1314	// invert the vars saying which level means what.
				1315	if (threadLevel >= 0) {
				1316	threadLevel = depth - threadLevel - 1;
				1317	}
				1318	if (coreLevel >= 0) {
				1319	coreLevel = depth - coreLevel - 1;
				1320	}
				1321	KMP_DEBUG_ASSERT(pkgLevel >= 0);
				1322	pkgLevel = depth - pkgLevel - 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1323
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1324	// The algorithm used starts by setting the affinity to each available thread
				1325	// and retrieving info from the cpuid instruction, so if we are not capable of
				1326	// calling __kmp_get_system_affinity() and __kmp_set_system_affinity(), then we
				1327	// need to do something else - use the defaults that we calculated from
				1328	// issuing cpuid without binding to each proc.
				1329	if (!KMP_AFFINITY_CAPABLE()) {
				1330	// Hack to try and infer the machine topology using only the data
				1331	// available from cpuid on the current thread, and __kmp_xproc.
				1332	KMP_ASSERT(__kmp_affinity_type == affinity_none);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1333
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1334	__kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
				1335	nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1336	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1337	KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
				1338	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1339	if (__kmp_affinity_uniform_topology()) {
				1340	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1341	} else {
				1342	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				1343	}
				1344	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1345	__kmp_nThreadsPerCore, __kmp_ncores);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1346	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1347	return 0;
				1348	}
				1349
				1350	// From here on, we can assume that it is safe to call
				1351	// __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
				1352	// __kmp_affinity_type = affinity_none.
				1353
				1354	// Save the affinity mask for the current thread.
				1355	kmp_affin_mask_t *oldMask;
				1356	KMP_CPU_ALLOC(oldMask);
				1357	__kmp_get_system_affinity(oldMask, TRUE);
				1358
				1359	// Allocate the data structure to be returned.
				1360	AddrUnsPair *retval =
				1361	(AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair) __kmp_avail_proc);
				1362
				1363	// Run through each of the available contexts, binding the current thread
				1364	// to it, and obtaining the pertinent information using the cpuid instr.
				1365	unsigned int proc;
				1366	int nApics = 0;
				1367	KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
				1368	// Skip this proc if it is not included in the machine model.
				1369	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
				1370	continue;
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	1371	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1372	KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
				1373
				1374	__kmp_affinity_dispatch->bind_thread(proc);
				1375
				1376	// Extract labels for each level in the machine topology map from Apic ID.
				1377	Address addr(depth);
				1378	int prev_shift = 0;
				1379
				1380	for (level = 0; level < depth; level++) {
				1381	__kmp_x86_cpuid(11, level, &buf);
				1382	unsigned apicId = buf.edx;
				1383	if (buf.ebx == 0) {
				1384	if (level != depth - 1) {
				1385	KMP_CPU_FREE(oldMask);
				1386	*msg_id = kmp_i18n_str_InconsistentCpuidInfo;
				1387	return -1;
				1388	}
				1389	addr.labels[depth - level - 1] = apicId >> prev_shift;
				1390	level++;
				1391	break;
				1392	}
				1393	int shift = buf.eax & 0x1f;
				1394	int mask = (1 << shift) - 1;
				1395	addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
				1396	prev_shift = shift;
				1397	}
				1398	if (level != depth) {
				1399	KMP_CPU_FREE(oldMask);
				1400	*msg_id = kmp_i18n_str_InconsistentCpuidInfo;
				1401	return -1;
				1402	}
				1403
				1404	retval[nApics] = AddrUnsPair(addr, proc);
				1405	nApics++;
				1406	}
				1407
				1408	// We've collected all the info we need.
				1409	// Restore the old affinity mask for this thread.
				1410	__kmp_set_system_affinity(oldMask, TRUE);
				1411
				1412	// If there's only one thread context to bind to, return now.
				1413	KMP_ASSERT(nApics > 0);
				1414	if (nApics == 1) {
				1415	__kmp_ncores = nPackages = 1;
				1416	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
				1417	if (__kmp_affinity_verbose) {
				1418	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				1419	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1420
				1421	KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
				1422	if (__kmp_affinity_respect_mask) {
				1423	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				1424	} else {
				1425	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				1426	}
				1427	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1428	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1429	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1430	__kmp_nThreadsPerCore, __kmp_ncores);
				1431	}
				1432
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1433	if (__kmp_affinity_type == affinity_none) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1434	__kmp_free(retval);
				1435	KMP_CPU_FREE(oldMask);
				1436	return 0;
				1437	}
				1438
				1439	// Form an Address object which only includes the package level.
				1440	Address addr(1);
				1441	addr.labels[0] = retval[0].first.labels[pkgLevel];
				1442	retval[0].first = addr;
				1443
				1444	if (__kmp_affinity_gran_levels < 0) {
				1445	__kmp_affinity_gran_levels = 0;
				1446	}
				1447
				1448	if (__kmp_affinity_verbose) {
				1449	__kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
				1450	}
				1451
				1452	*address2os = retval;
				1453	KMP_CPU_FREE(oldMask);
				1454	return 1;
				1455	}
				1456
				1457	// Sort the table by physical Id.
				1458	qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
				1459
				1460	// Find the radix at each of the levels.
				1461	unsigned totals = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1462	unsigned counts = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1463	unsigned maxCt = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1464	unsigned last = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1465	for (level = 0; level < depth; level++) {
				1466	totals[level] = 1;
				1467	maxCt[level] = 1;
				1468	counts[level] = 1;
				1469	last[level] = retval[0].first.labels[level];
				1470	}
				1471
				1472	// From here on, the iteration variable "level" runs from the finest level to
				1473	// the coarsest, i.e. we iterate forward through
				1474	// (*address2os)[].first.labels[] - in the previous loops, we iterated
				1475	// backwards.
				1476	for (proc = 1; (int)proc < nApics; proc++) {
				1477	int level;
				1478	for (level = 0; level < depth; level++) {
				1479	if (retval[proc].first.labels[level] != last[level]) {
				1480	int j;
				1481	for (j = level + 1; j < depth; j++) {
				1482	totals[j]++;
				1483	counts[j] = 1;
				1484	// The line below causes printing incorrect topology information in
				1485	// case the max value for some level (maxCt[level]) is encountered
				1486	// earlier than some less value while going through the array. For
				1487	// example, let pkg0 has 4 cores and pkg1 has 2 cores. Then
				1488	// maxCt[1] == 2
				1489	// whereas it must be 4.
				1490	// TODO!!! Check if it can be commented safely
				1491	// maxCt[j] = 1;
				1492	last[j] = retval[proc].first.labels[j];
				1493	}
				1494	totals[level]++;
				1495	counts[level]++;
				1496	if (counts[level] > maxCt[level]) {
				1497	maxCt[level] = counts[level];
				1498	}
				1499	last[level] = retval[proc].first.labels[level];
				1500	break;
				1501	} else if (level == depth - 1) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1502	__kmp_free(last);
				1503	__kmp_free(maxCt);
				1504	__kmp_free(counts);
				1505	__kmp_free(totals);
				1506	__kmp_free(retval);
				1507	KMP_CPU_FREE(oldMask);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1508	*msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
				1509	return -1;
				1510	}
				1511	}
				1512	}
				1513
				1514	// When affinity is off, this routine will still be called to set
				1515	// __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
				1516	// Make sure all these vars are set correctly, and return if affinity is not
				1517	// enabled.
				1518	if (threadLevel >= 0) {
				1519	__kmp_nThreadsPerCore = maxCt[threadLevel];
				1520	} else {
				1521	__kmp_nThreadsPerCore = 1;
				1522	}
				1523	nPackages = totals[pkgLevel];
				1524
				1525	if (coreLevel >= 0) {
				1526	__kmp_ncores = totals[coreLevel];
				1527	nCoresPerPkg = maxCt[coreLevel];
				1528	} else {
				1529	__kmp_ncores = nPackages;
				1530	nCoresPerPkg = 1;
				1531	}
				1532
				1533	// Check to see if the machine topology is uniform
				1534	unsigned prod = maxCt[0];
				1535	for (level = 1; level < depth; level++) {
				1536	prod *= maxCt[level];
				1537	}
				1538	bool uniform = (prod == totals[level - 1]);
				1539
				1540	// Print the machine topology summary.
				1541	if (__kmp_affinity_verbose) {
				1542	char mask[KMP_AFFIN_MASK_PRINT_LEN];
				1543	__kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1544
				1545	KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
				1546	if (__kmp_affinity_respect_mask) {
				1547	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
				1548	} else {
				1549	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
				1550	}
				1551	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1552	if (uniform) {
				1553	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1554	} else {
				1555	KMP_INFORM(NonUniform, "KMP_AFFINITY");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1556	}
				1557
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1558	kmp_str_buf_t buf;
				1559	__kmp_str_buf_init(&buf);
				1560
				1561	__kmp_str_buf_print(&buf, "%d", totals[0]);
				1562	for (level = 1; level <= pkgLevel; level++) {
				1563	__kmp_str_buf_print(&buf, " x %d", maxCt[level]);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1564	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1565	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
				1566	__kmp_nThreadsPerCore, __kmp_ncores);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1567
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1568	__kmp_str_buf_free(&buf);
				1569	}
				1570	KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
				1571	KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
				1572	__kmp_pu_os_idx = (int )__kmp_allocate(sizeof(int) __kmp_avail_proc);
				1573	for (proc = 0; (int)proc < nApics; ++proc) {
				1574	__kmp_pu_os_idx[proc] = retval[proc].second;
				1575	}
				1576	if (__kmp_affinity_type == affinity_none) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1577	__kmp_free(last);
				1578	__kmp_free(maxCt);
				1579	__kmp_free(counts);
				1580	__kmp_free(totals);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1581	__kmp_free(retval);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1582	KMP_CPU_FREE(oldMask);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1583	return 0;
				1584	}
				1585
				1586	// Find any levels with radix 1, and remove them from the map
				1587	// (except for the package level).
				1588	int new_depth = 0;
				1589	for (level = 0; level < depth; level++) {
				1590	if ((maxCt[level] == 1) && (level != pkgLevel)) {
				1591	continue;
				1592	}
				1593	new_depth++;
				1594	}
				1595
				1596	// If we are removing any levels, allocate a new vector to return,
				1597	// and copy the relevant information to it.
				1598	if (new_depth != depth) {
				1599	AddrUnsPair *new_retval =
				1600	(AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair) nApics);
				1601	for (proc = 0; (int)proc < nApics; proc++) {
				1602	Address addr(new_depth);
				1603	new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
				1604	}
				1605	int new_level = 0;
				1606	int newPkgLevel = -1;
				1607	int newCoreLevel = -1;
				1608	int newThreadLevel = -1;
				1609	int i;
				1610	for (level = 0; level < depth; level++) {
				1611	if ((maxCt[level] == 1) && (level != pkgLevel)) {
				1612	// Remove this level. Never remove the package level
				1613	continue;
				1614	}
				1615	if (level == pkgLevel) {
				1616	newPkgLevel = level;
				1617	}
				1618	if (level == coreLevel) {
				1619	newCoreLevel = level;
				1620	}
				1621	if (level == threadLevel) {
				1622	newThreadLevel = level;
				1623	}
				1624	for (proc = 0; (int)proc < nApics; proc++) {
				1625	new_retval[proc].first.labels[new_level] =
				1626	retval[proc].first.labels[level];
				1627	}
				1628	new_level++;
				1629	}
				1630
				1631	__kmp_free(retval);
				1632	retval = new_retval;
				1633	depth = new_depth;
				1634	pkgLevel = newPkgLevel;
				1635	coreLevel = newCoreLevel;
				1636	threadLevel = newThreadLevel;
				1637	}
				1638
				1639	if (__kmp_affinity_gran_levels < 0) {
				1640	// Set the granularity level based on what levels are modeled
				1641	// in the machine topology map.
				1642	__kmp_affinity_gran_levels = 0;
				1643	if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
				1644	__kmp_affinity_gran_levels++;
				1645	}
				1646	if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
				1647	__kmp_affinity_gran_levels++;
				1648	}
				1649	if (__kmp_affinity_gran > affinity_gran_package) {
				1650	__kmp_affinity_gran_levels++;
				1651	}
				1652	}
				1653
				1654	if (__kmp_affinity_verbose) {
				1655	__kmp_affinity_print_topology(retval, nApics, depth, pkgLevel, coreLevel,
				1656	threadLevel);
				1657	}
				1658
				1659	__kmp_free(last);
				1660	__kmp_free(maxCt);
				1661	__kmp_free(counts);
				1662	__kmp_free(totals);
				1663	KMP_CPU_FREE(oldMask);
				1664	*address2os = retval;
				1665	return depth;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1666	}
				1667
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1668	#endif /* KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1669
// Field layout of one processor record parsed from /proc/cpuinfo. A record is
// an array of unsigned values indexed by the constants below; the value of a
// "node_<n> id" line is stored at index nodeIdIndex + n.
#define osIdIndex 0
#define threadIdIndex 1
#define coreIdIndex 2
#define pkgIdIndex 3
#define nodeIdIndex 4

typedef unsigned *ProcCpuInfo;

// Highest valid index in a record. Starts at pkgIdIndex and is raised by the
// cpuinfo parser when it encounters node_<n> fields.
static unsigned maxIndex = pkgIdIndex;

// Three-way comparison of two records by their OS processor id.
//
// Unlike __kmp_affinity_cmp_ProcCpuInfo_phys_id, a and b are interpreted as
// pointing directly at the first field of a record, not at a pointer to a
// record.
//
// Returns -1 if a orders before b, 1 if it orders after, 0 if the ids match.
static int __kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b) {
  const unsigned *aa = (const unsigned *)a;
  const unsigned *bb = (const unsigned *)b;
  if (aa[osIdIndex] < bb[osIdIndex])
    return -1;
  if (aa[osIdIndex] > bb[osIdIndex])
    return 1;
  return 0;
}

// qsort() comparator for a table of record pointers (unsigned *[]).
//
// a and b point at table elements, i.e. at pointers to records. Records are
// compared field by field from index maxIndex down to osIdIndex, so the
// highest-indexed field is the most significant key and the OS id is the
// final tie-breaker.
//
// Returns -1, 1 or 0 in the usual three-way comparison sense.
static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a,
                                                  const void *b) {
  unsigned i;
  const unsigned *aa = *((const unsigned *const *)a);
  const unsigned *bb = *((const unsigned *const *)b);
  // i is unsigned, so the loop cannot be written as "i >= osIdIndex"; the
  // exit test is done explicitly after index osIdIndex has been compared.
  for (i = maxIndex;; i--) {
    if (aa[i] < bb[i])
      return -1;
    if (aa[i] > bb[i])
      return 1;
    if (i == osIdIndex)
      break;
  }
  return 0;
}
				1704
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1705	// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
				1706	// affinity map.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1707	static int __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os,
				1708	int *line,
				1709	kmp_i18n_id_t *const msg_id,
				1710	FILE *f) {
				1711	*address2os = NULL;
				1712	*msg_id = kmp_i18n_null;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1713
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1714	// Scan of the file, and count the number of "processor" (osId) fields,
				1715	// and find the highest value of <n> for a node_<n> field.
				1716	char buf[256];
				1717	unsigned num_records = 0;
				1718	while (!feof(f)) {
				1719	buf[sizeof(buf) - 1] = 1;
				1720	if (!fgets(buf, sizeof(buf), f)) {
				1721	// Read errors presumably because of EOF
				1722	break;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1723	}
				1724
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1725	char s1[] = "processor";
				1726	if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
				1727	num_records++;
				1728	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1729	}
				1730
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1731	// FIXME - this will match "node_<n> <garbage>"
				1732	unsigned level;
				1733	if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
				1734	if (nodeIdIndex + level >= maxIndex) {
				1735	maxIndex = nodeIdIndex + level;
				1736	}
				1737	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1738	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1739	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1740
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1741	// Check for empty file / no valid processor records, or too many. The number
				1742	// of records can't exceed the number of valid bits in the affinity mask.
				1743	if (num_records == 0) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1744	*line = 0;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1745	*msg_id = kmp_i18n_str_NoProcRecords;
				1746	return -1;
				1747	}
				1748	if (num_records > (unsigned)__kmp_xproc) {
				1749	*line = 0;
				1750	*msg_id = kmp_i18n_str_TooManyProcRecords;
				1751	return -1;
				1752	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1753
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1754	// Set the file pointer back to the beginning, so that we can scan the file
				1755	// again, this time performing a full parse of the data. Allocate a vector of
				1756	// ProcCpuInfo object, where we will place the data. Adding an extra element
				1757	// at the end allows us to remove a lot of extra checks for termination
				1758	// conditions.
				1759	if (fseek(f, 0, SEEK_SET) != 0) {
				1760	*line = 0;
				1761	*msg_id = kmp_i18n_str_CantRewindCpuinfo;
				1762	return -1;
				1763	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1764
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1765	// Allocate the array of records to store the proc info in. The dummy
				1766	// element at the end makes the logic in filling them out easier to code.
				1767	unsigned **threadInfo =
				1768	(unsigned *)__kmp_allocate((num_records + 1) sizeof(unsigned *));
				1769	unsigned i;
				1770	for (i = 0; i <= num_records; i++) {
				1771	threadInfo[i] =
				1772	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned));
				1773	}
				1774
				1775	#define CLEANUP_THREAD_INFO \
				1776	for (i = 0; i <= num_records; i++) { \
				1777	__kmp_free(threadInfo[i]); \
				1778	} \
				1779	__kmp_free(threadInfo);
				1780
				1781	// A value of UINT_MAX means that we didn't find the field
				1782	unsigned __index;
				1783
				1784	#define INIT_PROC_INFO(p) \
				1785	for (__index = 0; __index <= maxIndex; __index++) { \
				1786	(p)[__index] = UINT_MAX; \
				1787	}
				1788
				1789	for (i = 0; i <= num_records; i++) {
				1790	INIT_PROC_INFO(threadInfo[i]);
				1791	}
				1792
				1793	unsigned num_avail = 0;
				1794	*line = 0;
				1795	while (!feof(f)) {
				1796	// Create an inner scoping level, so that all the goto targets at the end of
				1797	// the loop appear in an outer scoping level. This avoids warnings about
				1798	// jumping past an initialization to a target in the same block.
				1799	{
				1800	buf[sizeof(buf) - 1] = 1;
				1801	bool long_line = false;
				1802	if (!fgets(buf, sizeof(buf), f)) {
				1803	// Read errors presumably because of EOF
				1804	// If there is valid data in threadInfo[num_avail], then fake
				1805	// a blank line to ensure that the last address gets parsed.
				1806	bool valid = false;
				1807	for (i = 0; i <= maxIndex; i++) {
				1808	if (threadInfo[num_avail][i] != UINT_MAX) {
				1809	valid = true;
				1810	}
				1811	}
				1812	if (!valid) {
				1813	break;
				1814	}
				1815	buf[0] = 0;
				1816	} else if (!buf[sizeof(buf) - 1]) {
				1817	// The line is longer than the buffer. Set a flag and don't
				1818	// emit an error if we were going to ignore the line, anyway.
				1819	long_line = true;
				1820
				1821	#define CHECK_LINE \
				1822	if (long_line) { \
				1823	CLEANUP_THREAD_INFO; \
				1824	*msg_id = kmp_i18n_str_LongLineCpuinfo; \
				1825	return -1; \
				1826	}
				1827	}
				1828	(*line)++;
				1829
				1830	char s1[] = "processor";
				1831	if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
				1832	CHECK_LINE;
				1833	char *p = strchr(buf + sizeof(s1) - 1, ':');
				1834	unsigned val;
				1835	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
				1836	goto no_val;
				1837	if (threadInfo[num_avail][osIdIndex] != UINT_MAX)
				1838	goto dup_field;
				1839	threadInfo[num_avail][osIdIndex] = val;
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	1840	#if KMP_OS_LINUX && USE_SYSFS_INFO
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1841	char path[256];
				1842	KMP_SNPRINTF(
				1843	path, sizeof(path),
				1844	"/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
				1845	threadInfo[num_avail][osIdIndex]);
				1846	__kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	1847
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1848	KMP_SNPRINTF(path, sizeof(path),
				1849	"/sys/devices/system/cpu/cpu%u/topology/core_id",
				1850	threadInfo[num_avail][osIdIndex]);
				1851	__kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
				1852	continue;
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	1853	#else
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1854	}
				1855	char s2[] = "physical id";
				1856	if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
				1857	CHECK_LINE;
				1858	char *p = strchr(buf + sizeof(s2) - 1, ':');
				1859	unsigned val;
				1860	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
				1861	goto no_val;
				1862	if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX)
				1863	goto dup_field;
				1864	threadInfo[num_avail][pkgIdIndex] = val;
				1865	continue;
				1866	}
				1867	char s3[] = "core id";
				1868	if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
				1869	CHECK_LINE;
				1870	char *p = strchr(buf + sizeof(s3) - 1, ':');
				1871	unsigned val;
				1872	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
				1873	goto no_val;
				1874	if (threadInfo[num_avail][coreIdIndex] != UINT_MAX)
				1875	goto dup_field;
				1876	threadInfo[num_avail][coreIdIndex] = val;
				1877	continue;
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	1878	#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1879	}
				1880	char s4[] = "thread id";
				1881	if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
				1882	CHECK_LINE;
				1883	char *p = strchr(buf + sizeof(s4) - 1, ':');
				1884	unsigned val;
				1885	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
				1886	goto no_val;
				1887	if (threadInfo[num_avail][threadIdIndex] != UINT_MAX)
				1888	goto dup_field;
				1889	threadInfo[num_avail][threadIdIndex] = val;
				1890	continue;
				1891	}
				1892	unsigned level;
				1893	if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
				1894	CHECK_LINE;
				1895	char *p = strchr(buf + sizeof(s4) - 1, ':');
				1896	unsigned val;
				1897	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
				1898	goto no_val;
				1899	KMP_ASSERT(nodeIdIndex + level <= maxIndex);
				1900	if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX)
				1901	goto dup_field;
				1902	threadInfo[num_avail][nodeIdIndex + level] = val;
				1903	continue;
				1904	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1905
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1906	// We didn't recognize the leading token on the line. There are lots of
				1907	// leading tokens that we don't recognize - if the line isn't empty, go on
				1908	// to the next line.
				1909	if ((buf != 0) && (buf != '\n')) {
				1910	// If the line is longer than the buffer, read characters
				1911	// until we find a newline.
				1912	if (long_line) {
				1913	int ch;
				1914	while (((ch = fgetc(f)) != EOF) && (ch != '\n'))
				1915	;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1916	}
				1917	continue;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1918	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1919
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1920	// A newline has signalled the end of the processor record.
				1921	// Check that there aren't too many procs specified.
				1922	if ((int)num_avail == __kmp_xproc) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1923	CLEANUP_THREAD_INFO;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1924	*msg_id = kmp_i18n_str_TooManyEntries;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1925	return -1;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1926	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1927
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1928	// Check for missing fields. The osId field must be there, and we
				1929	// currently require that the physical id field is specified, also.
				1930	if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1931	CLEANUP_THREAD_INFO;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1932	*msg_id = kmp_i18n_str_MissingProcField;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1933	return -1;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1934	}
				1935	if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1936	CLEANUP_THREAD_INFO;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1937	*msg_id = kmp_i18n_str_MissingPhysicalIDField;
				1938	return -1;
				1939	}
				1940
				1941	// Skip this proc if it is not included in the machine model.
				1942	if (!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex],
				1943	__kmp_affin_fullMask)) {
				1944	INIT_PROC_INFO(threadInfo[num_avail]);
				1945	continue;
				1946	}
				1947
				1948	// We have a successful parse of this proc's info.
				1949	// Increment the counter, and prepare for the next proc.
				1950	num_avail++;
				1951	KMP_ASSERT(num_avail <= num_records);
				1952	INIT_PROC_INFO(threadInfo[num_avail]);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1953	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1954	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1955
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1956	no_val:
				1957	CLEANUP_THREAD_INFO;
				1958	*msg_id = kmp_i18n_str_MissingValCpuinfo;
				1959	return -1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1960
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1961	dup_field:
				1962	CLEANUP_THREAD_INFO;
				1963	*msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
				1964	return -1;
				1965	}
				1966	*line = 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1967
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1968	#if KMP_MIC && REDUCE_TEAM_SIZE
				1969	unsigned teamSize = 0;
				1970	#endif // KMP_MIC && REDUCE_TEAM_SIZE
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1971
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1972	// check for num_records == __kmp_xproc ???
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1973
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1974	// If there's only one thread context to bind to, form an Address object with
				1975	// depth 1 and return immediately (or, if affinity is off, set address2os to
				1976	// NULL and return).
				1977	//
				1978	// If it is configured to omit the package level when there is only a single
				1979	// package, the logic at the end of this routine won't work if there is only a
				1980	// single thread - it would try to form an Address object with depth 0.
				1981	KMP_ASSERT(num_avail > 0);
				1982	KMP_ASSERT(num_avail <= num_records);
				1983	if (num_avail == 1) {
				1984	__kmp_ncores = 1;
				1985	__kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1986	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1987	if (!KMP_AFFINITY_CAPABLE()) {
				1988	KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
				1989	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1990	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1991	} else {
				1992	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				1993	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				1994	__kmp_affin_fullMask);
				1995	KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
				1996	if (__kmp_affinity_respect_mask) {
				1997	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				1998	} else {
				1999	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2000	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2001	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				2002	KMP_INFORM(Uniform, "KMP_AFFINITY");
				2003	}
				2004	int index;
				2005	kmp_str_buf_t buf;
				2006	__kmp_str_buf_init(&buf);
				2007	__kmp_str_buf_print(&buf, "1");
				2008	for (index = maxIndex - 1; index > pkgIdIndex; index--) {
				2009	__kmp_str_buf_print(&buf, " x 1");
				2010	}
				2011	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
				2012	__kmp_str_buf_free(&buf);
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	2013	}
				2014
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2015	if (__kmp_affinity_type == affinity_none) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2016	CLEANUP_THREAD_INFO;
				2017	return 0;
				2018	}
				2019
				2020	address2os = (AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair));
				2021	Address addr(1);
				2022	addr.labels[0] = threadInfo[0][pkgIdIndex];
				2023	(*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
				2024
				2025	if (__kmp_affinity_gran_levels < 0) {
				2026	__kmp_affinity_gran_levels = 0;
				2027	}
				2028
				2029	if (__kmp_affinity_verbose) {
				2030	__kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
				2031	}
				2032
				2033	CLEANUP_THREAD_INFO;
				2034	return 1;
				2035	}
				2036
				2037	// Sort the threadInfo table by physical Id.
				2038	qsort(threadInfo, num_avail, sizeof(*threadInfo),
				2039	__kmp_affinity_cmp_ProcCpuInfo_phys_id);
				2040
				2041	// The table is now sorted by pkgId / coreId / threadId, but we really don't
				2042	// know the radix of any of the fields. pkgId's may be sparsely assigned among
				2043	// the chips on a system. Although coreId's are usually assigned
				2044	// [0 .. coresPerPkg-1] and threadId's are usually assigned
				2045	// [0..threadsPerCore-1], we don't want to make any such assumptions.
				2046	//
				2047	// For that matter, we don't know what coresPerPkg and threadsPerCore (or the
				2048	// total # packages) are at this point - we want to determine that now. We
				2049	// only have an upper bound on the first two figures.
				2050	unsigned *counts =
				2051	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned));
				2052	unsigned *maxCt =
				2053	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned));
				2054	unsigned *totals =
				2055	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned));
				2056	unsigned *lastId =
				2057	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned));
				2058
				2059	bool assign_thread_ids = false;
				2060	unsigned threadIdCt;
				2061	unsigned index;
				2062
				2063	restart_radix_check:
				2064	threadIdCt = 0;
				2065
				2066	// Initialize the counter arrays with data from threadInfo[0].
				2067	if (assign_thread_ids) {
				2068	if (threadInfo[0][threadIdIndex] == UINT_MAX) {
				2069	threadInfo[0][threadIdIndex] = threadIdCt++;
				2070	} else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
				2071	threadIdCt = threadInfo[0][threadIdIndex] + 1;
				2072	}
				2073	}
				2074	for (index = 0; index <= maxIndex; index++) {
				2075	counts[index] = 1;
				2076	maxCt[index] = 1;
				2077	totals[index] = 1;
				2078	lastId[index] = threadInfo[0][index];
				2079	;
				2080	}
				2081
				2082	// Run through the rest of the OS procs.
				2083	for (i = 1; i < num_avail; i++) {
				2084	// Find the most significant index whose id differs from the id for the
				2085	// previous OS proc.
				2086	for (index = maxIndex; index >= threadIdIndex; index--) {
				2087	if (assign_thread_ids && (index == threadIdIndex)) {
				2088	// Auto-assign the thread id field if it wasn't specified.
				2089	if (threadInfo[i][threadIdIndex] == UINT_MAX) {
				2090	threadInfo[i][threadIdIndex] = threadIdCt++;
				2091	}
				2092	// Apparently the thread id field was specified for some entries and not
				2093	// others. Start the thread id counter off at the next higher thread id.
				2094	else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
				2095	threadIdCt = threadInfo[i][threadIdIndex] + 1;
				2096	}
				2097	}
				2098	if (threadInfo[i][index] != lastId[index]) {
				2099	// Run through all indices which are less significant, and reset the
				2100	// counts to 1. At all levels up to and including index, we need to
				2101	// increment the totals and record the last id.
				2102	unsigned index2;
				2103	for (index2 = threadIdIndex; index2 < index; index2++) {
				2104	totals[index2]++;
				2105	if (counts[index2] > maxCt[index2]) {
				2106	maxCt[index2] = counts[index2];
				2107	}
				2108	counts[index2] = 1;
				2109	lastId[index2] = threadInfo[i][index2];
				2110	}
				2111	counts[index]++;
				2112	totals[index]++;
				2113	lastId[index] = threadInfo[i][index];
				2114
				2115	if (assign_thread_ids && (index > threadIdIndex)) {
				2116
				2117	#if KMP_MIC && REDUCE_TEAM_SIZE
				2118	// The default team size is the total #threads in the machine
				2119	// minus 1 thread for every core that has 3 or more threads.
				2120	teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
				2121	#endif // KMP_MIC && REDUCE_TEAM_SIZE
				2122
				2123	// Restart the thread counter, as we are on a new core.
				2124	threadIdCt = 0;
				2125
				2126	// Auto-assign the thread id field if it wasn't specified.
				2127	if (threadInfo[i][threadIdIndex] == UINT_MAX) {
				2128	threadInfo[i][threadIdIndex] = threadIdCt++;
				2129	}
				2130
				2131	// Apparently the thread id field was specified for some entries and
				2132	// not others. Start the thread id counter off at the next higher
				2133	// thread id.
				2134	else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
				2135	threadIdCt = threadInfo[i][threadIdIndex] + 1;
				2136	}
				2137	}
				2138	break;
				2139	}
				2140	}
				2141	if (index < threadIdIndex) {
				2142	// If thread ids were specified, it is an error if they are not unique.
				2143	// Also, check that we haven't already restarted the loop (to be safe -
				2144	// shouldn't need to).
				2145	if ((threadInfo[i][threadIdIndex] != UINT_MAX) \|\| assign_thread_ids) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2146	__kmp_free(lastId);
				2147	__kmp_free(totals);
				2148	__kmp_free(maxCt);
				2149	__kmp_free(counts);
				2150	CLEANUP_THREAD_INFO;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2151	*msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
				2152	return -1;
				2153	}
				2154
				2155	// If the thread ids were not specified and we see entries that
				2156	// are duplicates, start the loop over and assign the thread ids manually.
				2157	assign_thread_ids = true;
				2158	goto restart_radix_check;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2159	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2160	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2161
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2162	#if KMP_MIC && REDUCE_TEAM_SIZE
				2163	// The default team size is the total #threads in the machine
				2164	// minus 1 thread for every core that has 3 or more threads.
				2165	teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
				2166	#endif // KMP_MIC && REDUCE_TEAM_SIZE
				2167
				2168	for (index = threadIdIndex; index <= maxIndex; index++) {
				2169	if (counts[index] > maxCt[index]) {
				2170	maxCt[index] = counts[index];
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2171	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2172	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2173
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2174	__kmp_nThreadsPerCore = maxCt[threadIdIndex];
				2175	nCoresPerPkg = maxCt[coreIdIndex];
				2176	nPackages = totals[pkgIdIndex];
				2177
				2178	// Check to see if the machine topology is uniform
				2179	unsigned prod = totals[maxIndex];
				2180	for (index = threadIdIndex; index < maxIndex; index++) {
				2181	prod *= maxCt[index];
				2182	}
				2183	bool uniform = (prod == totals[threadIdIndex]);
				2184
				2185	// When affinity is off, this routine will still be called to set
				2186	// __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
				2187	// Make sure all these vars are set correctly, and return now if affinity is
				2188	// not enabled.
				2189	__kmp_ncores = totals[coreIdIndex];
				2190
				2191	if (__kmp_affinity_verbose) {
				2192	if (!KMP_AFFINITY_CAPABLE()) {
				2193	KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
				2194	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				2195	if (uniform) {
				2196	KMP_INFORM(Uniform, "KMP_AFFINITY");
				2197	} else {
				2198	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				2199	}
				2200	} else {
				2201	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				2202	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				2203	__kmp_affin_fullMask);
				2204	KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
				2205	if (__kmp_affinity_respect_mask) {
				2206	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				2207	} else {
				2208	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				2209	}
				2210	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				2211	if (uniform) {
				2212	KMP_INFORM(Uniform, "KMP_AFFINITY");
				2213	} else {
				2214	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				2215	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2216	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2217	kmp_str_buf_t buf;
				2218	__kmp_str_buf_init(&buf);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2219
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2220	__kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
				2221	for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
				2222	__kmp_str_buf_print(&buf, " x %d", maxCt[index]);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2223	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2224	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
				2225	maxCt[threadIdIndex], __kmp_ncores);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2226
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2227	__kmp_str_buf_free(&buf);
				2228	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2229
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2230	#if KMP_MIC && REDUCE_TEAM_SIZE
				2231	// Set the default team size.
				2232	if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
				2233	__kmp_dflt_team_nth = teamSize;
				2234	KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting "
				2235	"__kmp_dflt_team_nth = %d\n",
				2236	__kmp_dflt_team_nth));
				2237	}
				2238	#endif // KMP_MIC && REDUCE_TEAM_SIZE
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2239
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2240	KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
				2241	KMP_DEBUG_ASSERT(num_avail == __kmp_avail_proc);
				2242	__kmp_pu_os_idx = (int )__kmp_allocate(sizeof(int) __kmp_avail_proc);
				2243	for (i = 0; i < num_avail; ++i) { // fill the os indices
				2244	__kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
				2245	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2246
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2247	if (__kmp_affinity_type == affinity_none) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2248	__kmp_free(lastId);
				2249	__kmp_free(totals);
				2250	__kmp_free(maxCt);
				2251	__kmp_free(counts);
				2252	CLEANUP_THREAD_INFO;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2253	return 0;
				2254	}
				2255
				2256	// Count the number of levels which have more nodes at that level than at the
				2257	// parent's level (with there being an implicit root node of the top level).
				2258	// This is equivalent to saying that there is at least one node at this level
				2259	// which has a sibling. These levels are in the map, and the package level is
				2260	// always in the map.
				2261	bool inMap = (bool )__kmp_allocate((maxIndex + 1) * sizeof(bool));
				2262	int level = 0;
				2263	for (index = threadIdIndex; index < maxIndex; index++) {
				2264	KMP_ASSERT(totals[index] >= totals[index + 1]);
				2265	inMap[index] = (totals[index] > totals[index + 1]);
				2266	}
				2267	inMap[maxIndex] = (totals[maxIndex] > 1);
				2268	inMap[pkgIdIndex] = true;
				2269
				2270	int depth = 0;
				2271	for (index = threadIdIndex; index <= maxIndex; index++) {
				2272	if (inMap[index]) {
				2273	depth++;
				2274	}
				2275	}
				2276	KMP_ASSERT(depth > 0);
				2277
				2278	// Construct the data structure that is to be returned.
				2279	address2os = (AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair) * num_avail);
				2280	int pkgLevel = -1;
				2281	int coreLevel = -1;
				2282	int threadLevel = -1;
				2283
				2284	for (i = 0; i < num_avail; ++i) {
				2285	Address addr(depth);
				2286	unsigned os = threadInfo[i][osIdIndex];
				2287	int src_index;
				2288	int dst_index = 0;
				2289
				2290	for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
				2291	if (!inMap[src_index]) {
				2292	continue;
				2293	}
				2294	addr.labels[dst_index] = threadInfo[i][src_index];
				2295	if (src_index == pkgIdIndex) {
				2296	pkgLevel = dst_index;
				2297	} else if (src_index == coreIdIndex) {
				2298	coreLevel = dst_index;
				2299	} else if (src_index == threadIdIndex) {
				2300	threadLevel = dst_index;
				2301	}
				2302	dst_index++;
				2303	}
				2304	(*address2os)[i] = AddrUnsPair(addr, os);
				2305	}
				2306
				2307	if (__kmp_affinity_gran_levels < 0) {
				2308	// Set the granularity level based on what levels are modeled
				2309	// in the machine topology map.
				2310	unsigned src_index;
				2311	__kmp_affinity_gran_levels = 0;
				2312	for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
				2313	if (!inMap[src_index]) {
				2314	continue;
				2315	}
				2316	switch (src_index) {
				2317	case threadIdIndex:
				2318	if (__kmp_affinity_gran > affinity_gran_thread) {
				2319	__kmp_affinity_gran_levels++;
				2320	}
				2321
				2322	break;
				2323	case coreIdIndex:
				2324	if (__kmp_affinity_gran > affinity_gran_core) {
				2325	__kmp_affinity_gran_levels++;
				2326	}
				2327	break;
				2328
				2329	case pkgIdIndex:
				2330	if (__kmp_affinity_gran > affinity_gran_package) {
				2331	__kmp_affinity_gran_levels++;
				2332	}
				2333	break;
				2334	}
				2335	}
				2336	}
				2337
				2338	if (__kmp_affinity_verbose) {
				2339	__kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
				2340	coreLevel, threadLevel);
				2341	}
				2342
				2343	__kmp_free(inMap);
				2344	__kmp_free(lastId);
				2345	__kmp_free(totals);
				2346	__kmp_free(maxCt);
				2347	__kmp_free(counts);
				2348	CLEANUP_THREAD_INFO;
				2349	return depth;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2350	}
				2351
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2352	// Create and return a table of affinity masks, indexed by OS thread ID.
				2353	// This routine handles OR'ing together all the affinity masks of threads
				2354	// that are sufficiently close, if granularity > fine.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2355	static kmp_affin_mask_t __kmp_create_masks(unsigned maxIndex,
				2356	unsigned *numUnique,
				2357	AddrUnsPair *address2os,
				2358	unsigned numAddrs) {
				2359	// First form a table of affinity masks in order of OS thread id.
				2360	unsigned depth;
				2361	unsigned maxOsId;
				2362	unsigned i;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2363
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2364	KMP_ASSERT(numAddrs > 0);
				2365	depth = address2os[0].first.depth;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2366
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2367	maxOsId = 0;
				2368	for (i = 0; i < numAddrs; i++) {
				2369	unsigned osId = address2os[i].second;
				2370	if (osId > maxOsId) {
				2371	maxOsId = osId;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2372	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2373	}
				2374	kmp_affin_mask_t *osId2Mask;
				2375	KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId + 1));
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2376
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2377	// Sort the address2os table according to physical order. Doing so will put
				2378	// all threads on the same core/package/node in consecutive locations.
				2379	qsort(address2os, numAddrs, sizeof(*address2os),
				2380	__kmp_affinity_cmp_Address_labels);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2381
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2382	KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
				2383	if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
				2384	KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
				2385	}
				2386	if (__kmp_affinity_gran_levels >= (int)depth) {
				2387	if (__kmp_affinity_verbose \|\|
				2388	(__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
				2389	KMP_WARNING(AffThreadsMayMigrate);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2390	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2391	}
				2392
				2393	// Run through the table, forming the masks for all threads on each core.
				2394	// Threads on the same core will have identical "Address" objects, not
				2395	// considering the last level, which must be the thread id. All threads on a
				2396	// core will appear consecutively.
				2397	unsigned unique = 0;
				2398	unsigned j = 0; // index of 1st thread on core
				2399	unsigned leader = 0;
				2400	Address *leaderAddr = &(address2os[0].first);
				2401	kmp_affin_mask_t *sum;
				2402	KMP_CPU_ALLOC_ON_STACK(sum);
				2403	KMP_CPU_ZERO(sum);
				2404	KMP_CPU_SET(address2os[0].second, sum);
				2405	for (i = 1; i < numAddrs; i++) {
				2406	// If this thread is sufficiently close to the leader (within the
				2407	// granularity setting), then set the bit for this os thread in the
				2408	// affinity mask for this group, and go on to the next thread.
				2409	if (leaderAddr->isClose(address2os[i].first, __kmp_affinity_gran_levels)) {
				2410	KMP_CPU_SET(address2os[i].second, sum);
				2411	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2412	}
				2413
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2414	// For every thread in this group, copy the mask to the thread's entry in
				2415	// the osId2Mask table. Mark the first address as a leader.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2416	for (; j < i; j++) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2417	unsigned osId = address2os[j].second;
				2418	KMP_DEBUG_ASSERT(osId <= maxOsId);
				2419	kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
				2420	KMP_CPU_COPY(mask, sum);
				2421	address2os[j].first.leader = (j == leader);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2422	}
				2423	unique++;
				2424
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2425	// Start a new mask.
				2426	leader = i;
				2427	leaderAddr = &(address2os[i].first);
				2428	KMP_CPU_ZERO(sum);
				2429	KMP_CPU_SET(address2os[i].second, sum);
				2430	}
				2431
				2432	// For every thread in last group, copy the mask to the thread's
				2433	// entry in the osId2Mask table.
				2434	for (; j < i; j++) {
				2435	unsigned osId = address2os[j].second;
				2436	KMP_DEBUG_ASSERT(osId <= maxOsId);
				2437	kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
				2438	KMP_CPU_COPY(mask, sum);
				2439	address2os[j].first.leader = (j == leader);
				2440	}
				2441	unique++;
				2442	KMP_CPU_FREE_FROM_STACK(sum);
				2443
				2444	*maxIndex = maxOsId;
				2445	*numUnique = unique;
				2446	return osId2Mask;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2447	}
				2448
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2449	// Stuff for the affinity proclist parsers. It's easier to declare these vars
				2450	// as file-static than to try and pass them through the calling sequence of
				2451	// the recursive-descent OMP_PLACES parser.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2452	static kmp_affin_mask_t *newMasks;
				2453	static int numNewMasks;
				2454	static int nextNewMask;
				2455
// Append a copy of _mask to the newMasks vector. When the vector is already
// full (nextNewMask has reached numNewMasks) its capacity is doubled first: a
// larger array is allocated, the existing masks are copied across, and the old
// array is released.
#define ADD_MASK(_mask)                                                        \
  {                                                                            \
    if (nextNewMask >= numNewMasks) {                                          \
      int oldCapacity = numNewMasks;                                           \
      int slot;                                                                \
      kmp_affin_mask_t *grown;                                                 \
      numNewMasks = oldCapacity * 2;                                           \
      KMP_CPU_INTERNAL_ALLOC_ARRAY(grown, numNewMasks);                        \
      for (slot = 0; slot < oldCapacity; slot++) {                             \
        kmp_affin_mask_t *from = KMP_CPU_INDEX(newMasks, slot);                \
        kmp_affin_mask_t *to = KMP_CPU_INDEX(grown, slot);                     \
        KMP_CPU_COPY(to, from);                                                \
      }                                                                        \
      KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, oldCapacity);                      \
      newMasks = grown;                                                        \
    }                                                                          \
    KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask));               \
    nextNewMask++;                                                             \
  }
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2474
// Append the mask stored for OS proc _osId in the _osId2Mask table to
// newMasks (via ADD_MASK). The id is rejected if it is greater than _maxOsId
// or if its own bit is not set in its table entry; in that case an
// AffIgnoreInvalidProcID warning is issued when verbose mode is on, or when
// warnings are enabled and the affinity type is not "none". The range check
// is evaluated first so the table is never indexed beyond _maxOsId.
// Note: _osId is evaluated more than once, so pass an expression without
// side effects.
#define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId)                             \
  {                                                                            \
    if (((_osId) > (_maxOsId)) ||                                              \
        (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) {     \
      if (__kmp_affinity_verbose ||                                            \
          (__kmp_affinity_warnings &&                                          \
           (__kmp_affinity_type != affinity_none))) {                          \
        KMP_WARNING(AffIgnoreInvalidProcID, _osId);                            \
      }                                                                        \
    } else {                                                                   \
      ADD_MASK(KMP_CPU_INDEX((_osId2Mask), (_osId)));                          \
    }                                                                          \
  }
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2488
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2489	// Re-parse the proclist (for the explicit affinity type), and form the list
				2490	// of affinity newMasks indexed by gtid.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2491	static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
				2492	unsigned int *out_numMasks,
				2493	const char *proclist,
				2494	kmp_affin_mask_t *osId2Mask,
				2495	int maxOsId) {
				2496	int i;
				2497	const char *scan = proclist;
				2498	const char *next = proclist;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2499
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2500	// We use malloc() for the temporary mask vector, so that we can use
				2501	// realloc() to extend it.
				2502	numNewMasks = 2;
				2503	KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
				2504	nextNewMask = 0;
				2505	kmp_affin_mask_t *sumMask;
				2506	KMP_CPU_ALLOC(sumMask);
				2507	int setSize = 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2508
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2509	for (;;) {
				2510	int start, end, stride;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2511
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2512	SKIP_WS(scan);
				2513	next = scan;
				2514	if (*next == '\0') {
				2515	break;
				2516	}
				2517
				2518	if (*next == '{') {
				2519	int num;
				2520	setSize = 0;
				2521	next++; // skip '{'
				2522	SKIP_WS(next);
				2523	scan = next;
				2524
				2525	// Read the first integer in the set.
				2526	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad proclist");
				2527	SKIP_DIGITS(next);
				2528	num = __kmp_str_to_int(scan, *next);
				2529	KMP_ASSERT2(num >= 0, "bad explicit proc list");
				2530
				2531	// Copy the mask for that osId to the sum (union) mask.
				2532	if ((num > maxOsId) \|\|
				2533	(!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
				2534	if (__kmp_affinity_verbose \|\|
				2535	(__kmp_affinity_warnings &&
				2536	(__kmp_affinity_type != affinity_none))) {
				2537	KMP_WARNING(AffIgnoreInvalidProcID, num);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2538	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2539	KMP_CPU_ZERO(sumMask);
				2540	} else {
				2541	KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
				2542	setSize = 1;
				2543	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2544
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2545	for (;;) {
				2546	// Check for end of set.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2547	SKIP_WS(next);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2548	if (*next == '}') {
				2549	next++; // skip '}'
				2550	break;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2551	}
				2552
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2553	// Skip optional comma.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2554	if (*next == ',') {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2555	next++;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2556	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2557	SKIP_WS(next);
				2558
				2559	// Read the next integer in the set.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2560	scan = next;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2561	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list");
				2562
				2563	SKIP_DIGITS(next);
				2564	num = __kmp_str_to_int(scan, *next);
				2565	KMP_ASSERT2(num >= 0, "bad explicit proc list");
				2566
				2567	// Add the mask for that osId to the sum mask.
				2568	if ((num > maxOsId) \|\|
				2569	(!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
				2570	if (__kmp_affinity_verbose \|\|
				2571	(__kmp_affinity_warnings &&
				2572	(__kmp_affinity_type != affinity_none))) {
				2573	KMP_WARNING(AffIgnoreInvalidProcID, num);
				2574	}
				2575	} else {
				2576	KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
				2577	setSize++;
				2578	}
				2579	}
				2580	if (setSize > 0) {
				2581	ADD_MASK(sumMask);
				2582	}
				2583
				2584	SKIP_WS(next);
				2585	if (*next == ',') {
				2586	next++;
				2587	}
				2588	scan = next;
				2589	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2590	}
				2591
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2592	// Read the first integer.
				2593	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list");
				2594	SKIP_DIGITS(next);
				2595	start = __kmp_str_to_int(scan, *next);
				2596	KMP_ASSERT2(start >= 0, "bad explicit proc list");
				2597	SKIP_WS(next);
				2598
				2599	// If this isn't a range, then add a mask to the list and go on.
				2600	if (*next != '-') {
				2601	ADD_MASK_OSID(start, osId2Mask, maxOsId);
				2602
				2603	// Skip optional comma.
				2604	if (*next == ',') {
				2605	next++;
				2606	}
				2607	scan = next;
				2608	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2609	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2610
				2611	// This is a range. Skip over the '-' and read in the 2nd int.
				2612	next++; // skip '-'
				2613	SKIP_WS(next);
				2614	scan = next;
				2615	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list");
				2616	SKIP_DIGITS(next);
				2617	end = __kmp_str_to_int(scan, *next);
				2618	KMP_ASSERT2(end >= 0, "bad explicit proc list");
				2619
				2620	// Check for a stride parameter
				2621	stride = 1;
				2622	SKIP_WS(next);
				2623	if (*next == ':') {
				2624	// A stride is specified. Skip over the ':" and read the 3rd int.
				2625	int sign = +1;
				2626	next++; // skip ':'
				2627	SKIP_WS(next);
				2628	scan = next;
				2629	if (*next == '-') {
				2630	sign = -1;
				2631	next++;
				2632	SKIP_WS(next);
				2633	scan = next;
				2634	}
				2635	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list");
				2636	SKIP_DIGITS(next);
				2637	stride = __kmp_str_to_int(scan, *next);
				2638	KMP_ASSERT2(stride >= 0, "bad explicit proc list");
				2639	stride *= sign;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	2640	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2641
				2642	// Do some range checks.
				2643	KMP_ASSERT2(stride != 0, "bad explicit proc list");
				2644	if (stride > 0) {
				2645	KMP_ASSERT2(start <= end, "bad explicit proc list");
				2646	} else {
				2647	KMP_ASSERT2(start >= end, "bad explicit proc list");
				2648	}
				2649	KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
				2650
				2651	// Add the mask for each OS proc # to the list.
				2652	if (stride > 0) {
				2653	do {
				2654	ADD_MASK_OSID(start, osId2Mask, maxOsId);
				2655	start += stride;
				2656	} while (start <= end);
				2657	} else {
				2658	do {
				2659	ADD_MASK_OSID(start, osId2Mask, maxOsId);
				2660	start += stride;
				2661	} while (start >= end);
				2662	}
				2663
				2664	// Skip optional comma.
				2665	SKIP_WS(next);
				2666	if (*next == ',') {
				2667	next++;
				2668	}
				2669	scan = next;
				2670	}
				2671
				2672	*out_numMasks = nextNewMask;
				2673	if (nextNewMask == 0) {
				2674	*out_masks = NULL;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	2675	KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2676	return;
				2677	}
				2678	KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
				2679	for (i = 0; i < nextNewMask; i++) {
				2680	kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
				2681	kmp_affin_mask_t dest = KMP_CPU_INDEX((out_masks), i);
				2682	KMP_CPU_COPY(dest, src);
				2683	}
				2684	KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
				2685	KMP_CPU_FREE(sumMask);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2686	}
				2687
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2688	#if OMP_40_ENABLED
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2689
				2690	/*-----------------------------------------------------------------------------
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2691	Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
				2692	places. Again, Here is the grammar:
				2693
				2694	place_list := place
				2695	place_list := place , place_list
				2696	place := num
				2697	place := place : num
				2698	place := place : num : signed
				2699	place := { subplacelist }
				2700	place := ! place // (lowest priority)
				2701	subplace_list := subplace
				2702	subplace_list := subplace , subplace_list
				2703	subplace := num
				2704	subplace := num : num
				2705	subplace := num : num : signed
				2706	signed := num
				2707	signed := + signed
				2708	signed := - signed
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2709	-----------------------------------------------------------------------------*/
				2710
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2711	static void __kmp_process_subplace_list(const char **scan,
				2712	kmp_affin_mask_t *osId2Mask,
				2713	int maxOsId, kmp_affin_mask_t *tempMask,
				2714	int *setSize) {
				2715	const char *next;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2716
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2717	for (;;) {
				2718	int start, count, stride, i;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2719
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2720	// Read in the starting proc id
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2721	SKIP_WS(*scan);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2722	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list");
				2723	next = *scan;
				2724	SKIP_DIGITS(next);
				2725	start = __kmp_str_to_int(scan, next);
				2726	KMP_ASSERT(start >= 0);
				2727	*scan = next;
				2728
				2729	// valid follow sets are ',' ':' and '}'
				2730	SKIP_WS(*scan);
				2731	if (scan == '}' \|\| scan == ',') {
				2732	if ((start > maxOsId) \|\|
				2733	(!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
				2734	if (__kmp_affinity_verbose \|\|
				2735	(__kmp_affinity_warnings &&
				2736	(__kmp_affinity_type != affinity_none))) {
				2737	KMP_WARNING(AffIgnoreInvalidProcID, start);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2738	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2739	} else {
				2740	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
				2741	(*setSize)++;
				2742	}
				2743	if (**scan == '}') {
				2744	break;
				2745	}
				2746	(*scan)++; // skip ','
				2747	continue;
				2748	}
				2749	KMP_ASSERT2(**scan == ':', "bad explicit places list");
				2750	(*scan)++; // skip ':'
				2751
				2752	// Read count parameter
				2753	SKIP_WS(*scan);
				2754	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list");
				2755	next = *scan;
				2756	SKIP_DIGITS(next);
				2757	count = __kmp_str_to_int(scan, next);
				2758	KMP_ASSERT(count >= 0);
				2759	*scan = next;
				2760
				2761	// valid follow sets are ',' ':' and '}'
				2762	SKIP_WS(*scan);
				2763	if (scan == '}' \|\| scan == ',') {
				2764	for (i = 0; i < count; i++) {
				2765	if ((start > maxOsId) \|\|
				2766	(!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
				2767	if (__kmp_affinity_verbose \|\|
				2768	(__kmp_affinity_warnings &&
				2769	(__kmp_affinity_type != affinity_none))) {
				2770	KMP_WARNING(AffIgnoreInvalidProcID, start);
				2771	}
				2772	break; // don't proliferate warnings for large count
				2773	} else {
				2774	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
				2775	start++;
				2776	(*setSize)++;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2777	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2778	}
				2779	if (**scan == '}') {
				2780	break;
				2781	}
				2782	(*scan)++; // skip ','
				2783	continue;
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	2784	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2785	KMP_ASSERT2(**scan == ':', "bad explicit places list");
				2786	(*scan)++; // skip ':'
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2787
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2788	// Read stride parameter
				2789	int sign = +1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2790	for (;;) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2791	SKIP_WS(*scan);
				2792	if (**scan == '+') {
				2793	(*scan)++; // skip '+'
				2794	continue;
				2795	}
				2796	if (**scan == '-') {
				2797	sign *= -1;
				2798	(*scan)++; // skip '-'
				2799	continue;
				2800	}
				2801	break;
				2802	}
				2803	SKIP_WS(*scan);
				2804	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list");
				2805	next = *scan;
				2806	SKIP_DIGITS(next);
				2807	stride = __kmp_str_to_int(scan, next);
				2808	KMP_ASSERT(stride >= 0);
				2809	*scan = next;
				2810	stride *= sign;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2811
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2812	// valid follow sets are ',' and '}'
				2813	SKIP_WS(*scan);
				2814	if (scan == '}' \|\| scan == ',') {
				2815	for (i = 0; i < count; i++) {
				2816	if ((start > maxOsId) \|\|
				2817	(!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
				2818	if (__kmp_affinity_verbose \|\|
				2819	(__kmp_affinity_warnings &&
				2820	(__kmp_affinity_type != affinity_none))) {
				2821	KMP_WARNING(AffIgnoreInvalidProcID, start);
				2822	}
				2823	break; // don't proliferate warnings for large count
				2824	} else {
				2825	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
				2826	start += stride;
				2827	(*setSize)++;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2828	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2829	}
				2830	if (**scan == '}') {
				2831	break;
				2832	}
				2833	(*scan)++; // skip ','
				2834	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2835	}
				2836
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2837	KMP_ASSERT2(0, "bad explicit places list");
				2838	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2839	}
				2840
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2841	static void __kmp_process_place(const char *scan, kmp_affin_mask_t osId2Mask,
				2842	int maxOsId, kmp_affin_mask_t *tempMask,
				2843	int *setSize) {
				2844	const char *next;
				2845
				2846	// valid follow sets are '{' '!' and num
				2847	SKIP_WS(*scan);
				2848	if (**scan == '{') {
				2849	(*scan)++; // skip '{'
				2850	__kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask, setSize);
				2851	KMP_ASSERT2(**scan == '}', "bad explicit places list");
				2852	(*scan)++; // skip '}'
				2853	} else if (**scan == '!') {
				2854	(*scan)++; // skip '!'
				2855	__kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
				2856	KMP_CPU_COMPLEMENT(maxOsId, tempMask);
				2857	} else if ((scan >= '0') && (scan <= '9')) {
				2858	next = *scan;
				2859	SKIP_DIGITS(next);
				2860	int num = __kmp_str_to_int(scan, next);
				2861	KMP_ASSERT(num >= 0);
				2862	if ((num > maxOsId) \|\|
				2863	(!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
				2864	if (__kmp_affinity_verbose \|\|
				2865	(__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
				2866	KMP_WARNING(AffIgnoreInvalidProcID, num);
				2867	}
				2868	} else {
				2869	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
				2870	(*setSize)++;
				2871	}
				2872	*scan = next; // skip num
				2873	} else {
				2874	KMP_ASSERT2(0, "bad explicit places list");
				2875	}
				2876	}
				2877
				2878	// static void
				2879	void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
				2880	unsigned int *out_numMasks,
				2881	const char *placelist,
				2882	kmp_affin_mask_t *osId2Mask,
				2883	int maxOsId) {
				2884	int i, j, count, stride, sign;
				2885	const char *scan = placelist;
				2886	const char *next = placelist;
				2887
				2888	numNewMasks = 2;
				2889	KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
				2890	nextNewMask = 0;
				2891
				2892	// tempMask is modified based on the previous or initial
				2893	// place to form the current place
				2894	// previousMask contains the previous place
				2895	kmp_affin_mask_t *tempMask;
				2896	kmp_affin_mask_t *previousMask;
				2897	KMP_CPU_ALLOC(tempMask);
				2898	KMP_CPU_ZERO(tempMask);
				2899	KMP_CPU_ALLOC(previousMask);
				2900	KMP_CPU_ZERO(previousMask);
				2901	int setSize = 0;
				2902
				2903	for (;;) {
				2904	__kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
				2905
				2906	// valid follow sets are ',' ':' and EOL
				2907	SKIP_WS(scan);
				2908	if (scan == '\0' \|\| scan == ',') {
				2909	if (setSize > 0) {
				2910	ADD_MASK(tempMask);
				2911	}
				2912	KMP_CPU_ZERO(tempMask);
				2913	setSize = 0;
				2914	if (*scan == '\0') {
				2915	break;
				2916	}
				2917	scan++; // skip ','
				2918	continue;
				2919	}
				2920
				2921	KMP_ASSERT2(*scan == ':', "bad explicit places list");
				2922	scan++; // skip ':'
				2923
				2924	// Read count parameter
				2925	SKIP_WS(scan);
				2926	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list");
				2927	next = scan;
				2928	SKIP_DIGITS(next);
				2929	count = __kmp_str_to_int(scan, *next);
				2930	KMP_ASSERT(count >= 0);
				2931	scan = next;
				2932
				2933	// valid follow sets are ',' ':' and EOL
				2934	SKIP_WS(scan);
				2935	if (scan == '\0' \|\| scan == ',') {
				2936	stride = +1;
				2937	} else {
				2938	KMP_ASSERT2(*scan == ':', "bad explicit places list");
				2939	scan++; // skip ':'
				2940
				2941	// Read stride parameter
				2942	sign = +1;
				2943	for (;;) {
				2944	SKIP_WS(scan);
				2945	if (*scan == '+') {
				2946	scan++; // skip '+'
				2947	continue;
				2948	}
				2949	if (*scan == '-') {
				2950	sign *= -1;
				2951	scan++; // skip '-'
				2952	continue;
				2953	}
				2954	break;
				2955	}
				2956	SKIP_WS(scan);
				2957	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list");
				2958	next = scan;
				2959	SKIP_DIGITS(next);
				2960	stride = __kmp_str_to_int(scan, *next);
				2961	KMP_DEBUG_ASSERT(stride >= 0);
				2962	scan = next;
				2963	stride *= sign;
				2964	}
				2965
				2966	// Add places determined by initial_place : count : stride
				2967	for (i = 0; i < count; i++) {
				2968	if (setSize == 0) {
				2969	break;
				2970	}
				2971	// Add the current place, then build the next place (tempMask) from that
				2972	KMP_CPU_COPY(previousMask, tempMask);
				2973	ADD_MASK(previousMask);
				2974	KMP_CPU_ZERO(tempMask);
				2975	setSize = 0;
				2976	KMP_CPU_SET_ITERATE(j, previousMask) {
				2977	if (!KMP_CPU_ISSET(j, previousMask)) {
				2978	continue;
				2979	}
				2980	if ((j + stride > maxOsId) \|\| (j + stride < 0) \|\|
				2981	(!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) \|\|
				2982	(!KMP_CPU_ISSET(j + stride,
				2983	KMP_CPU_INDEX(osId2Mask, j + stride)))) {
				2984	if ((__kmp_affinity_verbose \|\|
				2985	(__kmp_affinity_warnings &&
				2986	(__kmp_affinity_type != affinity_none))) &&
				2987	i < count - 1) {
				2988	KMP_WARNING(AffIgnoreInvalidProcID, j + stride);
				2989	}
				2990	continue;
				2991	}
				2992	KMP_CPU_SET(j + stride, tempMask);
				2993	setSize++;
				2994	}
				2995	}
				2996	KMP_CPU_ZERO(tempMask);
				2997	setSize = 0;
				2998
				2999	// valid follow sets are ',' and EOL
				3000	SKIP_WS(scan);
				3001	if (*scan == '\0') {
				3002	break;
				3003	}
				3004	if (*scan == ',') {
				3005	scan++; // skip ','
				3006	continue;
				3007	}
				3008
				3009	KMP_ASSERT2(0, "bad explicit places list");
				3010	}
				3011
				3012	*out_numMasks = nextNewMask;
				3013	if (nextNewMask == 0) {
				3014	*out_masks = NULL;
				3015	KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
				3016	return;
				3017	}
				3018	KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
				3019	KMP_CPU_FREE(tempMask);
				3020	KMP_CPU_FREE(previousMask);
				3021	for (i = 0; i < nextNewMask; i++) {
				3022	kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
				3023	kmp_affin_mask_t dest = KMP_CPU_INDEX((out_masks), i);
				3024	KMP_CPU_COPY(dest, src);
				3025	}
				3026	KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
				3027	}
				3028
				3029	#endif /* OMP_40_ENABLED */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3030
				3031	#undef ADD_MASK
				3032	#undef ADD_MASK_OSID
				3033
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3034	#if KMP_USE_HWLOC
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3035	static int __kmp_hwloc_count_children_by_type(hwloc_topology_t t, hwloc_obj_t o,
				3036	hwloc_obj_type_t type,
				3037	hwloc_obj_t* f) {
				3038	if (!hwloc_compare_types(o->type, type)) {
				3039	if (*f == NULL)
				3040	*f = o; // output first descendant found
				3041	return 1;
				3042	}
				3043	int sum = 0;
				3044	for (unsigned i = 0; i < o->arity; i++)
				3045	sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f);
				3046	return sum; // will be 0 if no one found (as PU arity is 0)
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3047	}
				3048
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3049	static int __kmp_hwloc_count_children_by_depth(hwloc_topology_t t,
				3050	hwloc_obj_t o, unsigned depth,
				3051	hwloc_obj_t* f) {
				3052	if (o->depth == depth) {
				3053	if (*f == NULL)
				3054	*f = o; // output first descendant found
				3055	return 1;
				3056	}
				3057	int sum = 0;
				3058	for (unsigned i = 0; i < o->arity; i++)
				3059	sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f);
				3060	return sum; // will be 0 if no one found (as PU arity is 0)
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3061	}
				3062
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3063	static int __kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o) {
				3064	// skip PUs descendants of the object o
				3065	int skipped = 0;
				3066	hwloc_obj_t hT = NULL;
				3067	int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
				3068	for (int i = 0; i < N; ++i) {
				3069	KMP_DEBUG_ASSERT(hT);
				3070	unsigned idx = hT->os_index;
				3071	if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
				3072	KMP_CPU_CLR(idx, __kmp_affin_fullMask);
				3073	KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
				3074	++skipped;
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3075	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3076	hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
				3077	}
				3078	return skipped; // count number of skipped units
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3079	}
				3080
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3081	static int __kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o) {
				3082	// check if obj has PUs present in fullMask
				3083	hwloc_obj_t hT = NULL;
				3084	int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
				3085	for (int i = 0; i < N; ++i) {
				3086	KMP_DEBUG_ASSERT(hT);
				3087	unsigned idx = hT->os_index;
				3088	if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask))
				3089	return 1; // found PU
				3090	hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
				3091	}
				3092	return 0; // no PUs found
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3093	}
				3094	#endif // KMP_USE_HWLOC
				3095
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3096	static void __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) {
				3097	AddrUnsPair *newAddr;
				3098	if (__kmp_hws_requested == 0)
				3099	goto _exit; // no topology limiting actions requested, exit
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3100	#if KMP_USE_HWLOC
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3101	if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
				3102	// Number of subobjects calculated dynamically, this works fine for
				3103	// any non-uniform topology.
				3104	// L2 cache objects are determined by depth, other objects - by type.
				3105	hwloc_topology_t tp = __kmp_hwloc_topology;
				3106	int nS=0, nN=0, nL=0, nC=0, nT=0; // logical index including skipped
				3107	int nCr=0, nTr=0; // number of requested units
				3108	int nPkg=0, nCo=0, n_new=0, n_old = 0, nCpP=0, nTpC=0; // counters
				3109	hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
				3110	int L2depth, idx;
Jonathan Peyton	dd4aa9b	2015-10-08 17:55:54 +0000	[diff] [blame]	3111
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3112	// check support of extensions ----------------------------------
				3113	int numa_support = 0, tile_support = 0;
				3114	if (__kmp_pu_os_idx)
				3115	hT = hwloc_get_pu_obj_by_os_index(tp,
				3116	__kmp_pu_os_idx[__kmp_avail_proc - 1]);
				3117	else
				3118	hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1);
				3119	if (hT == NULL) { // something's gone wrong
				3120	KMP_WARNING(AffHWSubsetUnsupported);
				3121	goto _exit;
				3122	}
				3123	// check NUMA node
				3124	hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
				3125	hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
				3126	if (hN != NULL && hN->depth > hS->depth) {
				3127	numa_support = 1; // 1 in case socket includes node(s)
				3128	} else if (__kmp_hws_node.num > 0) {
				3129	// don't support sockets inside NUMA node (no such HW found for testing)
				3130	KMP_WARNING(AffHWSubsetUnsupported);
				3131	goto _exit;
				3132	}
				3133	// check L2 cache, get object by depth because of multiple caches
				3134	L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
				3135	hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT);
				3136	if (hL != NULL && __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
				3137	&hC) > 1) {
				3138	tile_support = 1; // no sense to count L2 if it includes single core
				3139	} else if (__kmp_hws_tile.num > 0) {
				3140	if (__kmp_hws_core.num == 0) {
				3141	__kmp_hws_core = __kmp_hws_tile; // replace L2 with core
				3142	__kmp_hws_tile.num = 0;
				3143	} else {
				3144	// L2 and core are both requested, but represent same object
				3145	KMP_WARNING(AffHWSubsetInvalid);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3146	goto _exit;
				3147	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3148	}
				3149	// end of check of extensions -----------------------------------
				3150
				3151	// fill in unset items, validate settings -----------------------
				3152	if (__kmp_hws_socket.num == 0)
				3153	__kmp_hws_socket.num = nPackages; // use all available sockets
				3154	if (__kmp_hws_socket.offset >= nPackages) {
				3155	KMP_WARNING(AffHWSubsetManySockets);
				3156	goto _exit;
				3157	}
				3158	if (numa_support) {
				3159	int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE,
				3160	&hN); // num nodes in socket
				3161	if (__kmp_hws_node.num == 0)
				3162	__kmp_hws_node.num = NN; // use all available nodes
				3163	if (__kmp_hws_node.offset >= NN) {
				3164	KMP_WARNING(AffHWSubsetManyNodes);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3165	goto _exit;
				3166	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3167	if (tile_support) {
				3168	// get num tiles in node
				3169	int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
				3170	if (__kmp_hws_tile.num == 0) {
				3171	__kmp_hws_tile.num = NL + 1;
				3172	} // use all available tiles, some node may have more tiles, thus +1
				3173	if (__kmp_hws_tile.offset >= NL) {
				3174	KMP_WARNING(AffHWSubsetManyTiles);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3175	goto _exit;
				3176	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3177	int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
				3178	&hC); // num cores in tile
				3179	if (__kmp_hws_core.num == 0)
				3180	__kmp_hws_core.num = NC; // use all available cores
				3181	if (__kmp_hws_core.offset >= NC) {
				3182	KMP_WARNING(AffHWSubsetManyCores);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3183	goto _exit;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3184	}
				3185	} else { // tile_support
				3186	int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE,
				3187	&hC); // num cores in node
				3188	if (__kmp_hws_core.num == 0)
				3189	__kmp_hws_core.num = NC; // use all available cores
				3190	if (__kmp_hws_core.offset >= NC) {
				3191	KMP_WARNING(AffHWSubsetManyCores);
				3192	goto _exit;
				3193	}
				3194	} // tile_support
				3195	} else { // numa_support
				3196	if (tile_support) {
				3197	// get num tiles in socket
				3198	int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
				3199	if (__kmp_hws_tile.num == 0)
				3200	__kmp_hws_tile.num = NL; // use all available tiles
				3201	if (__kmp_hws_tile.offset >= NL) {
				3202	KMP_WARNING(AffHWSubsetManyTiles);
				3203	goto _exit;
				3204	}
				3205	int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
				3206	&hC); // num cores in tile
				3207	if (__kmp_hws_core.num == 0)
				3208	__kmp_hws_core.num = NC; // use all available cores
				3209	if (__kmp_hws_core.offset >= NC) {
				3210	KMP_WARNING(AffHWSubsetManyCores);
				3211	goto _exit;
				3212	}
				3213	} else { // tile_support
				3214	int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE,
				3215	&hC); // num cores in socket
				3216	if (__kmp_hws_core.num == 0)
				3217	__kmp_hws_core.num = NC; // use all available cores
				3218	if (__kmp_hws_core.offset >= NC) {
				3219	KMP_WARNING(AffHWSubsetManyCores);
				3220	goto _exit;
				3221	}
				3222	} // tile_support
				3223	}
				3224	if (__kmp_hws_proc.num == 0)
				3225	__kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all available procs
				3226	if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) {
				3227	KMP_WARNING(AffHWSubsetManyProcs);
				3228	goto _exit;
				3229	}
				3230	// end of validation --------------------------------------------
				3231
				3232	if (pAddr) // pAddr is NULL in case of affinity_none
				3233	newAddr = (AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair)
				3234	__kmp_avail_proc); // max size
				3235	// main loop to form HW subset ----------------------------------
				3236	hS = NULL;
				3237	int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE);
				3238	for (int s = 0; s < NP; ++s) {
				3239	// Check Socket -----------------------------------------------
				3240	hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS);
				3241	if (!__kmp_hwloc_obj_has_PUs(tp, hS))
				3242	continue; // skip socket if all PUs are out of fullMask
				3243	++nS; // only count objects those have PUs in affinity mask
				3244	if (nS <= __kmp_hws_socket.offset \|\|
				3245	nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) {
				3246	n_old += __kmp_hwloc_skip_PUs_obj(tp, hS); // skip socket
				3247	continue; // move to next socket
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3248	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3249	nCr = 0; // count number of cores per socket
				3250	// socket requested, go down the topology tree
				3251	// check 4 cases: (+NUMA+Tile), (+NUMA-Tile), (-NUMA+Tile), (-NUMA-Tile)
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3252	if (numa_support) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3253	nN = 0;
				3254	hN = NULL;
				3255	// num nodes in current socket
				3256	int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE,
				3257	&hN);
				3258	for (int n = 0; n < NN; ++n) {
				3259	// Check NUMA Node ----------------------------------------
				3260	if (!__kmp_hwloc_obj_has_PUs(tp, hN)) {
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3261	hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3262	continue; // skip node if all PUs are out of fullMask
				3263	}
				3264	++nN;
				3265	if (nN <= __kmp_hws_node.offset \|\|
				3266	nN > __kmp_hws_node.num + __kmp_hws_node.offset) {
				3267	// skip node as not requested
				3268	n_old += __kmp_hwloc_skip_PUs_obj(tp, hN); // skip node
				3269	hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
				3270	continue; // move to next node
				3271	}
				3272	// node requested, go down the topology tree
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3273	if (tile_support) {
				3274	nL = 0;
				3275	hL = NULL;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3276	int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3277	for (int l = 0; l < NL; ++l) {
				3278	// Check L2 (tile) ------------------------------------
				3279	if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
				3280	hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
				3281	continue; // skip tile if all PUs are out of fullMask
				3282	}
				3283	++nL;
				3284	if (nL <= __kmp_hws_tile.offset \|\|
				3285	nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
				3286	// skip tile as not requested
				3287	n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
				3288	hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
				3289	continue; // move to next tile
				3290	}
				3291	// tile requested, go down the topology tree
				3292	nC = 0;
				3293	hC = NULL;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3294	// num cores in current tile
				3295	int NC = __kmp_hwloc_count_children_by_type(tp, hL,
				3296	HWLOC_OBJ_CORE, &hC);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3297	for (int c = 0; c < NC; ++c) {
				3298	// Check Core ---------------------------------------
				3299	if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
				3300	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3301	continue; // skip core if all PUs are out of fullMask
				3302	}
				3303	++nC;
				3304	if (nC <= __kmp_hws_core.offset \|\|
				3305	nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
				3306	// skip node as not requested
				3307	n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
				3308	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3309	continue; // move to next node
				3310	}
				3311	// core requested, go down to PUs
				3312	nT = 0;
				3313	nTr = 0;
				3314	hT = NULL;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3315	// num procs in current core
				3316	int NT = __kmp_hwloc_count_children_by_type(tp, hC,
				3317	HWLOC_OBJ_PU, &hT);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3318	for (int t = 0; t < NT; ++t) {
				3319	// Check PU ---------------------------------------
				3320	idx = hT->os_index;
				3321	if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
				3322	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3323	continue; // skip PU if not in fullMask
				3324	}
				3325	++nT;
				3326	if (nT <= __kmp_hws_proc.offset \|\|
				3327	nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
				3328	// skip PU
				3329	KMP_CPU_CLR(idx, __kmp_affin_fullMask);
				3330	++n_old;
				3331	KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
				3332	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3333	continue; // move to next node
				3334	}
				3335	++nTr;
				3336	if (pAddr) // collect requested thread's data
				3337	newAddr[n_new] = (*pAddr)[n_old];
				3338	++n_new;
				3339	++n_old;
				3340	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3341	} // threads loop
				3342	if (nTr > 0) {
				3343	++nCr; // num cores per socket
				3344	++nCo; // total num cores
				3345	if (nTr > nTpC)
				3346	nTpC = nTr; // calc max threads per core
				3347	}
				3348	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3349	} // cores loop
				3350	hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
				3351	} // tiles loop
				3352	} else { // tile_support
				3353	// no tiles, check cores
				3354	nC = 0;
				3355	hC = NULL;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3356	// num cores in current node
				3357	int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE,
				3358	&hC);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3359	for (int c = 0; c < NC; ++c) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3360	// Check Core ---------------------------------------
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3361	if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
				3362	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3363	continue; // skip core if all PUs are out of fullMask
				3364	}
				3365	++nC;
				3366	if (nC <= __kmp_hws_core.offset \|\|
				3367	nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
				3368	// skip node as not requested
				3369	n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
				3370	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3371	continue; // move to next node
				3372	}
				3373	// core requested, go down to PUs
				3374	nT = 0;
				3375	nTr = 0;
				3376	hT = NULL;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3377	int NT = __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU,
				3378	&hT);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3379	for (int t = 0; t < NT; ++t) {
				3380	// Check PU ---------------------------------------
				3381	idx = hT->os_index;
				3382	if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
				3383	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3384	continue; // skip PU if not in fullMask
				3385	}
				3386	++nT;
				3387	if (nT <= __kmp_hws_proc.offset \|\|
				3388	nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
				3389	// skip PU
				3390	KMP_CPU_CLR(idx, __kmp_affin_fullMask);
				3391	++n_old;
				3392	KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
				3393	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3394	continue; // move to next node
				3395	}
				3396	++nTr;
				3397	if (pAddr) // collect requested thread's data
				3398	newAddr[n_new] = (*pAddr)[n_old];
				3399	++n_new;
				3400	++n_old;
				3401	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3402	} // threads loop
				3403	if (nTr > 0) {
				3404	++nCr; // num cores per socket
				3405	++nCo; // total num cores
				3406	if (nTr > nTpC)
				3407	nTpC = nTr; // calc max threads per core
				3408	}
				3409	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3410	} // cores loop
				3411	} // tiles support
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3412	hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
				3413	} // nodes loop
				3414	} else { // numa_support
				3415	// no NUMA support
				3416	if (tile_support) {
				3417	nL = 0;
				3418	hL = NULL;
				3419	// num tiles in current socket
				3420	int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
				3421	for (int l = 0; l < NL; ++l) {
				3422	// Check L2 (tile) ------------------------------------
				3423	if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
				3424	hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
				3425	continue; // skip tile if all PUs are out of fullMask
				3426	}
				3427	++nL;
				3428	if (nL <= __kmp_hws_tile.offset \|\|
				3429	nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
				3430	// skip tile as not requested
				3431	n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
				3432	hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
				3433	continue; // move to next tile
				3434	}
				3435	// tile requested, go down the topology tree
				3436	nC = 0;
				3437	hC = NULL;
				3438	// num cores per tile
				3439	int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
				3440	&hC);
				3441	for (int c = 0; c < NC; ++c) {
				3442	// Check Core ---------------------------------------
				3443	if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
				3444	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3445	continue; // skip core if all PUs are out of fullMask
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3446	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3447	++nC;
				3448	if (nC <= __kmp_hws_core.offset \|\|
				3449	nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
				3450	// skip node as not requested
				3451	n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
				3452	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3453	continue; // move to next node
				3454	}
				3455	// core requested, go down to PUs
				3456	nT = 0;
				3457	nTr = 0;
				3458	hT = NULL;
				3459	// num procs per core
				3460	int NT = __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU,
				3461	&hT);
				3462	for (int t = 0; t < NT; ++t) {
				3463	// Check PU ---------------------------------------
				3464	idx = hT->os_index;
				3465	if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
				3466	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3467	continue; // skip PU if not in fullMask
				3468	}
				3469	++nT;
				3470	if (nT <= __kmp_hws_proc.offset \|\|
				3471	nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
				3472	// skip PU
				3473	KMP_CPU_CLR(idx, __kmp_affin_fullMask);
				3474	++n_old;
				3475	KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
				3476	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3477	continue; // move to next node
				3478	}
				3479	++nTr;
				3480	if (pAddr) // collect requested thread's data
				3481	newAddr[n_new] = (*pAddr)[n_old];
				3482	++n_new;
				3483	++n_old;
				3484	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3485	} // threads loop
				3486	if (nTr > 0) {
				3487	++nCr; // num cores per socket
				3488	++nCo; // total num cores
				3489	if (nTr > nTpC)
				3490	nTpC = nTr; // calc max threads per core
				3491	}
				3492	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3493	} // cores loop
				3494	hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
				3495	} // tiles loop
				3496	} else { // tile_support
				3497	// no tiles, check cores
				3498	nC = 0;
				3499	hC = NULL;
				3500	// num cores in socket
				3501	int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE,
				3502	&hC);
				3503	for (int c = 0; c < NC; ++c) {
				3504	// Check Core -------------------------------------------
				3505	if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
				3506	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3507	continue; // skip core if all PUs are out of fullMask
				3508	}
				3509	++nC;
				3510	if (nC <= __kmp_hws_core.offset \|\|
				3511	nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
				3512	// skip node as not requested
				3513	n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
				3514	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3515	continue; // move to next node
				3516	}
				3517	// core requested, go down to PUs
				3518	nT = 0;
				3519	nTr = 0;
				3520	hT = NULL;
				3521	// num procs per core
				3522	int NT = __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU,
				3523	&hT);
				3524	for (int t = 0; t < NT; ++t) {
				3525	// Check PU ---------------------------------------
				3526	idx = hT->os_index;
				3527	if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
				3528	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3529	continue; // skip PU if not in fullMask
				3530	}
				3531	++nT;
				3532	if (nT <= __kmp_hws_proc.offset \|\|
				3533	nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
				3534	// skip PU
				3535	KMP_CPU_CLR(idx, __kmp_affin_fullMask);
				3536	++n_old;
				3537	KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
				3538	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3539	continue; // move to next node
				3540	}
				3541	++nTr;
				3542	if (pAddr) // collect requested thread's data
				3543	newAddr[n_new] = (*pAddr)[n_old];
				3544	++n_new;
				3545	++n_old;
				3546	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3547	} // threads loop
				3548	if (nTr > 0) {
				3549	++nCr; // num cores per socket
				3550	++nCo; // total num cores
				3551	if (nTr > nTpC)
				3552	nTpC = nTr; // calc max threads per core
				3553	}
				3554	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3555	} // cores loop
				3556	} // tiles support
				3557	} // numa_support
				3558	if (nCr > 0) { // found cores?
				3559	++nPkg; // num sockets
				3560	if (nCr > nCpP)
				3561	nCpP = nCr; // calc max cores per socket
				3562	}
				3563	} // sockets loop
				3564
				3565	// check the subset is valid
				3566	KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc);
				3567	KMP_DEBUG_ASSERT(nPkg > 0);
				3568	KMP_DEBUG_ASSERT(nCpP > 0);
				3569	KMP_DEBUG_ASSERT(nTpC > 0);
				3570	KMP_DEBUG_ASSERT(nCo > 0);
				3571	KMP_DEBUG_ASSERT(nPkg <= nPackages);
				3572	KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg);
				3573	KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore);
				3574	KMP_DEBUG_ASSERT(nCo <= __kmp_ncores);
				3575
				3576	nPackages = nPkg; // correct num sockets
				3577	nCoresPerPkg = nCpP; // correct num cores per socket
				3578	__kmp_nThreadsPerCore = nTpC; // correct num threads per core
				3579	__kmp_avail_proc = n_new; // correct num procs
				3580	__kmp_ncores = nCo; // correct num cores
				3581	// hwloc topology method end
				3582	} else
				3583	#endif // KMP_USE_HWLOC
				3584	{
				3585	int n_old = 0, n_new = 0, proc_num = 0;
				3586	if (__kmp_hws_node.num > 0 \|\| __kmp_hws_tile.num > 0) {
				3587	KMP_WARNING(AffHWSubsetNoHWLOC);
				3588	goto _exit;
				3589	}
				3590	if (__kmp_hws_socket.num == 0)
				3591	__kmp_hws_socket.num = nPackages; // use all available sockets
				3592	if (__kmp_hws_core.num == 0)
				3593	__kmp_hws_core.num = nCoresPerPkg; // use all available cores
				3594	if (__kmp_hws_proc.num == 0 \|\|
				3595	__kmp_hws_proc.num > __kmp_nThreadsPerCore)
				3596	__kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all HW contexts
				3597	if ( !__kmp_affinity_uniform_topology() ) {
				3598	KMP_WARNING( AffHWSubsetNonUniform );
				3599	goto _exit; // don't support non-uniform topology
				3600	}
				3601	if ( depth > 3 ) {
				3602	KMP_WARNING( AffHWSubsetNonThreeLevel );
				3603	goto _exit; // don't support not-3-level topology
				3604	}
				3605	if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) {
				3606	KMP_WARNING(AffHWSubsetManySockets);
				3607	goto _exit;
				3608	}
				3609	if ( __kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg ) {
				3610	KMP_WARNING( AffHWSubsetManyCores );
				3611	goto _exit;
				3612	}
				3613	// Form the requested subset
				3614	if (pAddr) // pAddr is NULL in case of affinity_none
				3615	newAddr = (AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair)
				3616	__kmp_hws_socket.num *
				3617	__kmp_hws_core.num *
				3618	__kmp_hws_proc.num);
				3619	for (int i = 0; i < nPackages; ++i) {
				3620	if (i < __kmp_hws_socket.offset \|\|
				3621	i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) {
				3622	// skip not-requested socket
				3623	n_old += nCoresPerPkg * __kmp_nThreadsPerCore;
				3624	if (__kmp_pu_os_idx != NULL) {
				3625	// walk through skipped socket
				3626	for (int j = 0; j < nCoresPerPkg; ++j) {
				3627	for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
				3628	KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
				3629	++proc_num;
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	3630	}
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3631	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3632	}
				3633	} else {
				3634	// walk through requested socket
				3635	for (int j = 0; j < nCoresPerPkg; ++j) {
				3636	if (j < __kmp_hws_core.offset \|\|
				3637	j >= __kmp_hws_core.offset + __kmp_hws_core.num)
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3638	{ // skip not-requested core
				3639	n_old += __kmp_nThreadsPerCore;
				3640	if (__kmp_pu_os_idx != NULL) {
				3641	for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
				3642	KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
				3643	++proc_num;
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	3644	}
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3645	}
				3646	} else {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3647	// walk through requested core
				3648	for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
				3649	if (k < __kmp_hws_proc.num) {
				3650	if (pAddr) // collect requested thread's data
				3651	newAddr[n_new] = (*pAddr)[n_old];
				3652	n_new++;
				3653	} else {
				3654	if (__kmp_pu_os_idx != NULL)
				3655	KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3656	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3657	n_old++;
				3658	++proc_num;
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	3659	}
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3660	}
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	3661	}
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3662	}
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3663	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3664	KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
				3665	KMP_DEBUG_ASSERT(n_new == __kmp_hws_socket.num * __kmp_hws_core.num *
				3666	__kmp_hws_proc.num);
				3667	nPackages = __kmp_hws_socket.num; // correct nPackages
				3668	nCoresPerPkg = __kmp_hws_core.num; // correct nCoresPerPkg
				3669	__kmp_nThreadsPerCore = __kmp_hws_proc.num; // correct __kmp_nThreadsPerCore
				3670	__kmp_avail_proc = n_new; // correct avail_proc
				3671	__kmp_ncores = nPackages * __kmp_hws_core.num; // correct ncores
				3672	} // non-hwloc topology method
				3673	if (pAddr) {
				3674	__kmp_free( *pAddr );
				3675	*pAddr = newAddr; // replace old topology with new one
				3676	}
				3677	if (__kmp_affinity_verbose) {
				3678	char m[KMP_AFFIN_MASK_PRINT_LEN];
				3679	__kmp_affinity_print_mask(m,KMP_AFFIN_MASK_PRINT_LEN,__kmp_affin_fullMask);
				3680	if (__kmp_affinity_respect_mask) {
				3681	KMP_INFORM(InitOSProcSetRespect, "KMP_HW_SUBSET", m);
Paul Osmialowski	ecbe2ea	2016-07-29 20:55:03 +0000	[diff] [blame]	3682	} else {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3683	KMP_INFORM(InitOSProcSetNotRespect, "KMP_HW_SUBSET", m);
Paul Osmialowski	ecbe2ea	2016-07-29 20:55:03 +0000	[diff] [blame]	3684	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3685	KMP_INFORM(AvailableOSProc, "KMP_HW_SUBSET", __kmp_avail_proc);
				3686	kmp_str_buf_t buf;
				3687	__kmp_str_buf_init(&buf);
				3688	__kmp_str_buf_print(&buf, "%d", nPackages);
				3689	KMP_INFORM(TopologyExtra, "KMP_HW_SUBSET", buf.str, nCoresPerPkg,
				3690	__kmp_nThreadsPerCore, __kmp_ncores);
				3691	__kmp_str_buf_free(&buf);
				3692	}
				3693	_exit:
				3694	if (__kmp_pu_os_idx != NULL) {
				3695	__kmp_free(__kmp_pu_os_idx);
				3696	__kmp_pu_os_idx = NULL;
				3697	}
				3698	}
				3699
				3700	// This function figures out the deepest level at which there is at least one
				3701	// cluster/core with more than one processing unit bound to it.
				3702	static int __kmp_affinity_find_core_level(const AddrUnsPair *address2os,
				3703	int nprocs, int bottom_level) {
				3704	int core_level = 0;
				3705
				3706	for (int i = 0; i < nprocs; i++) {
				3707	for (int j = bottom_level; j > 0; j--) {
				3708	if (address2os[i].first.labels[j] > 0) {
				3709	if (core_level < (j - 1)) {
				3710	core_level = j - 1;
				3711	}
				3712	}
				3713	}
				3714	}
				3715	return core_level;
				3716	}
				3717
				3718	// This function counts number of clusters/cores at given level.
				3719	static int __kmp_affinity_compute_ncores(const AddrUnsPair *address2os,
				3720	int nprocs, int bottom_level,
				3721	int core_level) {
				3722	int ncores = 0;
				3723	int i, j;
				3724
				3725	j = bottom_level;
				3726	for (i = 0; i < nprocs; i++) {
				3727	for (j = bottom_level; j > core_level; j--) {
				3728	if ((i + 1) < nprocs) {
				3729	if (address2os[i + 1].first.labels[j] > 0) {
				3730	break;
				3731	}
				3732	}
				3733	}
				3734	if (j == core_level) {
				3735	ncores++;
				3736	}
				3737	}
				3738	if (j > core_level) {
				3739	// In case of ( nprocs < __kmp_avail_proc ) we may end too deep and miss one
				3740	// core. May occur when called from __kmp_affinity_find_core().
				3741	ncores++;
				3742	}
				3743	return ncores;
				3744	}
				3745
				3746	// This function finds to which cluster/core given processing unit is bound.
				3747	static int __kmp_affinity_find_core(const AddrUnsPair *address2os, int proc,
				3748	int bottom_level, int core_level) {
				3749	return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level,
				3750	core_level) - 1;
				3751	}
				3752
				3753	// This function finds maximal number of processing units bound to a
				3754	// cluster/core at given level.
				3755	static int __kmp_affinity_max_proc_per_core(const AddrUnsPair *address2os,
				3756	int nprocs, int bottom_level,
				3757	int core_level) {
				3758	int maxprocpercore = 0;
				3759
				3760	if (core_level < bottom_level) {
				3761	for (int i = 0; i < nprocs; i++) {
				3762	int percore = address2os[i].first.labels[core_level + 1] + 1;
				3763
				3764	if (percore > maxprocpercore) {
				3765	maxprocpercore = percore;
				3766	}
				3767	}
				3768	} else {
				3769	maxprocpercore = 1;
				3770	}
				3771	return maxprocpercore;
Paul Osmialowski	ecbe2ea	2016-07-29 20:55:03 +0000	[diff] [blame]	3772	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3773
				// Topology table filled in by the __kmp_affinity_create_*_map() calls made
				// from __kmp_aux_affinity_initialize(); stays NULL when discovery fails.
				3774	static AddrUnsPair *address2os = NULL;
// Allocated by __kmp_aux_affinity_initialize() for the balanced affinity type
// on non-uniform topologies: one slot per (core, unit-in-core) pair holding the
// value of address2os[i].second, with -1 marking an unused slot.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3775	static int *procarr = NULL;
				// Topology depth saved by the same non-uniform balanced path for later use.
				3776	static int __kmp_aff_depth = 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3777
// Early exit used when a topology-discovery routine reports depth 0: asserts
// that the affinity type is affinity_none and that no topology table was
// produced, calls __kmp_apply_thread_places() with no table, and returns from
// the enclosing (void) function. Note that the expansion contains a `return`.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3778	#define KMP_EXIT_AFF_NONE \
				3779	KMP_ASSERT(__kmp_affinity_type == affinity_none); \
				3780	KMP_ASSERT(address2os == NULL); \
				3781	__kmp_apply_thread_places(NULL, 0); \
				3782	return;
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	3783
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3784	static int __kmp_affinity_cmp_Address_child_num(const void a, const void b) {
				3785	const Address aa = (const Address )&(((AddrUnsPair *)a)->first);
				3786	const Address bb = (const Address )&(((AddrUnsPair *)b)->first);
				3787	unsigned depth = aa->depth;
				3788	unsigned i;
				3789	KMP_DEBUG_ASSERT(depth == bb->depth);
				3790	KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
				3791	KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
				3792	for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
				3793	int j = depth - i - 1;
				3794	if (aa->childNums[j] < bb->childNums[j])
				3795	return -1;
				3796	if (aa->childNums[j] > bb->childNums[j])
				3797	return 1;
				3798	}
				3799	for (; i < depth; i++) {
				3800	int j = i - __kmp_affinity_compact;
				3801	if (aa->childNums[j] < bb->childNums[j])
				3802	return -1;
				3803	if (aa->childNums[j] > bb->childNums[j])
				3804	return 1;
				3805	}
				3806	return 0;
Jonathan Peyton	e6abe52	2016-09-02 20:54:58 +0000	[diff] [blame]	3807	}
				3808
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3809	static void __kmp_aux_affinity_initialize(void) {
				3810	if (__kmp_affinity_masks != NULL) {
				3811	KMP_ASSERT(__kmp_affin_fullMask != NULL);
				3812	return;
				3813	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3814
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3815	// Create the "full" mask - this defines all of the processors that we
				3816	// consider to be in the machine model. If respect is set, then it is the
				3817	// initialization thread's affinity mask. Otherwise, it is all processors that
				3818	// we know about on the machine.
				3819	if (__kmp_affin_fullMask == NULL) {
				3820	KMP_CPU_ALLOC(__kmp_affin_fullMask);
				3821	}
				3822	if (KMP_AFFINITY_CAPABLE()) {
				3823	if (__kmp_affinity_respect_mask) {
				3824	__kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3825
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3826	// Count the number of available processors.
				3827	unsigned i;
				3828	__kmp_avail_proc = 0;
				3829	KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
				3830	if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
				3831	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3832	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3833	__kmp_avail_proc++;
				3834	}
				3835	if (__kmp_avail_proc > __kmp_xproc) {
				3836	if (__kmp_affinity_verbose \|\|
				3837	(__kmp_affinity_warnings &&
				3838	(__kmp_affinity_type != affinity_none))) {
				3839	KMP_WARNING(ErrorInitializeAffinity);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3840	}
				3841	__kmp_affinity_type = affinity_none;
Andrey Churbanov	1f037e4	2015-03-10 09:15:26 +0000	[diff] [blame]	3842	KMP_AFFINITY_DISABLE();
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3843	return;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3844	}
				3845	} else {
				3846	__kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
				3847	__kmp_avail_proc = __kmp_xproc;
				3848	}
				3849	}
				3850
				3851	int depth = -1;
				3852	kmp_i18n_id_t msg_id = kmp_i18n_null;
				3853
				3854	// For backward compatibility, setting KMP_CPUINFO_FILE =>
				3855	// KMP_TOPOLOGY_METHOD=cpuinfo
				3856	if ((__kmp_cpuinfo_file != NULL) &&
				3857	(__kmp_affinity_top_method == affinity_top_method_all)) {
				3858	__kmp_affinity_top_method = affinity_top_method_cpuinfo;
				3859	}
				3860
				3861	if (__kmp_affinity_top_method == affinity_top_method_all) {
				3862	// In the default code path, errors are not fatal - we just try using
				3863	// another method. We only emit a warning message if affinity is on, or the
				3864	// verbose flag is set, an the nowarnings flag was not set.
				3865	const char *file_name = NULL;
				3866	int line = 0;
				3867	#if KMP_USE_HWLOC
				3868	if (depth < 0 &&
				3869	__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
				3870	if (__kmp_affinity_verbose) {
				3871	KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
				3872	}
				3873	if (!__kmp_hwloc_error) {
				3874	depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
				3875	if (depth == 0) {
				3876	KMP_EXIT_AFF_NONE;
				3877	} else if (depth < 0 && __kmp_affinity_verbose) {
				3878	KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
				3879	}
				3880	} else if (__kmp_affinity_verbose) {
				3881	KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
				3882	}
				3883	}
				3884	#endif
				3885
				3886	#if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
				3887
				3888	if (depth < 0) {
				3889	if (__kmp_affinity_verbose) {
				3890	KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
				3891	}
				3892
				3893	file_name = NULL;
				3894	depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
				3895	if (depth == 0) {
				3896	KMP_EXIT_AFF_NONE;
				3897	}
				3898
				3899	if (depth < 0) {
				3900	if (__kmp_affinity_verbose) {
				3901	if (msg_id != kmp_i18n_null) {
				3902	KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY",
				3903	__kmp_i18n_catgets(msg_id),
				3904	KMP_I18N_STR(DecodingLegacyAPIC));
				3905	} else {
				3906	KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
				3907	KMP_I18N_STR(DecodingLegacyAPIC));
				3908	}
				3909	}
				3910
				3911	file_name = NULL;
				3912	depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
				3913	if (depth == 0) {
				3914	KMP_EXIT_AFF_NONE;
				3915	}
				3916	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3917	}
				3918
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3919	#endif /* KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3920
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3921	#if KMP_OS_LINUX
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3922
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3923	if (depth < 0) {
				3924	if (__kmp_affinity_verbose) {
				3925	if (msg_id != kmp_i18n_null) {
				3926	KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY",
				3927	__kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3928	} else {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3929	KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3930	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3931	}
				3932
				3933	FILE *f = fopen("/proc/cpuinfo", "r");
				3934	if (f == NULL) {
				3935	msg_id = kmp_i18n_str_CantOpenCpuinfo;
				3936	} else {
				3937	file_name = "/proc/cpuinfo";
				3938	depth =
				3939	__kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
				3940	fclose(f);
				3941	if (depth == 0) {
				3942	KMP_EXIT_AFF_NONE;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3943	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3944	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3945	}
				3946
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3947	#endif /* KMP_OS_LINUX */
				3948
				3949	#if KMP_GROUP_AFFINITY
				3950
				3951	if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
				3952	if (__kmp_affinity_verbose) {
				3953	KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
				3954	}
				3955
				3956	depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
				3957	KMP_ASSERT(depth != 0);
				3958	}
				3959
				3960	#endif /* KMP_GROUP_AFFINITY */
				3961
				3962	if (depth < 0) {
				3963	if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
				3964	if (file_name == NULL) {
				3965	KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
				3966	} else if (line == 0) {
				3967	KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
				3968	} else {
				3969	KMP_INFORM(UsingFlatOSFileLine, file_name, line,
				3970	__kmp_i18n_catgets(msg_id));
				3971	}
				3972	}
				3973	// FIXME - print msg if msg_id = kmp_i18n_null ???
				3974
				3975	file_name = "";
				3976	depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
				3977	if (depth == 0) {
				3978	KMP_EXIT_AFF_NONE;
				3979	}
				3980	KMP_ASSERT(depth > 0);
				3981	KMP_ASSERT(address2os != NULL);
				3982	}
				3983	}
				3984
				3985	// If the user has specified that a paricular topology discovery method is to be
				3986	// used, then we abort if that method fails. The exception is group affinity,
				3987	// which might have been implicitly set.
				3988
				3989	#if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
				3990
				3991	else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
				3992	if (__kmp_affinity_verbose) {
				3993	KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
				3994	}
				3995
				3996	depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
				3997	if (depth == 0) {
				3998	KMP_EXIT_AFF_NONE;
				3999	}
				4000	if (depth < 0) {
				4001	KMP_ASSERT(msg_id != kmp_i18n_null);
				4002	KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
				4003	}
				4004	} else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
				4005	if (__kmp_affinity_verbose) {
				4006	KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
				4007	}
				4008
				4009	depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
				4010	if (depth == 0) {
				4011	KMP_EXIT_AFF_NONE;
				4012	}
				4013	if (depth < 0) {
				4014	KMP_ASSERT(msg_id != kmp_i18n_null);
				4015	KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
				4016	}
				4017	}
				4018
				4019	#endif /* KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 */
				4020
				4021	else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
				4022	const char *filename;
				4023	if (__kmp_cpuinfo_file != NULL) {
				4024	filename = __kmp_cpuinfo_file;
				4025	} else {
				4026	filename = "/proc/cpuinfo";
				4027	}
				4028
				4029	if (__kmp_affinity_verbose) {
				4030	KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
				4031	}
				4032
				4033	FILE *f = fopen(filename, "r");
				4034	if (f == NULL) {
				4035	int code = errno;
				4036	if (__kmp_cpuinfo_file != NULL) {
				4037	__kmp_msg(kmp_ms_fatal, KMP_MSG(CantOpenFileForReading, filename),
				4038	KMP_ERR(code), KMP_HNT(NameComesFrom_CPUINFO_FILE),
				4039	__kmp_msg_null);
				4040	} else {
				4041	__kmp_msg(kmp_ms_fatal, KMP_MSG(CantOpenFileForReading, filename),
				4042	KMP_ERR(code), __kmp_msg_null);
				4043	}
				4044	}
				4045	int line = 0;
				4046	depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
				4047	fclose(f);
				4048	if (depth < 0) {
				4049	KMP_ASSERT(msg_id != kmp_i18n_null);
				4050	if (line > 0) {
				4051	KMP_FATAL(FileLineMsgExiting, filename, line,
				4052	__kmp_i18n_catgets(msg_id));
				4053	} else {
				4054	KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
				4055	}
				4056	}
				4057	if (__kmp_affinity_type == affinity_none) {
				4058	KMP_ASSERT(depth == 0);
				4059	KMP_EXIT_AFF_NONE;
				4060	}
				4061	}
				4062
				4063	#if KMP_GROUP_AFFINITY
				4064
				4065	else if (__kmp_affinity_top_method == affinity_top_method_group) {
				4066	if (__kmp_affinity_verbose) {
				4067	KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
				4068	}
				4069
				4070	depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
				4071	KMP_ASSERT(depth != 0);
				4072	if (depth < 0) {
				4073	KMP_ASSERT(msg_id != kmp_i18n_null);
				4074	KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
				4075	}
				4076	}
				4077
				4078	#endif /* KMP_GROUP_AFFINITY */
				4079
				4080	else if (__kmp_affinity_top_method == affinity_top_method_flat) {
				4081	if (__kmp_affinity_verbose) {
				4082	KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
				4083	}
				4084
				4085	depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
				4086	if (depth == 0) {
				4087	KMP_EXIT_AFF_NONE;
				4088	}
				4089	// should not fail
				4090	KMP_ASSERT(depth > 0);
				4091	KMP_ASSERT(address2os != NULL);
				4092	}
				4093
				4094	#if KMP_USE_HWLOC
				4095	else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
				4096	KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
				4097	if (__kmp_affinity_verbose) {
				4098	KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
				4099	}
				4100	depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
				4101	if (depth == 0) {
				4102	KMP_EXIT_AFF_NONE;
				4103	}
				4104	}
				4105	#endif // KMP_USE_HWLOC
				4106
				4107	if (address2os == NULL) {
				4108	if (KMP_AFFINITY_CAPABLE() &&
				4109	(__kmp_affinity_verbose \|\|
				4110	(__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none)))) {
				4111	KMP_WARNING(ErrorInitializeAffinity);
				4112	}
				4113	__kmp_affinity_type = affinity_none;
				4114	KMP_AFFINITY_DISABLE();
				4115	return;
				4116	}
				4117
				4118	__kmp_apply_thread_places(&address2os, depth);
				4119
				4120	// Create the table of masks, indexed by thread Id.
				4121	unsigned maxIndex;
				4122	unsigned numUnique;
				4123	kmp_affin_mask_t *osId2Mask =
				4124	__kmp_create_masks(&maxIndex, &numUnique, address2os, __kmp_avail_proc);
				4125	if (__kmp_affinity_gran_levels == 0) {
				4126	KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
				4127	}
				4128
				4129	// Set the childNums vector in all Address objects. This must be done before
				4130	// we can sort using __kmp_affinity_cmp_Address_child_num(), which takes into
				4131	// account the setting of __kmp_affinity_compact.
				4132	__kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
				4133
				4134	switch (__kmp_affinity_type) {
				4135
				4136	case affinity_explicit:
				4137	KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
				4138	#if OMP_40_ENABLED
				4139	if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
				4140	#endif
				4141	{
				4142	__kmp_affinity_process_proclist(
				4143	&__kmp_affinity_masks, &__kmp_affinity_num_masks,
				4144	__kmp_affinity_proclist, osId2Mask, maxIndex);
				4145	}
				4146	#if OMP_40_ENABLED
				4147	else {
				4148	__kmp_affinity_process_placelist(
				4149	&__kmp_affinity_masks, &__kmp_affinity_num_masks,
				4150	__kmp_affinity_proclist, osId2Mask, maxIndex);
				4151	}
				4152	#endif
				4153	if (__kmp_affinity_num_masks == 0) {
				4154	if (__kmp_affinity_verbose \|\|
				4155	(__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
				4156	KMP_WARNING(AffNoValidProcID);
				4157	}
				4158	__kmp_affinity_type = affinity_none;
				4159	return;
				4160	}
				4161	break;
				4162
				4163	// The other affinity types rely on sorting the Addresses according to some
				4164	// permutation of the machine topology tree. Set __kmp_affinity_compact and
				4165	// __kmp_affinity_offset appropriately, then jump to a common code fragment
				4166	// to do the sort and create the array of affinity masks.
				4167
				4168	case affinity_logical:
				4169	__kmp_affinity_compact = 0;
				4170	if (__kmp_affinity_offset) {
				4171	__kmp_affinity_offset =
				4172	__kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
				4173	}
				4174	goto sortAddresses;
				4175
				4176	case affinity_physical:
				4177	if (__kmp_nThreadsPerCore > 1) {
				4178	__kmp_affinity_compact = 1;
				4179	if (__kmp_affinity_compact >= depth) {
				4180	__kmp_affinity_compact = 0;
				4181	}
				4182	} else {
				4183	__kmp_affinity_compact = 0;
				4184	}
				4185	if (__kmp_affinity_offset) {
				4186	__kmp_affinity_offset =
				4187	__kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
				4188	}
				4189	goto sortAddresses;
				4190
				4191	case affinity_scatter:
				4192	if (__kmp_affinity_compact >= depth) {
				4193	__kmp_affinity_compact = 0;
				4194	} else {
				4195	__kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
				4196	}
				4197	goto sortAddresses;
				4198
				4199	case affinity_compact:
				4200	if (__kmp_affinity_compact >= depth) {
				4201	__kmp_affinity_compact = depth - 1;
				4202	}
				4203	goto sortAddresses;
				4204
				4205	case affinity_balanced:
				4206	if (depth <= 1) {
				4207	if (__kmp_affinity_verbose \|\| __kmp_affinity_warnings) {
				4208	KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
				4209	}
				4210	__kmp_affinity_type = affinity_none;
				4211	return;
				4212	} else if (__kmp_affinity_uniform_topology()) {
				4213	break;
				4214	} else { // Non-uniform topology
				4215
				4216	// Save the depth for further usage
				4217	__kmp_aff_depth = depth;
				4218
				4219	int core_level = __kmp_affinity_find_core_level(
				4220	address2os, __kmp_avail_proc, depth - 1);
				4221	int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
				4222	depth - 1, core_level);
				4223	int maxprocpercore = __kmp_affinity_max_proc_per_core(
				4224	address2os, __kmp_avail_proc, depth - 1, core_level);
				4225
				4226	int nproc = ncores * maxprocpercore;
				4227	if ((nproc < 2) \|\| (nproc < __kmp_avail_proc)) {
				4228	if (__kmp_affinity_verbose \|\| __kmp_affinity_warnings) {
				4229	KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
				4230	}
				4231	__kmp_affinity_type = affinity_none;
				4232	return;
				4233	}
				4234
				4235	procarr = (int )__kmp_allocate(sizeof(int) nproc);
				4236	for (int i = 0; i < nproc; i++) {
				4237	procarr[i] = -1;
				4238	}
				4239
				4240	int lastcore = -1;
				4241	int inlastcore = 0;
				4242	for (int i = 0; i < __kmp_avail_proc; i++) {
				4243	int proc = address2os[i].second;
				4244	int core =
				4245	__kmp_affinity_find_core(address2os, i, depth - 1, core_level);
				4246
				4247	if (core == lastcore) {
				4248	inlastcore++;
				4249	} else {
				4250	inlastcore = 0;
				4251	}
				4252	lastcore = core;
				4253
				4254	procarr[core * maxprocpercore + inlastcore] = proc;
				4255	}
				4256
				4257	break;
				4258	}
				4259
				4260	sortAddresses:
				4261	// Allocate the gtid->affinity mask table.
				4262	if (__kmp_affinity_dups) {
				4263	__kmp_affinity_num_masks = __kmp_avail_proc;
				4264	} else {
				4265	__kmp_affinity_num_masks = numUnique;
				4266	}
				4267
				4268	#if OMP_40_ENABLED
				4269	if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
				4270	(__kmp_affinity_num_places > 0) &&
				4271	((unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks)) {
				4272	__kmp_affinity_num_masks = __kmp_affinity_num_places;
				4273	}
				4274	#endif
				4275
				4276	KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
				4277
				4278	// Sort the address2os table according to the current setting of
				4279	// __kmp_affinity_compact, then fill out __kmp_affinity_masks.
				4280	qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
				4281	__kmp_affinity_cmp_Address_child_num);
				4282	{
				4283	int i;
				4284	unsigned j;
				4285	for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
				4286	if ((!__kmp_affinity_dups) && (!address2os[i].first.leader)) {
				4287	continue;
				4288	}
				4289	unsigned osId = address2os[i].second;
				4290	kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
				4291	kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j);
				4292	KMP_ASSERT(KMP_CPU_ISSET(osId, src));
				4293	KMP_CPU_COPY(dest, src);
				4294	if (++j >= __kmp_affinity_num_masks) {
				4295	break;
				4296	}
				4297	}
				4298	KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
				4299	}
				4300	break;
				4301
				4302	default:
				4303	KMP_ASSERT2(0, "Unexpected affinity setting");
				4304	}
				4305
				4306	KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex + 1);
				4307	machine_hierarchy.init(address2os, __kmp_avail_proc);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4308	}
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	4309	#undef KMP_EXIT_AFF_NONE
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4310
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4311	void __kmp_affinity_initialize(void) {
				4312	// Much of the code above was written assumming that if a machine was not
				4313	// affinity capable, then __kmp_affinity_type == affinity_none. We now
				4314	// explicitly represent this as __kmp_affinity_type == affinity_disabled.
				4315	// There are too many checks for __kmp_affinity_type == affinity_none
				4316	// in this code. Instead of trying to change them all, check if
				4317	// __kmp_affinity_type == affinity_disabled, and if so, slam it with
				4318	// affinity_none, call the real initialization routine, then restore
				4319	// __kmp_affinity_type to affinity_disabled.
				4320	int disabled = (__kmp_affinity_type == affinity_disabled);
				4321	if (!KMP_AFFINITY_CAPABLE()) {
				4322	KMP_ASSERT(disabled);
				4323	}
				4324	if (disabled) {
				4325	__kmp_affinity_type = affinity_none;
				4326	}
				4327	__kmp_aux_affinity_initialize();
				4328	if (disabled) {
				4329	__kmp_affinity_type = affinity_disabled;
				4330	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4331	}
				4332
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4333	void __kmp_affinity_uninitialize(void) {
				4334	if (__kmp_affinity_masks != NULL) {
				4335	KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
				4336	__kmp_affinity_masks = NULL;
				4337	}
				4338	if (__kmp_affin_fullMask != NULL) {
				4339	KMP_CPU_FREE(__kmp_affin_fullMask);
				4340	__kmp_affin_fullMask = NULL;
				4341	}
				4342	__kmp_affinity_num_masks = 0;
				4343	__kmp_affinity_type = affinity_default;
				4344	#if OMP_40_ENABLED
				4345	__kmp_affinity_num_places = 0;
				4346	#endif
				4347	if (__kmp_affinity_proclist != NULL) {
				4348	__kmp_free(__kmp_affinity_proclist);
				4349	__kmp_affinity_proclist = NULL;
				4350	}
				4351	if (address2os != NULL) {
				4352	__kmp_free(address2os);
				4353	address2os = NULL;
				4354	}
				4355	if (procarr != NULL) {
				4356	__kmp_free(procarr);
				4357	procarr = NULL;
				4358	}
				4359	#if KMP_USE_HWLOC
				4360	if (__kmp_hwloc_topology != NULL) {
				4361	hwloc_topology_destroy(__kmp_hwloc_topology);
				4362	__kmp_hwloc_topology = NULL;
				4363	}
				4364	#endif
				4365	KMPAffinity::destroy_api();
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4366	}
				4367
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4368	void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
				4369	if (!KMP_AFFINITY_CAPABLE()) {
				4370	return;
				4371	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4372
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4373	kmp_info_t th = (kmp_info_t )TCR_SYNC_PTR(__kmp_threads[gtid]);
				4374	if (th->th.th_affin_mask == NULL) {
				4375	KMP_CPU_ALLOC(th->th.th_affin_mask);
				4376	} else {
				4377	KMP_CPU_ZERO(th->th.th_affin_mask);
				4378	}
				4379
				4380	// Copy the thread mask to the kmp_info_t strucuture. If
				4381	// __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one that
				4382	// has all of the OS proc ids set, or if __kmp_affinity_respect_mask is set,
				4383	// then the full mask is the same as the mask of the initialization thread.
				4384	kmp_affin_mask_t *mask;
				4385	int i;
				4386
				4387	#if OMP_40_ENABLED
				4388	if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
				4389	#endif
				4390	{
				4391	if ((__kmp_affinity_type == affinity_none) \|\|
				4392	(__kmp_affinity_type == affinity_balanced)) {
				4393	#if KMP_GROUP_AFFINITY
				4394	if (__kmp_num_proc_groups > 1) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4395	return;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4396	}
				4397	#endif
				4398	KMP_ASSERT(__kmp_affin_fullMask != NULL);
				4399	i = KMP_PLACE_ALL;
				4400	mask = __kmp_affin_fullMask;
				4401	} else {
				4402	KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
				4403	i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
				4404	mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4405	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4406	}
				4407	#if OMP_40_ENABLED
				4408	else {
				4409	if ((!isa_root) \|\|
				4410	(__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
				4411	#if KMP_GROUP_AFFINITY
				4412	if (__kmp_num_proc_groups > 1) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4413	return;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4414	}
				4415	#endif
				4416	KMP_ASSERT(__kmp_affin_fullMask != NULL);
				4417	i = KMP_PLACE_ALL;
				4418	mask = __kmp_affin_fullMask;
				4419	} else {
				4420	// int i = some hash function or just a counter that doesn't
				4421	// always start at 0. Use gtid for now.
				4422	KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
				4423	i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
				4424	mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4425	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4426	}
				4427	#endif
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4428
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4429	#if OMP_40_ENABLED
				4430	th->th.th_current_place = i;
				4431	if (isa_root) {
				4432	th->th.th_new_place = i;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4433	th->th.th_first_place = 0;
				4434	th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4435	}
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	4436
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4437	if (i == KMP_PLACE_ALL) {
				4438	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
				4439	gtid));
				4440	} else {
				4441	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
				4442	gtid, i));
				4443	}
				4444	#else
				4445	if (i == -1) {
				4446	KA_TRACE(
				4447	100,
				4448	("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n",
				4449	gtid));
				4450	} else {
				4451	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
				4452	gtid, i));
				4453	}
				4454	#endif /* OMP_40_ENABLED */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4455
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4456	KMP_CPU_COPY(th->th.th_affin_mask, mask);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4457
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4458	if (__kmp_affinity_verbose) {
				4459	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4460	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4461	th->th.th_affin_mask);
				4462	KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
				4463	__kmp_gettid(), gtid, buf);
				4464	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4465
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4466	#if KMP_OS_WINDOWS
				4467	// On Windows* OS, the process affinity mask might have changed. If the user
				4468	// didn't request affinity and this call fails, just continue silently.
				4469	// See CQ171393.
				4470	if (__kmp_affinity_type == affinity_none) {
				4471	__kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
				4472	} else
Jonathan Peyton	7c465a5	2016-09-12 19:02:53 +0000	[diff] [blame]	4473	#endif
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4474	__kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
Jonathan Peyton	7c465a5	2016-09-12 19:02:53 +0000	[diff] [blame]	4475	}
				4476
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4477	#if OMP_40_ENABLED
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4478
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4479	void __kmp_affinity_set_place(int gtid) {
				4480	int retval;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4481
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4482	if (!KMP_AFFINITY_CAPABLE()) {
				4483	return;
				4484	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4485
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4486	kmp_info_t th = (kmp_info_t )TCR_SYNC_PTR(__kmp_threads[gtid]);
				4487
				4488	KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current "
				4489	"place = %d)\n",
				4490	gtid, th->th.th_new_place, th->th.th_current_place));
				4491
				4492	// Check that the new place is within this thread's partition.
				4493	KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
				4494	KMP_ASSERT(th->th.th_new_place >= 0);
				4495	KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
				4496	if (th->th.th_first_place <= th->th.th_last_place) {
				4497	KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&
				4498	(th->th.th_new_place <= th->th.th_last_place));
				4499	} else {
				4500	KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) \|\|
				4501	(th->th.th_new_place >= th->th.th_last_place));
				4502	}
				4503
				4504	// Copy the thread mask to the kmp_info_t strucuture,
				4505	// and set this thread's affinity.
				4506	kmp_affin_mask_t *mask =
				4507	KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place);
				4508	KMP_CPU_COPY(th->th.th_affin_mask, mask);
				4509	th->th.th_current_place = th->th.th_new_place;
				4510
				4511	if (__kmp_affinity_verbose) {
				4512	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4513	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4514	th->th.th_affin_mask);
				4515	KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
				4516	__kmp_gettid(), gtid, buf);
				4517	}
				4518	__kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
				4519	}
				4520
				4521	#endif /* OMP_40_ENABLED */
				4522
				4523	int __kmp_aux_set_affinity(void **mask) {
				4524	int gtid;
				4525	kmp_info_t *th;
				4526	int retval;
				4527
				4528	if (!KMP_AFFINITY_CAPABLE()) {
				4529	return -1;
				4530	}
				4531
				4532	gtid = __kmp_entry_gtid();
				4533	KA_TRACE(1000, ; {
				4534	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4535	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4536	(kmp_affin_mask_t )(mask));
				4537	__kmp_debug_printf(
				4538	"kmp_set_affinity: setting affinity mask for thread %d = %s\n", gtid,
				4539	buf);
				4540	});
				4541
				4542	if (__kmp_env_consistency_check) {
				4543	if ((mask == NULL) \|\| (*mask == NULL)) {
				4544	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
				4545	} else {
				4546	unsigned proc;
				4547	int num_procs = 0;
				4548
				4549	KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t )(mask))) {
				4550	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
				4551	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4552	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4553	if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t )(mask))) {
				4554	continue;
				4555	}
				4556	num_procs++;
				4557	}
				4558	if (num_procs == 0) {
				4559	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
				4560	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4561
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4562	#if KMP_GROUP_AFFINITY
				4563	if (__kmp_get_proc_group((kmp_affin_mask_t )(mask)) < 0) {
				4564	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
				4565	}
				4566	#endif /* KMP_GROUP_AFFINITY */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4567	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4568	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4569
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4570	th = __kmp_threads[gtid];
				4571	KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
				4572	retval = __kmp_set_system_affinity((kmp_affin_mask_t )(mask), FALSE);
				4573	if (retval == 0) {
				4574	KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t )(mask));
				4575	}
				4576
				4577	#if OMP_40_ENABLED
				4578	th->th.th_current_place = KMP_PLACE_UNDEFINED;
				4579	th->th.th_new_place = KMP_PLACE_UNDEFINED;
				4580	th->th.th_first_place = 0;
				4581	th->th.th_last_place = __kmp_affinity_num_masks - 1;
				4582
				4583	// Turn off 4.0 affinity for the current tread at this parallel level.
				4584	th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
				4585	#endif
				4586
				4587	return retval;
				4588	}
				4589
				4590	int __kmp_aux_get_affinity(void **mask) {
				4591	int gtid;
				4592	int retval;
				4593	kmp_info_t *th;
				4594
				4595	if (!KMP_AFFINITY_CAPABLE()) {
				4596	return -1;
				4597	}
				4598
				4599	gtid = __kmp_entry_gtid();
				4600	th = __kmp_threads[gtid];
				4601	KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
				4602
				4603	KA_TRACE(1000, ; {
				4604	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4605	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4606	th->th.th_affin_mask);
				4607	__kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n",
				4608	gtid, buf);
				4609	});
				4610
				4611	if (__kmp_env_consistency_check) {
				4612	if ((mask == NULL) \|\| (*mask == NULL)) {
				4613	KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
				4614	}
				4615	}
				4616
				4617	#if !KMP_OS_WINDOWS
				4618
				4619	retval = __kmp_get_system_affinity((kmp_affin_mask_t )(mask), FALSE);
				4620	KA_TRACE(1000, ; {
				4621	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4622	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4623	(kmp_affin_mask_t )(mask));
				4624	__kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n",
				4625	gtid, buf);
				4626	});
				4627	return retval;
				4628
				4629	#else
				4630
				4631	KMP_CPU_COPY((kmp_affin_mask_t )(mask), th->th.th_affin_mask);
				4632	return 0;
				4633
				4634	#endif /* KMP_OS_WINDOWS */
				4635	}
				4636
				4637	int __kmp_aux_get_affinity_max_proc() {
				4638	if (!KMP_AFFINITY_CAPABLE()) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4639	return 0;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4640	}
				4641	#if KMP_GROUP_AFFINITY
				4642	if (__kmp_num_proc_groups > 1) {
				4643	return (int)(__kmp_num_proc_groups * sizeof(DWORD_PTR) * CHAR_BIT);
				4644	}
				4645	#endif
				4646	return __kmp_xproc;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4647	}
				4648
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4649	int __kmp_aux_set_affinity_mask_proc(int proc, void **mask) {
				4650	int retval;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4651
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4652	if (!KMP_AFFINITY_CAPABLE()) {
				4653	return -1;
				4654	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4655
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4656	KA_TRACE(1000, ; {
				4657	int gtid = __kmp_entry_gtid();
				4658	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4659	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4660	(kmp_affin_mask_t )(mask));
				4661	__kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in "
				4662	"affinity mask for thread %d = %s\n",
				4663	proc, gtid, buf);
				4664	});
				4665
				4666	if (__kmp_env_consistency_check) {
				4667	if ((mask == NULL) \|\| (*mask == NULL)) {
				4668	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4669	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4670	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4671
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4672	if ((proc < 0) \|\| (proc >= __kmp_aux_get_affinity_max_proc())) {
				4673	return -1;
				4674	}
				4675	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
				4676	return -2;
				4677	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4678
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4679	KMP_CPU_SET(proc, (kmp_affin_mask_t )(mask));
				4680	return 0;
				4681	}
				4682
				4683	int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask) {
				4684	int retval;
				4685
				4686	if (!KMP_AFFINITY_CAPABLE()) {
				4687	return -1;
				4688	}
				4689
				4690	KA_TRACE(1000, ; {
				4691	int gtid = __kmp_entry_gtid();
				4692	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4693	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4694	(kmp_affin_mask_t )(mask));
				4695	__kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in "
				4696	"affinity mask for thread %d = %s\n",
				4697	proc, gtid, buf);
				4698	});
				4699
				4700	if (__kmp_env_consistency_check) {
				4701	if ((mask == NULL) \|\| (*mask == NULL)) {
				4702	KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4703	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4704	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4705
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4706	if ((proc < 0) \|\| (proc >= __kmp_aux_get_affinity_max_proc())) {
				4707	return -1;
				4708	}
				4709	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
				4710	return -2;
				4711	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4712
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4713	KMP_CPU_CLR(proc, (kmp_affin_mask_t )(mask));
				4714	return 0;
				4715	}
				4716
				4717	int __kmp_aux_get_affinity_mask_proc(int proc, void **mask) {
				4718	int retval;
				4719
				4720	if (!KMP_AFFINITY_CAPABLE()) {
				4721	return -1;
				4722	}
				4723
				4724	KA_TRACE(1000, ; {
				4725	int gtid = __kmp_entry_gtid();
				4726	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4727	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4728	(kmp_affin_mask_t )(mask));
				4729	__kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in "
				4730	"affinity mask for thread %d = %s\n",
				4731	proc, gtid, buf);
				4732	});
				4733
				4734	if (__kmp_env_consistency_check) {
				4735	if ((mask == NULL) \|\| (*mask == NULL)) {
				4736	KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
				4737	}
				4738	}
				4739
				4740	if ((proc < 0) \|\| (proc >= __kmp_aux_get_affinity_max_proc())) {
				4741	return -1;
				4742	}
				4743	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4744	return 0;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4745	}
				4746
				4747	return KMP_CPU_ISSET(proc, (kmp_affin_mask_t )(mask));
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4748	}
				4749
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4750	// Dynamic affinity settings - Affinity balanced
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4751	void __kmp_balanced_affinity(int tid, int nthreads) {
				4752	bool fine_gran = true;
Paul Osmialowski	ecbe2ea	2016-07-29 20:55:03 +0000	[diff] [blame]	4753
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4754	switch (__kmp_affinity_gran) {
				4755	case affinity_gran_fine:
				4756	case affinity_gran_thread:
				4757	break;
				4758	case affinity_gran_core:
				4759	if (__kmp_nThreadsPerCore > 1) {
				4760	fine_gran = false;
				4761	}
				4762	break;
				4763	case affinity_gran_package:
				4764	if (nCoresPerPkg > 1) {
				4765	fine_gran = false;
				4766	}
				4767	break;
				4768	default:
				4769	fine_gran = false;
				4770	}
				4771
				4772	if (__kmp_affinity_uniform_topology()) {
				4773	int coreID;
				4774	int threadID;
				4775	// Number of hyper threads per core in HT machine
				4776	int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
				4777	// Number of cores
				4778	int ncores = __kmp_ncores;
				4779	if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) {
				4780	__kmp_nth_per_core = __kmp_avail_proc / nPackages;
				4781	ncores = nPackages;
				4782	}
				4783	// How many threads will be bound to each core
				4784	int chunk = nthreads / ncores;
				4785	// How many cores will have an additional thread bound to it - "big cores"
				4786	int big_cores = nthreads % ncores;
				4787	// Number of threads on the big cores
				4788	int big_nth = (chunk + 1) * big_cores;
				4789	if (tid < big_nth) {
				4790	coreID = tid / (chunk + 1);
				4791	threadID = (tid % (chunk + 1)) % __kmp_nth_per_core;
				4792	} else { // tid >= big_nth
				4793	coreID = (tid - big_cores) / chunk;
				4794	threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core;
Paul Osmialowski	ecbe2ea	2016-07-29 20:55:03 +0000	[diff] [blame]	4795	}
				4796
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4797	KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
				4798	"Illegal set affinity operation when not capable");
				4799
				4800	kmp_affin_mask_t *mask;
				4801	KMP_CPU_ALLOC_ON_STACK(mask);
				4802	KMP_CPU_ZERO(mask);
				4803
				4804	if (fine_gran) {
				4805	int osID = address2os[coreID * __kmp_nth_per_core + threadID].second;
				4806	KMP_CPU_SET(osID, mask);
				4807	} else {
				4808	for (int i = 0; i < __kmp_nth_per_core; i++) {
				4809	int osID;
				4810	osID = address2os[coreID * __kmp_nth_per_core + i].second;
				4811	KMP_CPU_SET(osID, mask);
				4812	}
				4813	}
				4814	if (__kmp_affinity_verbose) {
				4815	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4816	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
				4817	KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
				4818	__kmp_gettid(), tid, buf);
				4819	}
				4820	__kmp_set_system_affinity(mask, TRUE);
				4821	KMP_CPU_FREE_FROM_STACK(mask);
				4822	} else { // Non-uniform topology
				4823
				4824	kmp_affin_mask_t *mask;
				4825	KMP_CPU_ALLOC_ON_STACK(mask);
				4826	KMP_CPU_ZERO(mask);
				4827
				4828	int core_level = __kmp_affinity_find_core_level(
				4829	address2os, __kmp_avail_proc, __kmp_aff_depth - 1);
				4830	int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
				4831	__kmp_aff_depth - 1, core_level);
				4832	int nth_per_core = __kmp_affinity_max_proc_per_core(
				4833	address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level);
				4834
				4835	// For performance gain consider the special case nthreads ==
				4836	// __kmp_avail_proc
				4837	if (nthreads == __kmp_avail_proc) {
				4838	if (fine_gran) {
				4839	int osID = address2os[tid].second;
				4840	KMP_CPU_SET(osID, mask);
				4841	} else {
				4842	int core = __kmp_affinity_find_core(address2os, tid,
				4843	__kmp_aff_depth - 1, core_level);
				4844	for (int i = 0; i < __kmp_avail_proc; i++) {
				4845	int osID = address2os[i].second;
				4846	if (__kmp_affinity_find_core(address2os, i, __kmp_aff_depth - 1,
				4847	core_level) == core) {
				4848	KMP_CPU_SET(osID, mask);
				4849	}
Paul Osmialowski	ecbe2ea	2016-07-29 20:55:03 +0000	[diff] [blame]	4850	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4851	}
				4852	} else if (nthreads <= ncores) {
				4853
				4854	int core = 0;
				4855	for (int i = 0; i < ncores; i++) {
				4856	// Check if this core from procarr[] is in the mask
				4857	int in_mask = 0;
				4858	for (int j = 0; j < nth_per_core; j++) {
				4859	if (procarr[i * nth_per_core + j] != -1) {
				4860	in_mask = 1;
				4861	break;
				4862	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4863	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4864	if (in_mask) {
				4865	if (tid == core) {
				4866	for (int j = 0; j < nth_per_core; j++) {
				4867	int osID = procarr[i * nth_per_core + j];
				4868	if (osID != -1) {
				4869	KMP_CPU_SET(osID, mask);
				4870	// For fine granularity it is enough to set the first available
				4871	// osID for this core
				4872	if (fine_gran) {
				4873	break;
				4874	}
				4875	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4876	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4877	break;
				4878	} else {
				4879	core++;
				4880	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4881	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4882	}
				4883	} else { // nthreads > ncores
				4884	// Array to save the number of processors at each core
				4885	int nproc_at_core = (int )KMP_ALLOCA(sizeof(int) * ncores);
				4886	// Array to save the number of cores with "x" available processors;
				4887	int *ncores_with_x_procs =
				4888	(int )KMP_ALLOCA(sizeof(int) (nth_per_core + 1));
				4889	// Array to save the number of cores with # procs from x to nth_per_core
				4890	int *ncores_with_x_to_max_procs =
				4891	(int )KMP_ALLOCA(sizeof(int) (nth_per_core + 1));
				4892
				4893	for (int i = 0; i <= nth_per_core; i++) {
				4894	ncores_with_x_procs[i] = 0;
				4895	ncores_with_x_to_max_procs[i] = 0;
				4896	}
				4897
				4898	for (int i = 0; i < ncores; i++) {
				4899	int cnt = 0;
				4900	for (int j = 0; j < nth_per_core; j++) {
				4901	if (procarr[i * nth_per_core + j] != -1) {
				4902	cnt++;
				4903	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4904	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4905	nproc_at_core[i] = cnt;
				4906	ncores_with_x_procs[cnt]++;
				4907	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4908
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4909	for (int i = 0; i <= nth_per_core; i++) {
				4910	for (int j = i; j <= nth_per_core; j++) {
				4911	ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j];
				4912	}
				4913	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4914
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4915	// Max number of processors
				4916	int nproc = nth_per_core * ncores;
				4917	// An array to keep number of threads per each context
				4918	int newarr = (int )__kmp_allocate(sizeof(int) * nproc);
				4919	for (int i = 0; i < nproc; i++) {
				4920	newarr[i] = 0;
				4921	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4922
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4923	int nth = nthreads;
				4924	int flag = 0;
				4925	while (nth > 0) {
				4926	for (int j = 1; j <= nth_per_core; j++) {
				4927	int cnt = ncores_with_x_to_max_procs[j];
				4928	for (int i = 0; i < ncores; i++) {
				4929	// Skip the core with 0 processors
				4930	if (nproc_at_core[i] == 0) {
				4931	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4932	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4933	for (int k = 0; k < nth_per_core; k++) {
				4934	if (procarr[i * nth_per_core + k] != -1) {
				4935	if (newarr[i * nth_per_core + k] == 0) {
				4936	newarr[i * nth_per_core + k] = 1;
				4937	cnt--;
				4938	nth--;
				4939	break;
				4940	} else {
				4941	if (flag != 0) {
				4942	newarr[i * nth_per_core + k]++;
				4943	cnt--;
				4944	nth--;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4945	break;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4946	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4947	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4948	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4949	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4950	if (cnt == 0 \|\| nth == 0) {
				4951	break;
				4952	}
				4953	}
				4954	if (nth == 0) {
				4955	break;
				4956	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4957	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4958	flag = 1;
				4959	}
				4960	int sum = 0;
				4961	for (int i = 0; i < nproc; i++) {
				4962	sum += newarr[i];
				4963	if (sum > tid) {
				4964	if (fine_gran) {
				4965	int osID = procarr[i];
				4966	KMP_CPU_SET(osID, mask);
				4967	} else {
				4968	int coreID = i / nth_per_core;
				4969	for (int ii = 0; ii < nth_per_core; ii++) {
				4970	int osID = procarr[coreID * nth_per_core + ii];
				4971	if (osID != -1) {
				4972	KMP_CPU_SET(osID, mask);
				4973	}
				4974	}
				4975	}
				4976	break;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4977	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4978	}
				4979	__kmp_free(newarr);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4980	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4981
				4982	if (__kmp_affinity_verbose) {
				4983	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4984	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
				4985	KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
				4986	__kmp_gettid(), tid, buf);
				4987	}
				4988	__kmp_set_system_affinity(mask, TRUE);
				4989	KMP_CPU_FREE_FROM_STACK(mask);
				4990	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4991	}
				4992
Jonathan Peyton	3076fa4	2016-01-12 17:21:55 +0000	[diff] [blame]	4993	#if KMP_OS_LINUX
				4994	// We don't need this entry for Windows because
				4995	// there is GetProcessAffinityMask() api
				4996	//
				4997	// The intended usage is indicated by these steps:
				4998	// 1) The user gets the current affinity mask
				4999	// 2) Then sets the affinity by calling this function
				5000	// 3) Error check the return value
				5001	// 4) Use non-OpenMP parallelization
				5002	// 5) Reset the affinity to what was stored in step 1)
				5003	#ifdef __cplusplus
				5004	extern "C"
				5005	#endif
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5006	int
				5007	kmp_set_thread_affinity_mask_initial()
Jonathan Peyton	3076fa4	2016-01-12 17:21:55 +0000	[diff] [blame]	5008	// the function returns 0 on success,
				5009	// -1 if we cannot bind thread
				5010	// >0 (errno) if an error happened during binding
				5011	{
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5012	int gtid = __kmp_get_gtid();
				5013	if (gtid < 0) {
				5014	// Do not touch non-omp threads
				5015	KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
				5016	"non-omp thread, returning\n"));
				5017	return -1;
				5018	}
				5019	if (!KMP_AFFINITY_CAPABLE() \|\| !__kmp_init_middle) {
				5020	KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
				5021	"affinity not initialized, returning\n"));
				5022	return -1;
				5023	}
				5024	KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
				5025	"set full mask for thread %d\n",
				5026	gtid));
				5027	KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
				5028	return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
Jonathan Peyton	3076fa4	2016-01-12 17:21:55 +0000	[diff] [blame]	5029	}
				5030	#endif
				5031
Alp Toker	763b939	2014-02-28 09:42:41 +0000	[diff] [blame]	5032	#endif // KMP_AFFINITY_SUPPORTED