Blame - openmp/runtime/src/kmp_affinity.cpp - toolchain/llvm-project

blob: 2d7f7a3e434e8873cb1dc12c05528d32eee130ca [file] [log] [blame]

Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1	/*
				2	* kmp_affinity.cpp -- affinity management
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3	*/
				4
				5
				6	//===----------------------------------------------------------------------===//
				7	//
				8	// The LLVM Compiler Infrastructure
				9	//
				10	// This file is dual licensed under the MIT and the University of Illinois Open
				11	// Source Licenses. See LICENSE.txt for details.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15
				16	#include "kmp.h"
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	17	#include "kmp_affinity.h"
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	18	#include "kmp_i18n.h"
				19	#include "kmp_io.h"
				20	#include "kmp_str.h"
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	21	#include "kmp_wrapper_getpid.h"
Jonathan Peyton	1707836	2015-09-10 19:22:07 +0000	[diff] [blame]	22
				23	// Store the real or imagined machine hierarchy here
				24	static hierarchy_info machine_hierarchy;
				25
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	26	void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }
				27
Jonathan Peyton	1707836	2015-09-10 19:22:07 +0000	[diff] [blame]	28
				29	void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	30	kmp_uint32 depth;
				31	// The test below is true if affinity is available, but set to "none". Need to
				32	// init on first use of hierarchical barrier.
				33	if (TCR_1(machine_hierarchy.uninitialized))
				34	machine_hierarchy.init(NULL, nproc);
Jonathan Peyton	1707836	2015-09-10 19:22:07 +0000	[diff] [blame]	35
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	36	// Adjust the hierarchy in case num threads exceeds original
				37	if (nproc > machine_hierarchy.base_num_threads)
				38	machine_hierarchy.resize(nproc);
Jonathan Peyton	7dee82e	2015-11-09 16:24:53 +0000	[diff] [blame]	39
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	40	depth = machine_hierarchy.depth;
				41	KMP_DEBUG_ASSERT(depth > 0);
Jonathan Peyton	1707836	2015-09-10 19:22:07 +0000	[diff] [blame]	42
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	43	thr_bar->depth = depth;
				44	thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1;
				45	thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
Jonathan Peyton	1707836	2015-09-10 19:22:07 +0000	[diff] [blame]	46	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	47
Alp Toker	763b939	2014-02-28 09:42:41 +0000	[diff] [blame]	48	#if KMP_AFFINITY_SUPPORTED
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	49
Jonathan Peyton	1cdd87a	2016-11-14 21:08:35 +0000	[diff] [blame]	50	bool KMPAffinity::picked_api = false;
				51
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	52	void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
				53	void *KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); }
				54	void KMPAffinity::Mask::operator delete(void *p) { __kmp_free(p); }
				55	void KMPAffinity::Mask::operator delete[](void *p) { __kmp_free(p); }
				56	void *KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); }
				57	void KMPAffinity::operator delete(void *p) { __kmp_free(p); }
Jonathan Peyton	1cdd87a	2016-11-14 21:08:35 +0000	[diff] [blame]	58
				59	void KMPAffinity::pick_api() {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	60	KMPAffinity *affinity_dispatch;
				61	if (picked_api)
				62	return;
Jonathan Peyton	1cdd87a	2016-11-14 21:08:35 +0000	[diff] [blame]	63	#if KMP_USE_HWLOC
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	64	if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
				65	affinity_dispatch = new KMPHwlocAffinity();
				66	} else
Jonathan Peyton	1cdd87a	2016-11-14 21:08:35 +0000	[diff] [blame]	67	#endif
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	68	{
				69	affinity_dispatch = new KMPNativeAffinity();
				70	}
				71	__kmp_affinity_dispatch = affinity_dispatch;
				72	picked_api = true;
Jonathan Peyton	1cdd87a	2016-11-14 21:08:35 +0000	[diff] [blame]	73	}
				74
				75	void KMPAffinity::destroy_api() {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	76	if (__kmp_affinity_dispatch != NULL) {
				77	delete __kmp_affinity_dispatch;
				78	__kmp_affinity_dispatch = NULL;
				79	picked_api = false;
				80	}
Jonathan Peyton	1cdd87a	2016-11-14 21:08:35 +0000	[diff] [blame]	81	}
				82
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	83	// Print the affinity mask to the character array in a pretty format.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	84	char __kmp_affinity_print_mask(char buf, int buf_len,
				85	kmp_affin_mask_t *mask) {
				86	KMP_ASSERT(buf_len >= 40);
				87	char *scan = buf;
				88	char *end = buf + buf_len - 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	89
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	90	// Find first element / check for empty set.
				91	size_t i;
				92	i = mask->begin();
				93	if (i == mask->end()) {
				94	KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}");
				95	while (*scan != '\0')
				96	scan++;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	97	KMP_ASSERT(scan <= end);
				98	return buf;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	99	}
				100
				101	KMP_SNPRINTF(scan, end - scan + 1, "{%ld", (long)i);
				102	while (*scan != '\0')
				103	scan++;
				104	i++;
				105	for (; i != mask->end(); i = mask->next(i)) {
				106	if (!KMP_CPU_ISSET(i, mask)) {
				107	continue;
				108	}
				109
				110	// Check for buffer overflow. A string of the form ",<n>" will have at most
				111	// 10 characters, plus we want to leave room to print ",...}" if the set is
				112	// too large to print for a total of 15 characters. We already left room for
				113	// '\0' in setting end.
				114	if (end - scan < 15) {
				115	break;
				116	}
				117	KMP_SNPRINTF(scan, end - scan + 1, ",%-ld", (long)i);
				118	while (*scan != '\0')
				119	scan++;
				120	}
				121	if (i != mask->end()) {
				122	KMP_SNPRINTF(scan, end - scan + 1, ",...");
				123	while (*scan != '\0')
				124	scan++;
				125	}
				126	KMP_SNPRINTF(scan, end - scan + 1, "}");
				127	while (*scan != '\0')
				128	scan++;
				129	KMP_ASSERT(scan <= end);
				130	return buf;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	131	}
				132
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	133	void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
				134	KMP_CPU_ZERO(mask);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	135
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	136	#if KMP_GROUP_AFFINITY
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	137
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	138	if (__kmp_num_proc_groups > 1) {
				139	int group;
				140	KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
				141	for (group = 0; group < __kmp_num_proc_groups; group++) {
				142	int i;
				143	int num = __kmp_GetActiveProcessorCount(group);
				144	for (i = 0; i < num; i++) {
				145	KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
				146	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	147	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	148	} else
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	149
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	150	#endif /* KMP_GROUP_AFFINITY */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	151
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	152	{
				153	int proc;
				154	for (proc = 0; proc < __kmp_xproc; proc++) {
				155	KMP_CPU_SET(proc, mask);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	156	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	157	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	158	}
				159
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	160	// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
				161	// called to renumber the labels from [0..n] and place them into the child_num
				162	// vector of the address object. This is done in case the labels used for
Alp Toker	8f2d3f0	2014-02-24 10:40:15 +0000	[diff] [blame]	163	// the children at one node of the hierarchy differ from those used for
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	164	// another node at the same level. Example: suppose the machine has 2 nodes
				165	// with 2 packages each. The first node contains packages 601 and 602, and
				166	// second node contains packages 603 and 604. If we try to sort the table
				167	// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
				168	// because we are paying attention to the labels themselves, not the ordinal
				169	// child numbers. By using the child numbers in the sort, the result is
				170	// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	171	static void __kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
				172	int numAddrs) {
				173	KMP_DEBUG_ASSERT(numAddrs > 0);
				174	int depth = address2os->first.depth;
				175	unsigned counts = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				176	unsigned lastLabel = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				177	int labCt;
				178	for (labCt = 0; labCt < depth; labCt++) {
				179	address2os[0].first.childNums[labCt] = counts[labCt] = 0;
				180	lastLabel[labCt] = address2os[0].first.labels[labCt];
				181	}
				182	int i;
				183	for (i = 1; i < numAddrs; i++) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	184	for (labCt = 0; labCt < depth; labCt++) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	185	if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
				186	int labCt2;
				187	for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
				188	counts[labCt2] = 0;
				189	lastLabel[labCt2] = address2os[i].first.labels[labCt2];
				190	}
				191	counts[labCt]++;
				192	lastLabel[labCt] = address2os[i].first.labels[labCt];
				193	break;
				194	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	195	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	196	for (labCt = 0; labCt < depth; labCt++) {
				197	address2os[i].first.childNums[labCt] = counts[labCt];
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	198	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	199	for (; labCt < (int)Address::maxDepth; labCt++) {
				200	address2os[i].first.childNums[labCt] = 0;
				201	}
				202	}
				203	__kmp_free(lastLabel);
				204	__kmp_free(counts);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	205	}
				206
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	207	// All of the __kmp_affinity_create_*_map() routines should set
				208	// __kmp_affinity_masks to a vector of affinity mask objects of length
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	209	// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and return
				210	// the number of levels in the machine topology tree (zero if
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	211	// __kmp_affinity_type == affinity_none).
				212	//
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	213	// All of the __kmp_affinity_create_*_map() routines should set
				214	// *__kmp_affin_fullMask to the affinity mask for the initialization thread.
				215	// They need to save and restore the mask, and it could be needed later, so
				216	// saving it is just an optimization to avoid calling kmp_get_system_affinity()
				217	// again.
Jonathan Peyton	c5304aa	2016-06-13 21:28:03 +0000	[diff] [blame]	218	kmp_affin_mask_t *__kmp_affin_fullMask = NULL;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	219
				220	static int nCoresPerPkg, nPackages;
Andrey Churbanov	f696c82	2015-01-27 16:55:43 +0000	[diff] [blame]	221	static int __kmp_nThreadsPerCore;
				222	#ifndef KMP_DFLT_NTH_CORES
				223	static int __kmp_ncores;
				224	#endif
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	225	static int *__kmp_pu_os_idx = NULL;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	226
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	227	// __kmp_affinity_uniform_topology() doesn't work when called from
				228	// places which support arbitrarily many levels in the machine topology
				229	// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
				230	// __kmp_affinity_create_x2apicid_map().
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	231	inline static bool __kmp_affinity_uniform_topology() {
				232	return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	233	}
				234
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	235	// Print out the detailed machine topology map, i.e. the physical locations
				236	// of each OS proc.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	237	static void __kmp_affinity_print_topology(AddrUnsPair *address2os, int len,
				238	int depth, int pkgLevel,
				239	int coreLevel, int threadLevel) {
				240	int proc;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	241
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	242	KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
				243	for (proc = 0; proc < len; proc++) {
				244	int level;
				245	kmp_str_buf_t buf;
				246	__kmp_str_buf_init(&buf);
				247	for (level = 0; level < depth; level++) {
				248	if (level == threadLevel) {
				249	__kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
				250	} else if (level == coreLevel) {
				251	__kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
				252	} else if (level == pkgLevel) {
				253	__kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
				254	} else if (level > pkgLevel) {
				255	__kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
				256	level - pkgLevel - 1);
				257	} else {
				258	__kmp_str_buf_print(&buf, "L%d ", level);
				259	}
				260	__kmp_str_buf_print(&buf, "%d ", address2os[proc].first.labels[level]);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	261	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	262	KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
				263	buf.str);
				264	__kmp_str_buf_free(&buf);
				265	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	266	}
				267
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	268	#if KMP_USE_HWLOC
Jonathan Peyton	202a24d	2016-06-13 17:30:08 +0000	[diff] [blame]	269
				270	// This function removes the topology levels that are radix 1 and don't offer
				271	// further information about the topology. The most common example is when you
				272	// have one thread context per core, we don't want the extra thread context
				273	// level if it offers no unique labels. So they are removed.
				274	// return value: the new depth of address2os
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	275	static int __kmp_affinity_remove_radix_one_levels(AddrUnsPair *address2os,
				276	int nActiveThreads, int depth,
				277	int pkgLevel, int coreLevel,
				278	int *threadLevel) {
				279	int level;
				280	int i;
				281	int radix1_detected;
Jonathan Peyton	202a24d	2016-06-13 17:30:08 +0000	[diff] [blame]	282
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	283	for (level = depth - 1; level >= 0; --level) {
				284	// Always keep the package level
				285	if (level == *pkgLevel)
				286	continue;
				287	// Detect if this level is radix 1
				288	radix1_detected = 1;
				289	for (i = 1; i < nActiveThreads; ++i) {
				290	if (address2os[0].first.labels[level] !=
				291	address2os[i].first.labels[level]) {
				292	// There are differing label values for this level so it stays
				293	radix1_detected = 0;
				294	break;
				295	}
Jonathan Peyton	202a24d	2016-06-13 17:30:08 +0000	[diff] [blame]	296	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	297	if (!radix1_detected)
				298	continue;
				299	// Radix 1 was detected
				300	if (level == *threadLevel) {
				301	// If only one thread per core, then just decrement
				302	// the depth which removes the threadlevel from address2os
				303	for (i = 0; i < nActiveThreads; ++i) {
				304	address2os[i].first.depth--;
				305	}
				306	*threadLevel = -1;
				307	} else if (level == *coreLevel) {
				308	// For core level, we move the thread labels over if they are still
				309	// valid (*threadLevel != -1), and also reduce the depth another level
				310	for (i = 0; i < nActiveThreads; ++i) {
				311	if (*threadLevel != -1) {
				312	address2os[i].first.labels[*coreLevel] =
				313	address2os[i].first.labels[*threadLevel];
				314	}
				315	address2os[i].first.depth--;
				316	}
				317	*coreLevel = -1;
				318	}
				319	}
				320	return address2os[0].first.depth;
Jonathan Peyton	202a24d	2016-06-13 17:30:08 +0000	[diff] [blame]	321	}
				322
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	323	// Returns the number of objects of type 'type' below 'obj' within the topology
				324	// tree structure. e.g., if obj is a HWLOC_OBJ_PACKAGE object, and type is
				325	// HWLOC_OBJ_PU, then this will return the number of PU's under the SOCKET
				326	// object.
				327	static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj,
				328	hwloc_obj_type_t type) {
				329	int retval = 0;
				330	hwloc_obj_t first;
				331	for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type,
				332	obj->logical_index, type, 0);
				333	first != NULL &&
				334	hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) ==
				335	obj;
				336	first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type,
				337	first)) {
				338	++retval;
				339	}
				340	return retval;
Jonathan Peyton	202a24d	2016-06-13 17:30:08 +0000	[diff] [blame]	341	}
				342
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	343	static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
				344	kmp_i18n_id_t *const msg_id) {
				345	*address2os = NULL;
				346	*msg_id = kmp_i18n_null;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	347
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	348	// Save the affinity mask for the current thread.
				349	kmp_affin_mask_t *oldMask;
				350	KMP_CPU_ALLOC(oldMask);
				351	__kmp_get_system_affinity(oldMask, TRUE);
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	352
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	353	int depth = 3;
				354	int pkgLevel = 0;
				355	int coreLevel = 1;
				356	int threadLevel = 2;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	357
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	358	if (!KMP_AFFINITY_CAPABLE()) {
				359	// Hack to try and infer the machine topology using only the data
				360	// available from cpuid on the current thread, and __kmp_xproc.
				361	KMP_ASSERT(__kmp_affinity_type == affinity_none);
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	362
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	363	nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(
				364	hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, 0),
				365	HWLOC_OBJ_CORE);
				366	__kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(
				367	hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0),
				368	HWLOC_OBJ_PU);
				369	__kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
				370	nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	371	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	372	KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
				373	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				374	if (__kmp_affinity_uniform_topology()) {
				375	KMP_INFORM(Uniform, "KMP_AFFINITY");
				376	} else {
				377	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				378	}
				379	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				380	__kmp_nThreadsPerCore, __kmp_ncores);
				381	}
				382	KMP_CPU_FREE(oldMask);
				383	return 0;
				384	}
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	385
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	386	// Allocate the data structure to be returned.
				387	AddrUnsPair *retval =
				388	(AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair) __kmp_avail_proc);
				389	__kmp_pu_os_idx = (int )__kmp_allocate(sizeof(int) __kmp_avail_proc);
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	390
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	391	// When affinity is off, this routine will still be called to set
				392	// __kmp_ncores, as well as __kmp_nThreadsPerCore,
				393	// nCoresPerPkg, & nPackages. Make sure all these vars are set
				394	// correctly, and return if affinity is not enabled.
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	395
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	396	hwloc_obj_t pu;
				397	hwloc_obj_t core;
				398	hwloc_obj_t socket;
				399	int nActiveThreads = 0;
				400	int socket_identifier = 0;
				401	// re-calculate globals to count only accessible resources
				402	__kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
				403	for (socket =
				404	hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, 0);
				405	socket != NULL;
				406	socket = hwloc_get_next_obj_by_type(__kmp_hwloc_topology,
				407	HWLOC_OBJ_PACKAGE, socket),
				408	socket_identifier++) {
				409	int core_identifier = 0;
				410	int num_active_cores = 0;
				411	for (core = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, socket->type,
				412	socket->logical_index,
				413	HWLOC_OBJ_CORE, 0);
				414	core != NULL &&
				415	hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, socket->type,
				416	core) == socket;
				417	core = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE,
				418	core),
				419	core_identifier++) {
				420	int pu_identifier = 0;
				421	int num_active_threads = 0;
				422	for (pu = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, core->type,
				423	core->logical_index, HWLOC_OBJ_PU,
				424	0);
				425	pu != NULL &&
				426	hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, core->type,
				427	pu) == core;
				428	pu = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU,
				429	pu),
				430	pu_identifier++) {
				431	Address addr(3);
				432	if(!KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
				433	continue; // skip inactive (inaccessible) unit
				434	KA_TRACE(20,
				435	("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
				436	socket->os_index, socket->logical_index, core->os_index,
				437	core->logical_index, pu->os_index,pu->logical_index));
				438	addr.labels[0] = socket_identifier; // package
				439	addr.labels[1] = core_identifier; // core
				440	addr.labels[2] = pu_identifier; // pu
				441	retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
				442	__kmp_pu_os_idx[nActiveThreads] =
				443	pu->os_index; // keep os index for each active pu
				444	nActiveThreads++;
				445	++num_active_threads; // count active threads per core
				446	}
				447	if (num_active_threads) { // were there any active threads on the core?
				448	++__kmp_ncores; // count total active cores
				449	++num_active_cores; // count active cores per socket
				450	if (num_active_threads > __kmp_nThreadsPerCore)
				451	__kmp_nThreadsPerCore = num_active_threads; // calc maximum
				452	}
				453	}
				454	if (num_active_cores) { // were there any active cores on the socket?
				455	++nPackages; // count total active packages
				456	if (num_active_cores > nCoresPerPkg)
				457	nCoresPerPkg = num_active_cores; // calc maximum
				458	}
				459	}
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	460
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	461	// If there's only one thread context to bind to, return now.
				462	KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
				463	KMP_ASSERT(nActiveThreads > 0);
				464	if (nActiveThreads == 1) {
				465	__kmp_ncores = nPackages = 1;
				466	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
				467	if (__kmp_affinity_verbose) {
				468	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				469	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				470
				471	KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
				472	if (__kmp_affinity_respect_mask) {
				473	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				474	} else {
				475	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				476	}
				477	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				478	KMP_INFORM(Uniform, "KMP_AFFINITY");
				479	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				480	__kmp_nThreadsPerCore, __kmp_ncores);
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	481	}
				482
				483	if (__kmp_affinity_type == affinity_none) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	484	__kmp_free(retval);
				485	KMP_CPU_FREE(oldMask);
				486	return 0;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	487	}
				488
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	489	// Form an Address object which only includes the package level.
				490	Address addr(1);
				491	addr.labels[0] = retval[0].first.labels[pkgLevel];
				492	retval[0].first = addr;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	493
				494	if (__kmp_affinity_gran_levels < 0) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	495	__kmp_affinity_gran_levels = 0;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	496	}
				497
				498	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	499	__kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	500	}
				501
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	502	*address2os = retval;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	503	KMP_CPU_FREE(oldMask);
				504	return 1;
				505	}
				506
				507	// Sort the table by physical Id.
				508	qsort(retval, nActiveThreads, sizeof(*retval),
				509	__kmp_affinity_cmp_Address_labels);
				510
				511	// Check to see if the machine topology is uniform
				512	unsigned uniform =
				513	(nPackages * nCoresPerPkg * __kmp_nThreadsPerCore == nActiveThreads);
				514
				515	// Print the machine topology summary.
				516	if (__kmp_affinity_verbose) {
				517	char mask[KMP_AFFIN_MASK_PRINT_LEN];
				518	__kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				519
				520	KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
				521	if (__kmp_affinity_respect_mask) {
				522	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
				523	} else {
				524	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
				525	}
				526	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				527	if (uniform) {
				528	KMP_INFORM(Uniform, "KMP_AFFINITY");
				529	} else {
				530	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				531	}
				532
				533	kmp_str_buf_t buf;
				534	__kmp_str_buf_init(&buf);
				535
				536	__kmp_str_buf_print(&buf, "%d", nPackages);
				537	// for (level = 1; level <= pkgLevel; level++) {
				538	// __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
				539	// }
				540	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
				541	__kmp_nThreadsPerCore, __kmp_ncores);
				542
				543	__kmp_str_buf_free(&buf);
				544	}
				545
				546	if (__kmp_affinity_type == affinity_none) {
				547	__kmp_free(retval);
				548	KMP_CPU_FREE(oldMask);
				549	return 0;
				550	}
				551
				552	// Find any levels with radiix 1, and remove them from the map
				553	// (except for the package level).
				554	depth = __kmp_affinity_remove_radix_one_levels(
				555	retval, nActiveThreads, depth, &pkgLevel, &coreLevel, &threadLevel);
				556
				557	if (__kmp_affinity_gran_levels < 0) {
				558	// Set the granularity level based on what levels are modeled
				559	// in the machine topology map.
				560	__kmp_affinity_gran_levels = 0;
				561	if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
				562	__kmp_affinity_gran_levels++;
				563	}
				564	if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
				565	__kmp_affinity_gran_levels++;
				566	}
				567	if (__kmp_affinity_gran > affinity_gran_package) {
				568	__kmp_affinity_gran_levels++;
				569	}
				570	}
				571
				572	if (__kmp_affinity_verbose) {
				573	__kmp_affinity_print_topology(retval, nActiveThreads, depth, pkgLevel,
				574	coreLevel, threadLevel);
				575	}
				576
				577	KMP_CPU_FREE(oldMask);
				578	*address2os = retval;
				579	return depth;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	580	}
				581	#endif // KMP_USE_HWLOC
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	582
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	583	// If we don't know how to retrieve the machine's processor topology, or
				584	// encounter an error in doing so, this routine is called to form a "flat"
				585	// mapping of os thread id's <-> processor id's.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	586	static int __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
				587	kmp_i18n_id_t *const msg_id) {
				588	*address2os = NULL;
				589	*msg_id = kmp_i18n_null;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	590
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	591	// Even if __kmp_affinity_type == affinity_none, this routine might still
				592	// called to set __kmp_ncores, as well as
				593	// __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
				594	if (!KMP_AFFINITY_CAPABLE()) {
				595	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				596	__kmp_ncores = nPackages = __kmp_xproc;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	597	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	598	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	599	KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
				600	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				601	KMP_INFORM(Uniform, "KMP_AFFINITY");
				602	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				603	__kmp_nThreadsPerCore, __kmp_ncores);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	604	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	605	return 0;
				606	}
				607
				608	// When affinity is off, this routine will still be called to set
				609	// __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
				610	// Make sure all these vars are set correctly, and return now if affinity is
				611	// not enabled.
				612	__kmp_ncores = nPackages = __kmp_avail_proc;
				613	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
				614	if (__kmp_affinity_verbose) {
				615	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				616	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				617	__kmp_affin_fullMask);
				618
				619	KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
				620	if (__kmp_affinity_respect_mask) {
				621	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				622	} else {
				623	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	624	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	625	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				626	KMP_INFORM(Uniform, "KMP_AFFINITY");
				627	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				628	__kmp_nThreadsPerCore, __kmp_ncores);
				629	}
				630	KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
				631	__kmp_pu_os_idx = (int )__kmp_allocate(sizeof(int) __kmp_avail_proc);
				632	if (__kmp_affinity_type == affinity_none) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	633	int avail_ct = 0;
				634	int i;
Jonathan Peyton	c5304aa	2016-06-13 21:28:03 +0000	[diff] [blame]	635	KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	636	if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask))
				637	continue;
				638	__kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	639	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	640	return 0;
				641	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	642
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	643	// Contruct the data structure to be returned.
				644	*address2os =
				645	(AddrUnsPair )__kmp_allocate(sizeof(address2os) __kmp_avail_proc);
				646	int avail_ct = 0;
				647	unsigned int i;
				648	KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
				649	// Skip this proc if it is not included in the machine model.
				650	if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
				651	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	652	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	653	__kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
				654	Address addr(1);
				655	addr.labels[0] = i;
				656	(*address2os)[avail_ct++] = AddrUnsPair(addr, i);
				657	}
				658	if (__kmp_affinity_verbose) {
				659	KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
				660	}
				661
				662	if (__kmp_affinity_gran_levels < 0) {
				663	// Only the package level is modeled in the machine topology map,
				664	// so the #levels of granularity is either 0 or 1.
				665	if (__kmp_affinity_gran > affinity_gran_package) {
				666	__kmp_affinity_gran_levels = 1;
				667	} else {
				668	__kmp_affinity_gran_levels = 0;
				669	}
				670	}
				671	return 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	672	}
				673
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	674	#if KMP_GROUP_AFFINITY
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	675
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	676	// If multiple Windows* OS processor groups exist, we can create a 2-level
				677	// topology map with the groups at level 0 and the individual procs at level 1.
				678	// This facilitates letting the threads float among all procs in a group,
				679	// if granularity=group (the default when there are multiple groups).
				680	static int __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
				681	kmp_i18n_id_t *const msg_id) {
				682	*address2os = NULL;
				683	*msg_id = kmp_i18n_null;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	684
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	685	// If we don't have multiple processor groups, return now.
				686	// The flat mapping will be used.
				687	if ((!KMP_AFFINITY_CAPABLE()) \|\|
				688	(__kmp_get_proc_group(__kmp_affin_fullMask) >= 0)) {
				689	// FIXME set *msg_id
				690	return -1;
				691	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	692
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	693	// Contruct the data structure to be returned.
				694	*address2os =
				695	(AddrUnsPair )__kmp_allocate(sizeof(address2os) __kmp_avail_proc);
				696	KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
				697	__kmp_pu_os_idx = (int )__kmp_allocate(sizeof(int) __kmp_avail_proc);
				698	int avail_ct = 0;
				699	int i;
				700	KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
				701	// Skip this proc if it is not included in the machine model.
				702	if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
				703	continue;
				704	}
				705	__kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
				706	Address addr(2);
				707	addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
				708	addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
				709	(*address2os)[avail_ct++] = AddrUnsPair(addr, i);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	710
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	711	if (__kmp_affinity_verbose) {
				712	KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
				713	addr.labels[1]);
				714	}
				715	}
				716
				717	if (__kmp_affinity_gran_levels < 0) {
				718	if (__kmp_affinity_gran == affinity_gran_group) {
				719	__kmp_affinity_gran_levels = 1;
				720	} else if ((__kmp_affinity_gran == affinity_gran_fine) \|\|
				721	(__kmp_affinity_gran == affinity_gran_thread)) {
				722	__kmp_affinity_gran_levels = 0;
				723	} else {
				724	const char *gran_str = NULL;
				725	if (__kmp_affinity_gran == affinity_gran_core) {
				726	gran_str = "core";
				727	} else if (__kmp_affinity_gran == affinity_gran_package) {
				728	gran_str = "package";
				729	} else if (__kmp_affinity_gran == affinity_gran_node) {
				730	gran_str = "node";
				731	} else {
				732	KMP_ASSERT(0);
				733	}
				734
				735	// Warning: can't use affinity granularity \"gran\" with group topology
				736	// method, using "thread"
				737	__kmp_affinity_gran_levels = 0;
				738	}
				739	}
				740	return 2;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	741	}
				742
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	743	#endif /* KMP_GROUP_AFFINITY */
				744
				745	#if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
				746
				747	static int __kmp_cpuid_mask_width(int count) {
				748	int r = 0;
				749
				750	while ((1 << r) < count)
				751	++r;
				752	return r;
				753	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	754
				755	class apicThreadInfo {
				756	public:
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	757	unsigned osId; // param to __kmp_affinity_bind_thread
				758	unsigned apicId; // from cpuid after binding
				759	unsigned maxCoresPerPkg; // ""
				760	unsigned maxThreadsPerPkg; // ""
				761	unsigned pkgId; // inferred from above values
				762	unsigned coreId; // ""
				763	unsigned threadId; // ""
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	764	};
				765
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	766	static int __kmp_affinity_cmp_apicThreadInfo_os_id(const void *a,
				767	const void *b) {
				768	const apicThreadInfo aa = (const apicThreadInfo )a;
				769	const apicThreadInfo bb = (const apicThreadInfo )b;
				770	if (aa->osId < bb->osId)
				771	return -1;
				772	if (aa->osId > bb->osId)
				773	return 1;
				774	return 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	775	}
				776
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	777	static int __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a,
				778	const void *b) {
				779	const apicThreadInfo aa = (const apicThreadInfo )a;
				780	const apicThreadInfo bb = (const apicThreadInfo )b;
				781	if (aa->pkgId < bb->pkgId)
				782	return -1;
				783	if (aa->pkgId > bb->pkgId)
				784	return 1;
				785	if (aa->coreId < bb->coreId)
				786	return -1;
				787	if (aa->coreId > bb->coreId)
				788	return 1;
				789	if (aa->threadId < bb->threadId)
				790	return -1;
				791	if (aa->threadId > bb->threadId)
				792	return 1;
				793	return 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	794	}
				795
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	796	// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
				797	// an algorithm which cycles through the available os threads, setting
				798	// the current thread's affinity mask to that thread, and then retrieves
				799	// the Apic Id for each thread context using the cpuid instruction.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	800	static int __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
				801	kmp_i18n_id_t *const msg_id) {
				802	kmp_cpuid buf;
				803	int rc;
				804	*address2os = NULL;
				805	*msg_id = kmp_i18n_null;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	806
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	807	// Check if cpuid leaf 4 is supported.
				808	__kmp_x86_cpuid(0, 0, &buf);
				809	if (buf.eax < 4) {
				810	*msg_id = kmp_i18n_str_NoLeaf4Support;
				811	return -1;
				812	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	813
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	814	// The algorithm used starts by setting the affinity to each available thread
				815	// and retrieving info from the cpuid instruction, so if we are not capable of
				816	// calling __kmp_get_system_affinity() and _kmp_get_system_affinity(), then we
				817	// need to do something else - use the defaults that we calculated from
				818	// issuing cpuid without binding to each proc.
				819	if (!KMP_AFFINITY_CAPABLE()) {
				820	// Hack to try and infer the machine topology using only the data
				821	// available from cpuid on the current thread, and __kmp_xproc.
				822	KMP_ASSERT(__kmp_affinity_type == affinity_none);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	823
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	824	// Get an upper bound on the number of threads per package using cpuid(1).
				825	// On some OS/chps combinations where HT is supported by the chip but is
				826	// disabled, this value will be 2 on a single core chip. Usually, it will be
				827	// 2 if HT is enabled and 1 if HT is disabled.
				828	__kmp_x86_cpuid(1, 0, &buf);
				829	int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
				830	if (maxThreadsPerPkg == 0) {
				831	maxThreadsPerPkg = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	832	}
				833
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	834	// The num cores per pkg comes from cpuid(4). 1 must be added to the encoded
				835	// value.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	836	//
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	837	// The author of cpu_count.cpp treated this only an upper bound on the
				838	// number of cores, but I haven't seen any cases where it was greater than
				839	// the actual number of cores, so we will treat it as exact in this block of
				840	// code.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	841	//
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	842	// First, we need to check if cpuid(4) is supported on this chip. To see if
				843	// cpuid(n) is supported, issue cpuid(0) and check if eax has the value n or
				844	// greater.
				845	__kmp_x86_cpuid(0, 0, &buf);
				846	if (buf.eax >= 4) {
				847	__kmp_x86_cpuid(4, 0, &buf);
				848	nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
				849	} else {
				850	nCoresPerPkg = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	851	}
				852
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	853	// There is no way to reliably tell if HT is enabled without issuing the
				854	// cpuid instruction from every thread, can correlating the cpuid info, so
				855	// if the machine is not affinity capable, we assume that HT is off. We have
				856	// seen quite a few machines where maxThreadsPerPkg is 2, yet the machine
				857	// does not support HT.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	858	//
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	859	// - Older OSes are usually found on machines with older chips, which do not
				860	// support HT.
				861	// - The performance penalty for mistakenly identifying a machine as HT when
				862	// it isn't (which results in blocktime being incorrecly set to 0) is
				863	// greater than the penalty when for mistakenly identifying a machine as
				864	// being 1 thread/core when it is really HT enabled (which results in
				865	// blocktime being incorrectly set to a positive value).
				866	__kmp_ncores = __kmp_xproc;
				867	nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	868	__kmp_nThreadsPerCore = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	869	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	870	KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
				871	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				872	if (__kmp_affinity_uniform_topology()) {
				873	KMP_INFORM(Uniform, "KMP_AFFINITY");
				874	} else {
				875	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				876	}
				877	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				878	__kmp_nThreadsPerCore, __kmp_ncores);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	879	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	880	return 0;
				881	}
				882
				883	// From here on, we can assume that it is safe to call
				884	// __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
				885	// __kmp_affinity_type = affinity_none.
				886
				887	// Save the affinity mask for the current thread.
				888	kmp_affin_mask_t *oldMask;
				889	KMP_CPU_ALLOC(oldMask);
				890	KMP_ASSERT(oldMask != NULL);
				891	__kmp_get_system_affinity(oldMask, TRUE);
				892
				893	// Run through each of the available contexts, binding the current thread
				894	// to it, and obtaining the pertinent information using the cpuid instr.
				895	//
				896	// The relevant information is:
				897	// - Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
				898	// has a uniqie Apic Id, which is of the form pkg# : core# : thread#.
				899	// - Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The value
				900	// of this field determines the width of the core# + thread# fields in the
				901	// Apic Id. It is also an upper bound on the number of threads per
				902	// package, but it has been verified that situations happen were it is not
				903	// exact. In particular, on certain OS/chip combinations where Intel(R)
				904	// Hyper-Threading Technology is supported by the chip but has been
				905	// disabled, the value of this field will be 2 (for a single core chip).
				906	// On other OS/chip combinations supporting Intel(R) Hyper-Threading
				907	// Technology, the value of this field will be 1 when Intel(R)
				908	// Hyper-Threading Technology is disabled and 2 when it is enabled.
				909	// - Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The value
				910	// of this field (+1) determines the width of the core# field in the Apic
				911	// Id. The comments in "cpucount.cpp" say that this value is an upper
				912	// bound, but the IA-32 architecture manual says that it is exactly the
				913	// number of cores per package, and I haven't seen any case where it
				914	// wasn't.
				915	//
				916	// From this information, deduce the package Id, core Id, and thread Id,
				917	// and set the corresponding fields in the apicThreadInfo struct.
				918	unsigned i;
				919	apicThreadInfo threadInfo = (apicThreadInfo )__kmp_allocate(
				920	__kmp_avail_proc * sizeof(apicThreadInfo));
				921	unsigned nApics = 0;
				922	KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
				923	// Skip this proc if it is not included in the machine model.
				924	if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
				925	continue;
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	926	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	927	KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
				928
				929	__kmp_affinity_dispatch->bind_thread(i);
				930	threadInfo[nApics].osId = i;
				931
				932	// The apic id and max threads per pkg come from cpuid(1).
				933	__kmp_x86_cpuid(1, 0, &buf);
				934	if (((buf.edx >> 9) & 1) == 0) {
				935	__kmp_set_system_affinity(oldMask, TRUE);
				936	__kmp_free(threadInfo);
				937	KMP_CPU_FREE(oldMask);
				938	*msg_id = kmp_i18n_str_ApicNotPresent;
				939	return -1;
				940	}
				941	threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
				942	threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
				943	if (threadInfo[nApics].maxThreadsPerPkg == 0) {
				944	threadInfo[nApics].maxThreadsPerPkg = 1;
				945	}
				946
				947	// Max cores per pkg comes from cpuid(4). 1 must be added to the encoded
				948	// value.
				949	//
				950	// First, we need to check if cpuid(4) is supported on this chip. To see if
				951	// cpuid(n) is supported, issue cpuid(0) and check if eax has the value n
				952	// or greater.
				953	__kmp_x86_cpuid(0, 0, &buf);
				954	if (buf.eax >= 4) {
				955	__kmp_x86_cpuid(4, 0, &buf);
				956	threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
				957	} else {
				958	threadInfo[nApics].maxCoresPerPkg = 1;
				959	}
				960
				961	// Infer the pkgId / coreId / threadId using only the info obtained locally.
				962	int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg);
				963	threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
				964
				965	int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg);
				966	int widthT = widthCT - widthC;
				967	if (widthT < 0) {
				968	// I've never seen this one happen, but I suppose it could, if the cpuid
				969	// instruction on a chip was really screwed up. Make sure to restore the
				970	// affinity mask before the tail call.
				971	__kmp_set_system_affinity(oldMask, TRUE);
				972	__kmp_free(threadInfo);
				973	KMP_CPU_FREE(oldMask);
				974	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				975	return -1;
				976	}
				977
				978	int maskC = (1 << widthC) - 1;
				979	threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC;
				980
				981	int maskT = (1 << widthT) - 1;
				982	threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;
				983
				984	nApics++;
				985	}
				986
				987	// We've collected all the info we need.
				988	// Restore the old affinity mask for this thread.
				989	__kmp_set_system_affinity(oldMask, TRUE);
				990
				991	// If there's only one thread context to bind to, form an Address object
				992	// with depth 1 and return immediately (or, if affinity is off, set
				993	// address2os to NULL and return).
				994	//
				995	// If it is configured to omit the package level when there is only a single
				996	// package, the logic at the end of this routine won't work if there is only
				997	// a single thread - it would try to form an Address object with depth 0.
				998	KMP_ASSERT(nApics > 0);
				999	if (nApics == 1) {
				1000	__kmp_ncores = nPackages = 1;
				1001	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
				1002	if (__kmp_affinity_verbose) {
				1003	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				1004	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1005
				1006	KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
				1007	if (__kmp_affinity_respect_mask) {
				1008	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				1009	} else {
				1010	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				1011	}
				1012	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1013	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1014	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1015	__kmp_nThreadsPerCore, __kmp_ncores);
				1016	}
				1017
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1018	if (__kmp_affinity_type == affinity_none) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1019	__kmp_free(threadInfo);
				1020	KMP_CPU_FREE(oldMask);
				1021	return 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1022	}
				1023
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1024	address2os = (AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair));
				1025	Address addr(1);
				1026	addr.labels[0] = threadInfo[0].pkgId;
				1027	(*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1028
				1029	if (__kmp_affinity_gran_levels < 0) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1030	__kmp_affinity_gran_levels = 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1031	}
				1032
				1033	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1034	__kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1035	}
				1036
				1037	__kmp_free(threadInfo);
				1038	KMP_CPU_FREE(oldMask);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1039	return 1;
				1040	}
				1041
				1042	// Sort the threadInfo table by physical Id.
				1043	qsort(threadInfo, nApics, sizeof(*threadInfo),
				1044	__kmp_affinity_cmp_apicThreadInfo_phys_id);
				1045
				1046	// The table is now sorted by pkgId / coreId / threadId, but we really don't
				1047	// know the radix of any of the fields. pkgId's may be sparsely assigned among
				1048	// the chips on a system. Although coreId's are usually assigned
				1049	// [0 .. coresPerPkg-1] and threadId's are usually assigned
				1050	// [0..threadsPerCore-1], we don't want to make any such assumptions.
				1051	//
				1052	// For that matter, we don't know what coresPerPkg and threadsPerCore (or the
				1053	// total # packages) are at this point - we want to determine that now. We
				1054	// only have an upper bound on the first two figures.
				1055	//
				1056	// We also perform a consistency check at this point: the values returned by
				1057	// the cpuid instruction for any thread bound to a given package had better
				1058	// return the same info for maxThreadsPerPkg and maxCoresPerPkg.
				1059	nPackages = 1;
				1060	nCoresPerPkg = 1;
				1061	__kmp_nThreadsPerCore = 1;
				1062	unsigned nCores = 1;
				1063
				1064	unsigned pkgCt = 1; // to determine radii
				1065	unsigned lastPkgId = threadInfo[0].pkgId;
				1066	unsigned coreCt = 1;
				1067	unsigned lastCoreId = threadInfo[0].coreId;
				1068	unsigned threadCt = 1;
				1069	unsigned lastThreadId = threadInfo[0].threadId;
				1070
				1071	// intra-pkg consist checks
				1072	unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
				1073	unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
				1074
				1075	for (i = 1; i < nApics; i++) {
				1076	if (threadInfo[i].pkgId != lastPkgId) {
				1077	nCores++;
				1078	pkgCt++;
				1079	lastPkgId = threadInfo[i].pkgId;
				1080	if ((int)coreCt > nCoresPerPkg)
				1081	nCoresPerPkg = coreCt;
				1082	coreCt = 1;
				1083	lastCoreId = threadInfo[i].coreId;
				1084	if ((int)threadCt > __kmp_nThreadsPerCore)
				1085	__kmp_nThreadsPerCore = threadCt;
				1086	threadCt = 1;
				1087	lastThreadId = threadInfo[i].threadId;
				1088
				1089	// This is a different package, so go on to the next iteration without
				1090	// doing any consistency checks. Reset the consistency check vars, though.
				1091	prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
				1092	prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
				1093	continue;
				1094	}
				1095
				1096	if (threadInfo[i].coreId != lastCoreId) {
				1097	nCores++;
				1098	coreCt++;
				1099	lastCoreId = threadInfo[i].coreId;
				1100	if ((int)threadCt > __kmp_nThreadsPerCore)
				1101	__kmp_nThreadsPerCore = threadCt;
				1102	threadCt = 1;
				1103	lastThreadId = threadInfo[i].threadId;
				1104	} else if (threadInfo[i].threadId != lastThreadId) {
				1105	threadCt++;
				1106	lastThreadId = threadInfo[i].threadId;
				1107	} else {
				1108	__kmp_free(threadInfo);
				1109	KMP_CPU_FREE(oldMask);
				1110	*msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
				1111	return -1;
				1112	}
				1113
				1114	// Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
				1115	// fields agree between all the threads bounds to a given package.
				1116	if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) \|\|
				1117	(prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
				1118	__kmp_free(threadInfo);
				1119	KMP_CPU_FREE(oldMask);
				1120	*msg_id = kmp_i18n_str_InconsistentCpuidInfo;
				1121	return -1;
				1122	}
				1123	}
				1124	nPackages = pkgCt;
				1125	if ((int)coreCt > nCoresPerPkg)
				1126	nCoresPerPkg = coreCt;
				1127	if ((int)threadCt > __kmp_nThreadsPerCore)
				1128	__kmp_nThreadsPerCore = threadCt;
				1129
				1130	// When affinity is off, this routine will still be called to set
				1131	// __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
				1132	// Make sure all these vars are set correctly, and return now if affinity is
				1133	// not enabled.
				1134	__kmp_ncores = nCores;
				1135	if (__kmp_affinity_verbose) {
				1136	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				1137	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1138
				1139	KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
				1140	if (__kmp_affinity_respect_mask) {
				1141	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				1142	} else {
				1143	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				1144	}
				1145	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1146	if (__kmp_affinity_uniform_topology()) {
				1147	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1148	} else {
				1149	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				1150	}
				1151	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1152	__kmp_nThreadsPerCore, __kmp_ncores);
				1153	}
				1154	KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
				1155	KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
				1156	__kmp_pu_os_idx = (int )__kmp_allocate(sizeof(int) __kmp_avail_proc);
				1157	for (i = 0; i < nApics; ++i) {
				1158	__kmp_pu_os_idx[i] = threadInfo[i].osId;
				1159	}
				1160	if (__kmp_affinity_type == affinity_none) {
				1161	__kmp_free(threadInfo);
				1162	KMP_CPU_FREE(oldMask);
				1163	return 0;
				1164	}
				1165
				1166	// Now that we've determined the number of packages, the number of cores per
				1167	// package, and the number of threads per core, we can construct the data
				1168	// structure that is to be returned.
				1169	int pkgLevel = 0;
				1170	int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
				1171	int threadLevel =
				1172	(__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
				1173	unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
				1174
				1175	KMP_ASSERT(depth > 0);
				1176	address2os = (AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair) * nApics);
				1177
				1178	for (i = 0; i < nApics; ++i) {
				1179	Address addr(depth);
				1180	unsigned os = threadInfo[i].osId;
				1181	int d = 0;
				1182
				1183	if (pkgLevel >= 0) {
				1184	addr.labels[d++] = threadInfo[i].pkgId;
				1185	}
				1186	if (coreLevel >= 0) {
				1187	addr.labels[d++] = threadInfo[i].coreId;
				1188	}
				1189	if (threadLevel >= 0) {
				1190	addr.labels[d++] = threadInfo[i].threadId;
				1191	}
				1192	(*address2os)[i] = AddrUnsPair(addr, os);
				1193	}
				1194
				1195	if (__kmp_affinity_gran_levels < 0) {
				1196	// Set the granularity level based on what levels are modeled in the machine
				1197	// topology map.
				1198	__kmp_affinity_gran_levels = 0;
				1199	if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
				1200	__kmp_affinity_gran_levels++;
				1201	}
				1202	if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
				1203	__kmp_affinity_gran_levels++;
				1204	}
				1205	if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
				1206	__kmp_affinity_gran_levels++;
				1207	}
				1208	}
				1209
				1210	if (__kmp_affinity_verbose) {
				1211	__kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
				1212	coreLevel, threadLevel);
				1213	}
				1214
				1215	__kmp_free(threadInfo);
				1216	KMP_CPU_FREE(oldMask);
				1217	return depth;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1218	}
				1219
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1220	// Intel(R) microarchitecture code name Nehalem, Dunnington and later
				1221	// architectures support a newer interface for specifying the x2APIC Ids,
				1222	// based on cpuid leaf 11.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1223	static int __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
				1224	kmp_i18n_id_t *const msg_id) {
				1225	kmp_cpuid buf;
				1226	*address2os = NULL;
				1227	*msg_id = kmp_i18n_null;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1228
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1229	// Check to see if cpuid leaf 11 is supported.
				1230	__kmp_x86_cpuid(0, 0, &buf);
				1231	if (buf.eax < 11) {
				1232	*msg_id = kmp_i18n_str_NoLeaf11Support;
				1233	return -1;
				1234	}
				1235	__kmp_x86_cpuid(11, 0, &buf);
				1236	if (buf.ebx == 0) {
				1237	*msg_id = kmp_i18n_str_NoLeaf11Support;
				1238	return -1;
				1239	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1240
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1241	// Find the number of levels in the machine topology. While we're at it, get
				1242	// the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will try to
				1243	// get more accurate values later by explicitly counting them, but get
				1244	// reasonable defaults now, in case we return early.
				1245	int level;
				1246	int threadLevel = -1;
				1247	int coreLevel = -1;
				1248	int pkgLevel = -1;
				1249	__kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
				1250
				1251	for (level = 0;; level++) {
				1252	if (level > 31) {
				1253	// FIXME: Hack for DPD200163180
				1254	//
				1255	// If level is big then something went wrong -> exiting
				1256	//
				1257	// There could actually be 32 valid levels in the machine topology, but so
				1258	// far, the only machine we have seen which does not exit this loop before
				1259	// iteration 32 has fubar x2APIC settings.
				1260	//
				1261	// For now, just reject this case based upon loop trip count.
				1262	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1263	return -1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1264	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1265	__kmp_x86_cpuid(11, level, &buf);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1266	if (buf.ebx == 0) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1267	if (pkgLevel < 0) {
				1268	// Will infer nPackages from __kmp_xproc
				1269	pkgLevel = level;
				1270	level++;
				1271	}
				1272	break;
				1273	}
				1274	int kind = (buf.ecx >> 8) & 0xff;
				1275	if (kind == 1) {
				1276	// SMT level
				1277	threadLevel = level;
				1278	coreLevel = -1;
				1279	pkgLevel = -1;
				1280	__kmp_nThreadsPerCore = buf.ebx & 0xffff;
				1281	if (__kmp_nThreadsPerCore == 0) {
				1282	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1283	return -1;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1284	}
				1285	} else if (kind == 2) {
				1286	// core level
				1287	coreLevel = level;
				1288	pkgLevel = -1;
				1289	nCoresPerPkg = buf.ebx & 0xffff;
				1290	if (nCoresPerPkg == 0) {
				1291	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1292	return -1;
				1293	}
				1294	} else {
				1295	if (level <= 0) {
				1296	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1297	return -1;
				1298	}
				1299	if (pkgLevel >= 0) {
				1300	continue;
				1301	}
				1302	pkgLevel = level;
				1303	nPackages = buf.ebx & 0xffff;
				1304	if (nPackages == 0) {
				1305	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1306	return -1;
				1307	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1308	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1309	}
				1310	int depth = level;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1311
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1312	// In the above loop, "level" was counted from the finest level (usually
				1313	// thread) to the coarsest. The caller expects that we will place the labels
				1314	// in (*address2os)[].first.labels[] in the inverse order, so we need to
				1315	// invert the vars saying which level means what.
				1316	if (threadLevel >= 0) {
				1317	threadLevel = depth - threadLevel - 1;
				1318	}
				1319	if (coreLevel >= 0) {
				1320	coreLevel = depth - coreLevel - 1;
				1321	}
				1322	KMP_DEBUG_ASSERT(pkgLevel >= 0);
				1323	pkgLevel = depth - pkgLevel - 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1324
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1325	// The algorithm used starts by setting the affinity to each available thread
				1326	// and retrieving info from the cpuid instruction, so if we are not capable of
				1327	// calling __kmp_get_system_affinity() and __kmp_set_system_affinity(), then we
				1328	// need to do something else - use the defaults that we calculated from
				1329	// issuing cpuid without binding to each proc.
				1330	if (!KMP_AFFINITY_CAPABLE()) {
				1331	// Hack to try and infer the machine topology using only the data
				1332	// available from cpuid on the current thread, and __kmp_xproc.
				1333	KMP_ASSERT(__kmp_affinity_type == affinity_none);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1334
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1335	__kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
				1336	nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1337	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1338	KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
				1339	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1340	if (__kmp_affinity_uniform_topology()) {
				1341	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1342	} else {
				1343	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				1344	}
				1345	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1346	__kmp_nThreadsPerCore, __kmp_ncores);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1347	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1348	return 0;
				1349	}
				1350
				1351	// From here on, we can assume that it is safe to call
				1352	// __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
				1353	// __kmp_affinity_type = affinity_none.
				1354
				1355	// Save the affinity mask for the current thread.
				1356	kmp_affin_mask_t *oldMask;
				1357	KMP_CPU_ALLOC(oldMask);
				1358	__kmp_get_system_affinity(oldMask, TRUE);
				1359
				1360	// Allocate the data structure to be returned.
				1361	AddrUnsPair *retval =
				1362	(AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair) __kmp_avail_proc);
				1363
				1364	// Run through each of the available contexts, binding the current thread
				1365	// to it, and obtaining the pertinent information using the cpuid instr.
				1366	unsigned int proc;
				1367	int nApics = 0;
				1368	KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
				1369	// Skip this proc if it is not included in the machine model.
				1370	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
				1371	continue;
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	1372	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1373	KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
				1374
				1375	__kmp_affinity_dispatch->bind_thread(proc);
				1376
				1377	// Extract labels for each level in the machine topology map from Apic ID.
				1378	Address addr(depth);
				1379	int prev_shift = 0;
				1380
				1381	for (level = 0; level < depth; level++) {
				1382	__kmp_x86_cpuid(11, level, &buf);
				1383	unsigned apicId = buf.edx;
				1384	if (buf.ebx == 0) {
				1385	if (level != depth - 1) {
				1386	KMP_CPU_FREE(oldMask);
				1387	*msg_id = kmp_i18n_str_InconsistentCpuidInfo;
				1388	return -1;
				1389	}
				1390	addr.labels[depth - level - 1] = apicId >> prev_shift;
				1391	level++;
				1392	break;
				1393	}
				1394	int shift = buf.eax & 0x1f;
				1395	int mask = (1 << shift) - 1;
				1396	addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
				1397	prev_shift = shift;
				1398	}
				1399	if (level != depth) {
				1400	KMP_CPU_FREE(oldMask);
				1401	*msg_id = kmp_i18n_str_InconsistentCpuidInfo;
				1402	return -1;
				1403	}
				1404
				1405	retval[nApics] = AddrUnsPair(addr, proc);
				1406	nApics++;
				1407	}
				1408
				1409	// We've collected all the info we need.
				1410	// Restore the old affinity mask for this thread.
				1411	__kmp_set_system_affinity(oldMask, TRUE);
				1412
				1413	// If there's only one thread context to bind to, return now.
				1414	KMP_ASSERT(nApics > 0);
				1415	if (nApics == 1) {
				1416	__kmp_ncores = nPackages = 1;
				1417	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
				1418	if (__kmp_affinity_verbose) {
				1419	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				1420	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1421
				1422	KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
				1423	if (__kmp_affinity_respect_mask) {
				1424	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				1425	} else {
				1426	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				1427	}
				1428	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1429	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1430	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1431	__kmp_nThreadsPerCore, __kmp_ncores);
				1432	}
				1433
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1434	if (__kmp_affinity_type == affinity_none) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1435	__kmp_free(retval);
				1436	KMP_CPU_FREE(oldMask);
				1437	return 0;
				1438	}
				1439
				1440	// Form an Address object which only includes the package level.
				1441	Address addr(1);
				1442	addr.labels[0] = retval[0].first.labels[pkgLevel];
				1443	retval[0].first = addr;
				1444
				1445	if (__kmp_affinity_gran_levels < 0) {
				1446	__kmp_affinity_gran_levels = 0;
				1447	}
				1448
				1449	if (__kmp_affinity_verbose) {
				1450	__kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
				1451	}
				1452
				1453	*address2os = retval;
				1454	KMP_CPU_FREE(oldMask);
				1455	return 1;
				1456	}
				1457
				1458	// Sort the table by physical Id.
				1459	qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
				1460
				1461	// Find the radix at each of the levels.
				1462	unsigned totals = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1463	unsigned counts = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1464	unsigned maxCt = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1465	unsigned last = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1466	for (level = 0; level < depth; level++) {
				1467	totals[level] = 1;
				1468	maxCt[level] = 1;
				1469	counts[level] = 1;
				1470	last[level] = retval[0].first.labels[level];
				1471	}
				1472
				1473	// From here on, the iteration variable "level" runs from the finest level to
				1474	// the coarsest, i.e. we iterate forward through
				1475	// (*address2os)[].first.labels[] - in the previous loops, we iterated
				1476	// backwards.
				1477	for (proc = 1; (int)proc < nApics; proc++) {
				1478	int level;
				1479	for (level = 0; level < depth; level++) {
				1480	if (retval[proc].first.labels[level] != last[level]) {
				1481	int j;
				1482	for (j = level + 1; j < depth; j++) {
				1483	totals[j]++;
				1484	counts[j] = 1;
				1485	// The line below causes printing incorrect topology information in
				1486	// case the max value for some level (maxCt[level]) is encountered
				1487	// earlier than some less value while going through the array. For
				1488	// example, let pkg0 has 4 cores and pkg1 has 2 cores. Then
				1489	// maxCt[1] == 2
				1490	// whereas it must be 4.
				1491	// TODO!!! Check if it can be commented safely
				1492	// maxCt[j] = 1;
				1493	last[j] = retval[proc].first.labels[j];
				1494	}
				1495	totals[level]++;
				1496	counts[level]++;
				1497	if (counts[level] > maxCt[level]) {
				1498	maxCt[level] = counts[level];
				1499	}
				1500	last[level] = retval[proc].first.labels[level];
				1501	break;
				1502	} else if (level == depth - 1) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1503	__kmp_free(last);
				1504	__kmp_free(maxCt);
				1505	__kmp_free(counts);
				1506	__kmp_free(totals);
				1507	__kmp_free(retval);
				1508	KMP_CPU_FREE(oldMask);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1509	*msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
				1510	return -1;
				1511	}
				1512	}
				1513	}
				1514
				1515	// When affinity is off, this routine will still be called to set
				1516	// __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
				1517	// Make sure all these vars are set correctly, and return if affinity is not
				1518	// enabled.
				1519	if (threadLevel >= 0) {
				1520	__kmp_nThreadsPerCore = maxCt[threadLevel];
				1521	} else {
				1522	__kmp_nThreadsPerCore = 1;
				1523	}
				1524	nPackages = totals[pkgLevel];
				1525
				1526	if (coreLevel >= 0) {
				1527	__kmp_ncores = totals[coreLevel];
				1528	nCoresPerPkg = maxCt[coreLevel];
				1529	} else {
				1530	__kmp_ncores = nPackages;
				1531	nCoresPerPkg = 1;
				1532	}
				1533
				1534	// Check to see if the machine topology is uniform
				1535	unsigned prod = maxCt[0];
				1536	for (level = 1; level < depth; level++) {
				1537	prod *= maxCt[level];
				1538	}
				1539	bool uniform = (prod == totals[level - 1]);
				1540
				1541	// Print the machine topology summary.
				1542	if (__kmp_affinity_verbose) {
				1543	char mask[KMP_AFFIN_MASK_PRINT_LEN];
				1544	__kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1545
				1546	KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
				1547	if (__kmp_affinity_respect_mask) {
				1548	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
				1549	} else {
				1550	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
				1551	}
				1552	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1553	if (uniform) {
				1554	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1555	} else {
				1556	KMP_INFORM(NonUniform, "KMP_AFFINITY");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1557	}
				1558
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1559	kmp_str_buf_t buf;
				1560	__kmp_str_buf_init(&buf);
				1561
				1562	__kmp_str_buf_print(&buf, "%d", totals[0]);
				1563	for (level = 1; level <= pkgLevel; level++) {
				1564	__kmp_str_buf_print(&buf, " x %d", maxCt[level]);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1565	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1566	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
				1567	__kmp_nThreadsPerCore, __kmp_ncores);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1568
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1569	__kmp_str_buf_free(&buf);
				1570	}
				1571	KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
				1572	KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
				1573	__kmp_pu_os_idx = (int )__kmp_allocate(sizeof(int) __kmp_avail_proc);
				1574	for (proc = 0; (int)proc < nApics; ++proc) {
				1575	__kmp_pu_os_idx[proc] = retval[proc].second;
				1576	}
				1577	if (__kmp_affinity_type == affinity_none) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1578	__kmp_free(last);
				1579	__kmp_free(maxCt);
				1580	__kmp_free(counts);
				1581	__kmp_free(totals);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1582	__kmp_free(retval);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1583	KMP_CPU_FREE(oldMask);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1584	return 0;
				1585	}
				1586
				1587	// Find any levels with radix 1, and remove them from the map
				1588	// (except for the package level).
				1589	int new_depth = 0;
				1590	for (level = 0; level < depth; level++) {
				1591	if ((maxCt[level] == 1) && (level != pkgLevel)) {
				1592	continue;
				1593	}
				1594	new_depth++;
				1595	}
				1596
				1597	// If we are removing any levels, allocate a new vector to return,
				1598	// and copy the relevant information to it.
				1599	if (new_depth != depth) {
				1600	AddrUnsPair *new_retval =
				1601	(AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair) nApics);
				1602	for (proc = 0; (int)proc < nApics; proc++) {
				1603	Address addr(new_depth);
				1604	new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
				1605	}
				1606	int new_level = 0;
				1607	int newPkgLevel = -1;
				1608	int newCoreLevel = -1;
				1609	int newThreadLevel = -1;
				1610	int i;
				1611	for (level = 0; level < depth; level++) {
				1612	if ((maxCt[level] == 1) && (level != pkgLevel)) {
				1613	// Remove this level. Never remove the package level
				1614	continue;
				1615	}
				1616	if (level == pkgLevel) {
				1617	newPkgLevel = level;
				1618	}
				1619	if (level == coreLevel) {
				1620	newCoreLevel = level;
				1621	}
				1622	if (level == threadLevel) {
				1623	newThreadLevel = level;
				1624	}
				1625	for (proc = 0; (int)proc < nApics; proc++) {
				1626	new_retval[proc].first.labels[new_level] =
				1627	retval[proc].first.labels[level];
				1628	}
				1629	new_level++;
				1630	}
				1631
				1632	__kmp_free(retval);
				1633	retval = new_retval;
				1634	depth = new_depth;
				1635	pkgLevel = newPkgLevel;
				1636	coreLevel = newCoreLevel;
				1637	threadLevel = newThreadLevel;
				1638	}
				1639
				1640	if (__kmp_affinity_gran_levels < 0) {
				1641	// Set the granularity level based on what levels are modeled
				1642	// in the machine topology map.
				1643	__kmp_affinity_gran_levels = 0;
				1644	if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
				1645	__kmp_affinity_gran_levels++;
				1646	}
				1647	if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
				1648	__kmp_affinity_gran_levels++;
				1649	}
				1650	if (__kmp_affinity_gran > affinity_gran_package) {
				1651	__kmp_affinity_gran_levels++;
				1652	}
				1653	}
				1654
				1655	if (__kmp_affinity_verbose) {
				1656	__kmp_affinity_print_topology(retval, nApics, depth, pkgLevel, coreLevel,
				1657	threadLevel);
				1658	}
				1659
				1660	__kmp_free(last);
				1661	__kmp_free(maxCt);
				1662	__kmp_free(counts);
				1663	__kmp_free(totals);
				1664	KMP_CPU_FREE(oldMask);
				1665	*address2os = retval;
				1666	return depth;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1667	}
				1668
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1669	#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1670
// Column layout of one parsed cpuinfo record (a ProcCpuInfo). Each record is
// an array of unsigned values indexed by the constants below; node_<n> ids
// are stored at nodeIdIndex + n.
#define osIdIndex 0
#define threadIdIndex 1
#define coreIdIndex 2
#define pkgIdIndex 3
#define nodeIdIndex 4

typedef unsigned *ProcCpuInfo;

// Highest valid index into a ProcCpuInfo record. Starts at the package id and
// is raised by the cpuinfo parser when it encounters node_<n> fields.
static unsigned maxIndex = pkgIdIndex;

// qsort-style comparator that orders records by their OS proc id only.
//
// a, b: interpreted as pointers to the first element of a record, i.e. the
//       sorted array is expected to hold the records themselves.
// NOTE(review): unlike the phys_id comparator below, this one does not
// dereference a pointer-to-record first - confirm against the caller's array
// layout before using it on an array of ProcCpuInfo pointers.
//
// Returns -1, 0 or 1 when a's os id is less than, equal to, or greater than
// b's.
static int __kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b) {
  const unsigned *aa = (const unsigned *)a;
  const unsigned *bb = (const unsigned *)b;
  if (aa[osIdIndex] < bb[osIdIndex])
    return -1;
  if (aa[osIdIndex] > bb[osIdIndex])
    return 1;
  return 0;
}

// qsort-style comparator that orders records by physical location.
//
// a, b: pointers to elements of an array of record pointers (unsigned **).
//
// Fields are compared from index maxIndex down to osIdIndex, so the
// highest-indexed field is the most significant sort key and the OS proc id
// is the final tie-breaker.
//
// Returns -1, 0 or 1 following the usual comparator convention.
static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a,
                                                  const void *b) {
  unsigned i;
  const unsigned *aa = *(unsigned *const *)a;
  const unsigned *bb = *(unsigned *const *)b;
  // i is unsigned, so the loop terminates with an explicit break at
  // osIdIndex instead of an "i >= 0" condition, which would never be false.
  for (i = maxIndex;; i--) {
    if (aa[i] < bb[i])
      return -1;
    if (aa[i] > bb[i])
      return 1;
    if (i == osIdIndex)
      break;
  }
  return 0;
}
				1705
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1706	// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
				1707	// affinity map.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1708	static int __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os,
				1709	int *line,
				1710	kmp_i18n_id_t *const msg_id,
				1711	FILE *f) {
				1712	*address2os = NULL;
				1713	*msg_id = kmp_i18n_null;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1714
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1715	// Scan of the file, and count the number of "processor" (osId) fields,
				1716	// and find the highest value of <n> for a node_<n> field.
				1717	char buf[256];
				1718	unsigned num_records = 0;
				1719	while (!feof(f)) {
				1720	buf[sizeof(buf) - 1] = 1;
				1721	if (!fgets(buf, sizeof(buf), f)) {
				1722	// Read errors presumably because of EOF
				1723	break;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1724	}
				1725
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1726	char s1[] = "processor";
				1727	if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
				1728	num_records++;
				1729	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1730	}
				1731
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1732	// FIXME - this will match "node_<n> <garbage>"
				1733	unsigned level;
				1734	if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
				1735	if (nodeIdIndex + level >= maxIndex) {
				1736	maxIndex = nodeIdIndex + level;
				1737	}
				1738	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1739	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1740	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1741
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1742	// Check for empty file / no valid processor records, or too many. The number
				1743	// of records can't exceed the number of valid bits in the affinity mask.
				1744	if (num_records == 0) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1745	*line = 0;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1746	*msg_id = kmp_i18n_str_NoProcRecords;
				1747	return -1;
				1748	}
				1749	if (num_records > (unsigned)__kmp_xproc) {
				1750	*line = 0;
				1751	*msg_id = kmp_i18n_str_TooManyProcRecords;
				1752	return -1;
				1753	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1754
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1755	// Set the file pointer back to the beginning, so that we can scan the file
				1756	// again, this time performing a full parse of the data. Allocate a vector of
				1757	// ProcCpuInfo object, where we will place the data. Adding an extra element
				1758	// at the end allows us to remove a lot of extra checks for termination
				1759	// conditions.
				1760	if (fseek(f, 0, SEEK_SET) != 0) {
				1761	*line = 0;
				1762	*msg_id = kmp_i18n_str_CantRewindCpuinfo;
				1763	return -1;
				1764	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1765
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1766	// Allocate the array of records to store the proc info in. The dummy
				1767	// element at the end makes the logic in filling them out easier to code.
				1768	unsigned **threadInfo =
				1769	(unsigned *)__kmp_allocate((num_records + 1) sizeof(unsigned *));
				1770	unsigned i;
				1771	for (i = 0; i <= num_records; i++) {
				1772	threadInfo[i] =
				1773	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned));
				1774	}
				1775
				1776	#define CLEANUP_THREAD_INFO \
				1777	for (i = 0; i <= num_records; i++) { \
				1778	__kmp_free(threadInfo[i]); \
				1779	} \
				1780	__kmp_free(threadInfo);
				1781
				1782	// A value of UINT_MAX means that we didn't find the field
				1783	unsigned __index;
				1784
				1785	#define INIT_PROC_INFO(p) \
				1786	for (__index = 0; __index <= maxIndex; __index++) { \
				1787	(p)[__index] = UINT_MAX; \
				1788	}
				1789
				1790	for (i = 0; i <= num_records; i++) {
				1791	INIT_PROC_INFO(threadInfo[i]);
				1792	}
				1793
				1794	unsigned num_avail = 0;
				1795	*line = 0;
				1796	while (!feof(f)) {
				1797	// Create an inner scoping level, so that all the goto targets at the end of
				1798	// the loop appear in an outer scoping level. This avoids warnings about
				1799	// jumping past an initialization to a target in the same block.
				1800	{
				1801	buf[sizeof(buf) - 1] = 1;
				1802	bool long_line = false;
				1803	if (!fgets(buf, sizeof(buf), f)) {
				1804	// Read errors presumably because of EOF
				1805	// If there is valid data in threadInfo[num_avail], then fake
				1806	// a blank line to ensure that the last address gets parsed.
				1807	bool valid = false;
				1808	for (i = 0; i <= maxIndex; i++) {
				1809	if (threadInfo[num_avail][i] != UINT_MAX) {
				1810	valid = true;
				1811	}
				1812	}
				1813	if (!valid) {
				1814	break;
				1815	}
				1816	buf[0] = 0;
				1817	} else if (!buf[sizeof(buf) - 1]) {
				1818	// The line is longer than the buffer. Set a flag and don't
				1819	// emit an error if we were going to ignore the line, anyway.
				1820	long_line = true;
				1821
				1822	#define CHECK_LINE \
				1823	if (long_line) { \
				1824	CLEANUP_THREAD_INFO; \
				1825	*msg_id = kmp_i18n_str_LongLineCpuinfo; \
				1826	return -1; \
				1827	}
				1828	}
				1829	(*line)++;
				1830
				1831	char s1[] = "processor";
				1832	if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
				1833	CHECK_LINE;
				1834	char *p = strchr(buf + sizeof(s1) - 1, ':');
				1835	unsigned val;
				1836	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
				1837	goto no_val;
				1838	if (threadInfo[num_avail][osIdIndex] != UINT_MAX)
				1839	goto dup_field;
				1840	threadInfo[num_avail][osIdIndex] = val;
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	1841	#if KMP_OS_LINUX && USE_SYSFS_INFO
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1842	char path[256];
				1843	KMP_SNPRINTF(
				1844	path, sizeof(path),
				1845	"/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
				1846	threadInfo[num_avail][osIdIndex]);
				1847	__kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	1848
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1849	KMP_SNPRINTF(path, sizeof(path),
				1850	"/sys/devices/system/cpu/cpu%u/topology/core_id",
				1851	threadInfo[num_avail][osIdIndex]);
				1852	__kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
				1853	continue;
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	1854	#else
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1855	}
				1856	char s2[] = "physical id";
				1857	if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
				1858	CHECK_LINE;
				1859	char *p = strchr(buf + sizeof(s2) - 1, ':');
				1860	unsigned val;
				1861	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
				1862	goto no_val;
				1863	if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX)
				1864	goto dup_field;
				1865	threadInfo[num_avail][pkgIdIndex] = val;
				1866	continue;
				1867	}
				1868	char s3[] = "core id";
				1869	if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
				1870	CHECK_LINE;
				1871	char *p = strchr(buf + sizeof(s3) - 1, ':');
				1872	unsigned val;
				1873	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
				1874	goto no_val;
				1875	if (threadInfo[num_avail][coreIdIndex] != UINT_MAX)
				1876	goto dup_field;
				1877	threadInfo[num_avail][coreIdIndex] = val;
				1878	continue;
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	1879	#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1880	}
				1881	char s4[] = "thread id";
				1882	if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
				1883	CHECK_LINE;
				1884	char *p = strchr(buf + sizeof(s4) - 1, ':');
				1885	unsigned val;
				1886	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
				1887	goto no_val;
				1888	if (threadInfo[num_avail][threadIdIndex] != UINT_MAX)
				1889	goto dup_field;
				1890	threadInfo[num_avail][threadIdIndex] = val;
				1891	continue;
				1892	}
				1893	unsigned level;
				1894	if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
				1895	CHECK_LINE;
				1896	char *p = strchr(buf + sizeof(s4) - 1, ':');
				1897	unsigned val;
				1898	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
				1899	goto no_val;
				1900	KMP_ASSERT(nodeIdIndex + level <= maxIndex);
				1901	if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX)
				1902	goto dup_field;
				1903	threadInfo[num_avail][nodeIdIndex + level] = val;
				1904	continue;
				1905	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1906
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1907	// We didn't recognize the leading token on the line. There are lots of
				1908	// leading tokens that we don't recognize - if the line isn't empty, go on
				1909	// to the next line.
				1910	if ((buf != 0) && (buf != '\n')) {
				1911	// If the line is longer than the buffer, read characters
				1912	// until we find a newline.
				1913	if (long_line) {
				1914	int ch;
				1915	while (((ch = fgetc(f)) != EOF) && (ch != '\n'))
				1916	;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1917	}
				1918	continue;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1919	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1920
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1921	// A newline has signalled the end of the processor record.
				1922	// Check that there aren't too many procs specified.
				1923	if ((int)num_avail == __kmp_xproc) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1924	CLEANUP_THREAD_INFO;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1925	*msg_id = kmp_i18n_str_TooManyEntries;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1926	return -1;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1927	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1928
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1929	// Check for missing fields. The osId field must be there, and we
				1930	// currently require that the physical id field is specified, also.
				1931	if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1932	CLEANUP_THREAD_INFO;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1933	*msg_id = kmp_i18n_str_MissingProcField;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1934	return -1;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1935	}
				1936	if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1937	CLEANUP_THREAD_INFO;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1938	*msg_id = kmp_i18n_str_MissingPhysicalIDField;
				1939	return -1;
				1940	}
				1941
				1942	// Skip this proc if it is not included in the machine model.
				1943	if (!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex],
				1944	__kmp_affin_fullMask)) {
				1945	INIT_PROC_INFO(threadInfo[num_avail]);
				1946	continue;
				1947	}
				1948
				1949	// We have a successful parse of this proc's info.
				1950	// Increment the counter, and prepare for the next proc.
				1951	num_avail++;
				1952	KMP_ASSERT(num_avail <= num_records);
				1953	INIT_PROC_INFO(threadInfo[num_avail]);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1954	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1955	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1956
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1957	no_val:
				1958	CLEANUP_THREAD_INFO;
				1959	*msg_id = kmp_i18n_str_MissingValCpuinfo;
				1960	return -1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1961
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1962	dup_field:
				1963	CLEANUP_THREAD_INFO;
				1964	*msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
				1965	return -1;
				1966	}
				1967	*line = 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1968
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1969	#if KMP_MIC && REDUCE_TEAM_SIZE
				1970	unsigned teamSize = 0;
				1971	#endif // KMP_MIC && REDUCE_TEAM_SIZE
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1972
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1973	// check for num_records == __kmp_xproc ???
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1974
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1975	// If there's only one thread context to bind to, form an Address object with
				1976	// depth 1 and return immediately (or, if affinity is off, set address2os to
				1977	// NULL and return).
				1978	//
				1979	// If it is configured to omit the package level when there is only a single
				1980	// package, the logic at the end of this routine won't work if there is only a
				1981	// single thread - it would try to form an Address object with depth 0.
				1982	KMP_ASSERT(num_avail > 0);
				1983	KMP_ASSERT(num_avail <= num_records);
				1984	if (num_avail == 1) {
				1985	__kmp_ncores = 1;
				1986	__kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1987	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	1988	if (!KMP_AFFINITY_CAPABLE()) {
				1989	KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
				1990	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1991	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1992	} else {
				1993	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				1994	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				1995	__kmp_affin_fullMask);
				1996	KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
				1997	if (__kmp_affinity_respect_mask) {
				1998	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				1999	} else {
				2000	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2001	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2002	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				2003	KMP_INFORM(Uniform, "KMP_AFFINITY");
				2004	}
				2005	int index;
				2006	kmp_str_buf_t buf;
				2007	__kmp_str_buf_init(&buf);
				2008	__kmp_str_buf_print(&buf, "1");
				2009	for (index = maxIndex - 1; index > pkgIdIndex; index--) {
				2010	__kmp_str_buf_print(&buf, " x 1");
				2011	}
				2012	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
				2013	__kmp_str_buf_free(&buf);
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	2014	}
				2015
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2016	if (__kmp_affinity_type == affinity_none) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2017	CLEANUP_THREAD_INFO;
				2018	return 0;
				2019	}
				2020
				2021	address2os = (AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair));
				2022	Address addr(1);
				2023	addr.labels[0] = threadInfo[0][pkgIdIndex];
				2024	(*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
				2025
				2026	if (__kmp_affinity_gran_levels < 0) {
				2027	__kmp_affinity_gran_levels = 0;
				2028	}
				2029
				2030	if (__kmp_affinity_verbose) {
				2031	__kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
				2032	}
				2033
				2034	CLEANUP_THREAD_INFO;
				2035	return 1;
				2036	}
				2037
				2038	// Sort the threadInfo table by physical Id.
				2039	qsort(threadInfo, num_avail, sizeof(*threadInfo),
				2040	__kmp_affinity_cmp_ProcCpuInfo_phys_id);
				2041
				2042	// The table is now sorted by pkgId / coreId / threadId, but we really don't
				2043	// know the radix of any of the fields. pkgId's may be sparsely assigned among
				2044	// the chips on a system. Although coreId's are usually assigned
				2045	// [0 .. coresPerPkg-1] and threadId's are usually assigned
				2046	// [0..threadsPerCore-1], we don't want to make any such assumptions.
				2047	//
				2048	// For that matter, we don't know what coresPerPkg and threadsPerCore (or the
				2049	// total # packages) are at this point - we want to determine that now. We
				2050	// only have an upper bound on the first two figures.
				2051	unsigned *counts =
				2052	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned));
				2053	unsigned *maxCt =
				2054	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned));
				2055	unsigned *totals =
				2056	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned));
				2057	unsigned *lastId =
				2058	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned));
				2059
				2060	bool assign_thread_ids = false;
				2061	unsigned threadIdCt;
				2062	unsigned index;
				2063
				2064	restart_radix_check:
				2065	threadIdCt = 0;
				2066
				2067	// Initialize the counter arrays with data from threadInfo[0].
				2068	if (assign_thread_ids) {
				2069	if (threadInfo[0][threadIdIndex] == UINT_MAX) {
				2070	threadInfo[0][threadIdIndex] = threadIdCt++;
				2071	} else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
				2072	threadIdCt = threadInfo[0][threadIdIndex] + 1;
				2073	}
				2074	}
				2075	for (index = 0; index <= maxIndex; index++) {
				2076	counts[index] = 1;
				2077	maxCt[index] = 1;
				2078	totals[index] = 1;
				2079	lastId[index] = threadInfo[0][index];
				2080	;
				2081	}
				2082
				2083	// Run through the rest of the OS procs.
				2084	for (i = 1; i < num_avail; i++) {
				2085	// Find the most significant index whose id differs from the id for the
				2086	// previous OS proc.
				2087	for (index = maxIndex; index >= threadIdIndex; index--) {
				2088	if (assign_thread_ids && (index == threadIdIndex)) {
				2089	// Auto-assign the thread id field if it wasn't specified.
				2090	if (threadInfo[i][threadIdIndex] == UINT_MAX) {
				2091	threadInfo[i][threadIdIndex] = threadIdCt++;
				2092	}
				2093	// Apparently the thread id field was specified for some entries and not
				2094	// others. Start the thread id counter off at the next higher thread id.
				2095	else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
				2096	threadIdCt = threadInfo[i][threadIdIndex] + 1;
				2097	}
				2098	}
				2099	if (threadInfo[i][index] != lastId[index]) {
				2100	// Run through all indices which are less significant, and reset the
				2101	// counts to 1. At all levels up to and including index, we need to
				2102	// increment the totals and record the last id.
				2103	unsigned index2;
				2104	for (index2 = threadIdIndex; index2 < index; index2++) {
				2105	totals[index2]++;
				2106	if (counts[index2] > maxCt[index2]) {
				2107	maxCt[index2] = counts[index2];
				2108	}
				2109	counts[index2] = 1;
				2110	lastId[index2] = threadInfo[i][index2];
				2111	}
				2112	counts[index]++;
				2113	totals[index]++;
				2114	lastId[index] = threadInfo[i][index];
				2115
				2116	if (assign_thread_ids && (index > threadIdIndex)) {
				2117
				2118	#if KMP_MIC && REDUCE_TEAM_SIZE
				2119	// The default team size is the total #threads in the machine
				2120	// minus 1 thread for every core that has 3 or more threads.
				2121	teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
				2122	#endif // KMP_MIC && REDUCE_TEAM_SIZE
				2123
				2124	// Restart the thread counter, as we are on a new core.
				2125	threadIdCt = 0;
				2126
				2127	// Auto-assign the thread id field if it wasn't specified.
				2128	if (threadInfo[i][threadIdIndex] == UINT_MAX) {
				2129	threadInfo[i][threadIdIndex] = threadIdCt++;
				2130	}
				2131
				2132	// Apparently the thread id field was specified for some entries and
				2133	// not others. Start the thread id counter off at the next higher
				2134	// thread id.
				2135	else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
				2136	threadIdCt = threadInfo[i][threadIdIndex] + 1;
				2137	}
				2138	}
				2139	break;
				2140	}
				2141	}
				2142	if (index < threadIdIndex) {
				2143	// If thread ids were specified, it is an error if they are not unique.
				2144	// Also, check that we haven't already restarted the loop (to be safe -
				2145	// shouldn't need to).
				2146	if ((threadInfo[i][threadIdIndex] != UINT_MAX) \|\| assign_thread_ids) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2147	__kmp_free(lastId);
				2148	__kmp_free(totals);
				2149	__kmp_free(maxCt);
				2150	__kmp_free(counts);
				2151	CLEANUP_THREAD_INFO;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2152	*msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
				2153	return -1;
				2154	}
				2155
				2156	// If the thread ids were not specified and we see entries that
				2157	// are duplicates, start the loop over and assign the thread ids manually.
				2158	assign_thread_ids = true;
				2159	goto restart_radix_check;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2160	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2161	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2162
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2163	#if KMP_MIC && REDUCE_TEAM_SIZE
				2164	// The default team size is the total #threads in the machine
				2165	// minus 1 thread for every core that has 3 or more threads.
				2166	teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
				2167	#endif // KMP_MIC && REDUCE_TEAM_SIZE
				2168
				2169	for (index = threadIdIndex; index <= maxIndex; index++) {
				2170	if (counts[index] > maxCt[index]) {
				2171	maxCt[index] = counts[index];
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2172	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2173	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2174
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2175	__kmp_nThreadsPerCore = maxCt[threadIdIndex];
				2176	nCoresPerPkg = maxCt[coreIdIndex];
				2177	nPackages = totals[pkgIdIndex];
				2178
				2179	// Check to see if the machine topology is uniform
				2180	unsigned prod = totals[maxIndex];
				2181	for (index = threadIdIndex; index < maxIndex; index++) {
				2182	prod *= maxCt[index];
				2183	}
				2184	bool uniform = (prod == totals[threadIdIndex]);
				2185
				2186	// When affinity is off, this routine will still be called to set
				2187	// __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
				2188	// Make sure all these vars are set correctly, and return now if affinity is
				2189	// not enabled.
				2190	__kmp_ncores = totals[coreIdIndex];
				2191
				2192	if (__kmp_affinity_verbose) {
				2193	if (!KMP_AFFINITY_CAPABLE()) {
				2194	KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
				2195	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				2196	if (uniform) {
				2197	KMP_INFORM(Uniform, "KMP_AFFINITY");
				2198	} else {
				2199	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				2200	}
				2201	} else {
				2202	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				2203	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				2204	__kmp_affin_fullMask);
				2205	KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
				2206	if (__kmp_affinity_respect_mask) {
				2207	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				2208	} else {
				2209	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				2210	}
				2211	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				2212	if (uniform) {
				2213	KMP_INFORM(Uniform, "KMP_AFFINITY");
				2214	} else {
				2215	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				2216	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2217	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2218	kmp_str_buf_t buf;
				2219	__kmp_str_buf_init(&buf);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2220
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2221	__kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
				2222	for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
				2223	__kmp_str_buf_print(&buf, " x %d", maxCt[index]);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2224	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2225	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
				2226	maxCt[threadIdIndex], __kmp_ncores);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2227
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2228	__kmp_str_buf_free(&buf);
				2229	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2230
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2231	#if KMP_MIC && REDUCE_TEAM_SIZE
				2232	// Set the default team size.
				2233	if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
				2234	__kmp_dflt_team_nth = teamSize;
				2235	KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting "
				2236	"__kmp_dflt_team_nth = %d\n",
				2237	__kmp_dflt_team_nth));
				2238	}
				2239	#endif // KMP_MIC && REDUCE_TEAM_SIZE
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2240
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2241	KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
				2242	KMP_DEBUG_ASSERT(num_avail == __kmp_avail_proc);
				2243	__kmp_pu_os_idx = (int )__kmp_allocate(sizeof(int) __kmp_avail_proc);
				2244	for (i = 0; i < num_avail; ++i) { // fill the os indices
				2245	__kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
				2246	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2247
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2248	if (__kmp_affinity_type == affinity_none) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2249	__kmp_free(lastId);
				2250	__kmp_free(totals);
				2251	__kmp_free(maxCt);
				2252	__kmp_free(counts);
				2253	CLEANUP_THREAD_INFO;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2254	return 0;
				2255	}
				2256
				2257	// Count the number of levels which have more nodes at that level than at the
				2258	// parent's level (with there being an implicit root node of the top level).
				2259	// This is equivalent to saying that there is at least one node at this level
				2260	// which has a sibling. These levels are in the map, and the package level is
				2261	// always in the map.
				2262	bool inMap = (bool )__kmp_allocate((maxIndex + 1) * sizeof(bool));
				2263	int level = 0;
				2264	for (index = threadIdIndex; index < maxIndex; index++) {
				2265	KMP_ASSERT(totals[index] >= totals[index + 1]);
				2266	inMap[index] = (totals[index] > totals[index + 1]);
				2267	}
				2268	inMap[maxIndex] = (totals[maxIndex] > 1);
				2269	inMap[pkgIdIndex] = true;
				2270
				2271	int depth = 0;
				2272	for (index = threadIdIndex; index <= maxIndex; index++) {
				2273	if (inMap[index]) {
				2274	depth++;
				2275	}
				2276	}
				2277	KMP_ASSERT(depth > 0);
				2278
				2279	// Construct the data structure that is to be returned.
				2280	address2os = (AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair) * num_avail);
				2281	int pkgLevel = -1;
				2282	int coreLevel = -1;
				2283	int threadLevel = -1;
				2284
				2285	for (i = 0; i < num_avail; ++i) {
				2286	Address addr(depth);
				2287	unsigned os = threadInfo[i][osIdIndex];
				2288	int src_index;
				2289	int dst_index = 0;
				2290
				2291	for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
				2292	if (!inMap[src_index]) {
				2293	continue;
				2294	}
				2295	addr.labels[dst_index] = threadInfo[i][src_index];
				2296	if (src_index == pkgIdIndex) {
				2297	pkgLevel = dst_index;
				2298	} else if (src_index == coreIdIndex) {
				2299	coreLevel = dst_index;
				2300	} else if (src_index == threadIdIndex) {
				2301	threadLevel = dst_index;
				2302	}
				2303	dst_index++;
				2304	}
				2305	(*address2os)[i] = AddrUnsPair(addr, os);
				2306	}
				2307
				2308	if (__kmp_affinity_gran_levels < 0) {
				2309	// Set the granularity level based on what levels are modeled
				2310	// in the machine topology map.
				2311	unsigned src_index;
				2312	__kmp_affinity_gran_levels = 0;
				2313	for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
				2314	if (!inMap[src_index]) {
				2315	continue;
				2316	}
				2317	switch (src_index) {
				2318	case threadIdIndex:
				2319	if (__kmp_affinity_gran > affinity_gran_thread) {
				2320	__kmp_affinity_gran_levels++;
				2321	}
				2322
				2323	break;
				2324	case coreIdIndex:
				2325	if (__kmp_affinity_gran > affinity_gran_core) {
				2326	__kmp_affinity_gran_levels++;
				2327	}
				2328	break;
				2329
				2330	case pkgIdIndex:
				2331	if (__kmp_affinity_gran > affinity_gran_package) {
				2332	__kmp_affinity_gran_levels++;
				2333	}
				2334	break;
				2335	}
				2336	}
				2337	}
				2338
				2339	if (__kmp_affinity_verbose) {
				2340	__kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
				2341	coreLevel, threadLevel);
				2342	}
				2343
				2344	__kmp_free(inMap);
				2345	__kmp_free(lastId);
				2346	__kmp_free(totals);
				2347	__kmp_free(maxCt);
				2348	__kmp_free(counts);
				2349	CLEANUP_THREAD_INFO;
				2350	return depth;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2351	}
				2352
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2353	// Create and return a table of affinity masks, indexed by OS thread ID.
				2354	// This routine handles OR'ing together all the affinity masks of threads
				2355	// that are sufficiently close, if granularity > fine.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2356	static kmp_affin_mask_t __kmp_create_masks(unsigned maxIndex,
				2357	unsigned *numUnique,
				2358	AddrUnsPair *address2os,
				2359	unsigned numAddrs) {
				2360	// First form a table of affinity masks in order of OS thread id.
				2361	unsigned depth;
				2362	unsigned maxOsId;
				2363	unsigned i;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2364
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2365	KMP_ASSERT(numAddrs > 0);
				2366	depth = address2os[0].first.depth;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2367
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2368	maxOsId = 0;
				2369	for (i = 0; i < numAddrs; i++) {
				2370	unsigned osId = address2os[i].second;
				2371	if (osId > maxOsId) {
				2372	maxOsId = osId;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2373	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2374	}
				2375	kmp_affin_mask_t *osId2Mask;
				2376	KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId + 1));
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2377
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2378	// Sort the address2os table according to physical order. Doing so will put
				2379	// all threads on the same core/package/node in consecutive locations.
				2380	qsort(address2os, numAddrs, sizeof(*address2os),
				2381	__kmp_affinity_cmp_Address_labels);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2382
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2383	KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
				2384	if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
				2385	KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
				2386	}
				2387	if (__kmp_affinity_gran_levels >= (int)depth) {
				2388	if (__kmp_affinity_verbose \|\|
				2389	(__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
				2390	KMP_WARNING(AffThreadsMayMigrate);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2391	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2392	}
				2393
				2394	// Run through the table, forming the masks for all threads on each core.
				2395	// Threads on the same core will have identical "Address" objects, not
				2396	// considering the last level, which must be the thread id. All threads on a
				2397	// core will appear consecutively.
				2398	unsigned unique = 0;
				2399	unsigned j = 0; // index of 1st thread on core
				2400	unsigned leader = 0;
				2401	Address *leaderAddr = &(address2os[0].first);
				2402	kmp_affin_mask_t *sum;
				2403	KMP_CPU_ALLOC_ON_STACK(sum);
				2404	KMP_CPU_ZERO(sum);
				2405	KMP_CPU_SET(address2os[0].second, sum);
				2406	for (i = 1; i < numAddrs; i++) {
				2407	// If this thread is sufficiently close to the leader (within the
				2408	// granularity setting), then set the bit for this os thread in the
				2409	// affinity mask for this group, and go on to the next thread.
				2410	if (leaderAddr->isClose(address2os[i].first, __kmp_affinity_gran_levels)) {
				2411	KMP_CPU_SET(address2os[i].second, sum);
				2412	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2413	}
				2414
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2415	// For every thread in this group, copy the mask to the thread's entry in
				2416	// the osId2Mask table. Mark the first address as a leader.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2417	for (; j < i; j++) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2418	unsigned osId = address2os[j].second;
				2419	KMP_DEBUG_ASSERT(osId <= maxOsId);
				2420	kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
				2421	KMP_CPU_COPY(mask, sum);
				2422	address2os[j].first.leader = (j == leader);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2423	}
				2424	unique++;
				2425
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2426	// Start a new mask.
				2427	leader = i;
				2428	leaderAddr = &(address2os[i].first);
				2429	KMP_CPU_ZERO(sum);
				2430	KMP_CPU_SET(address2os[i].second, sum);
				2431	}
				2432
				2433	// For every thread in last group, copy the mask to the thread's
				2434	// entry in the osId2Mask table.
				2435	for (; j < i; j++) {
				2436	unsigned osId = address2os[j].second;
				2437	KMP_DEBUG_ASSERT(osId <= maxOsId);
				2438	kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
				2439	KMP_CPU_COPY(mask, sum);
				2440	address2os[j].first.leader = (j == leader);
				2441	}
				2442	unique++;
				2443	KMP_CPU_FREE_FROM_STACK(sum);
				2444
				2445	*maxIndex = maxOsId;
				2446	*numUnique = unique;
				2447	return osId2Mask;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2448	}
				2449
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2450	// Stuff for the affinity proclist parsers. It's easier to declare these vars
				2451	// as file-static than to try and pass them through the calling sequence of
				2452	// the recursive-descent OMP_PLACES parser.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2453	static kmp_affin_mask_t *newMasks;
				2454	static int numNewMasks;
				2455	static int nextNewMask;
				2456
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2457	#define ADD_MASK(_mask) \
				2458	{ \
				2459	if (nextNewMask >= numNewMasks) { \
				2460	int i; \
				2461	numNewMasks *= 2; \
				2462	kmp_affin_mask_t *temp; \
				2463	KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks); \
				2464	for (i = 0; i < numNewMasks / 2; i++) { \
				2465	kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i); \
				2466	kmp_affin_mask_t *dest = KMP_CPU_INDEX(temp, i); \
				2467	KMP_CPU_COPY(dest, src); \
				2468	} \
				2469	KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks / 2); \
				2470	newMasks = temp; \
				2471	} \
				2472	KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
				2473	nextNewMask++; \
				2474	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2475
// Append the mask for OS proc id _osId (looked up in _osId2Mask) to the
// newMasks list via ADD_MASK(). An id is rejected when it is larger than
// _maxOsId or when its own bit is not set in its osId2Mask entry; a rejected
// id is skipped, with an AffIgnoreInvalidProcID warning if verbose mode or
// affinity warnings are enabled. Note that _osId is evaluated more than once.
#define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId) \
  { \
    if (((_osId) > (_maxOsId)) || \
        (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
      if (__kmp_affinity_verbose || \
          (__kmp_affinity_warnings && \
           (__kmp_affinity_type != affinity_none))) { \
        KMP_WARNING(AffIgnoreInvalidProcID, (_osId)); \
      } \
    } else { \
      ADD_MASK(KMP_CPU_INDEX((_osId2Mask), (_osId))); \
    } \
  }
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2489
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2490	// Re-parse the proclist (for the explicit affinity type), and form the list
				2491	// of affinity newMasks indexed by gtid.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2492	static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
				2493	unsigned int *out_numMasks,
				2494	const char *proclist,
				2495	kmp_affin_mask_t *osId2Mask,
				2496	int maxOsId) {
				2497	int i;
				2498	const char *scan = proclist;
				2499	const char *next = proclist;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2500
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2501	// We use malloc() for the temporary mask vector, so that we can use
				2502	// realloc() to extend it.
				2503	numNewMasks = 2;
				2504	KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
				2505	nextNewMask = 0;
				2506	kmp_affin_mask_t *sumMask;
				2507	KMP_CPU_ALLOC(sumMask);
				2508	int setSize = 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2509
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2510	for (;;) {
				2511	int start, end, stride;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2512
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2513	SKIP_WS(scan);
				2514	next = scan;
				2515	if (*next == '\0') {
				2516	break;
				2517	}
				2518
				2519	if (*next == '{') {
				2520	int num;
				2521	setSize = 0;
				2522	next++; // skip '{'
				2523	SKIP_WS(next);
				2524	scan = next;
				2525
				2526	// Read the first integer in the set.
				2527	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad proclist");
				2528	SKIP_DIGITS(next);
				2529	num = __kmp_str_to_int(scan, *next);
				2530	KMP_ASSERT2(num >= 0, "bad explicit proc list");
				2531
				2532	// Copy the mask for that osId to the sum (union) mask.
				2533	if ((num > maxOsId) \|\|
				2534	(!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
				2535	if (__kmp_affinity_verbose \|\|
				2536	(__kmp_affinity_warnings &&
				2537	(__kmp_affinity_type != affinity_none))) {
				2538	KMP_WARNING(AffIgnoreInvalidProcID, num);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2539	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2540	KMP_CPU_ZERO(sumMask);
				2541	} else {
				2542	KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
				2543	setSize = 1;
				2544	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2545
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2546	for (;;) {
				2547	// Check for end of set.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2548	SKIP_WS(next);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2549	if (*next == '}') {
				2550	next++; // skip '}'
				2551	break;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2552	}
				2553
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2554	// Skip optional comma.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2555	if (*next == ',') {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2556	next++;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2557	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2558	SKIP_WS(next);
				2559
				2560	// Read the next integer in the set.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2561	scan = next;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2562	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list");
				2563
				2564	SKIP_DIGITS(next);
				2565	num = __kmp_str_to_int(scan, *next);
				2566	KMP_ASSERT2(num >= 0, "bad explicit proc list");
				2567
				2568	// Add the mask for that osId to the sum mask.
				2569	if ((num > maxOsId) \|\|
				2570	(!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
				2571	if (__kmp_affinity_verbose \|\|
				2572	(__kmp_affinity_warnings &&
				2573	(__kmp_affinity_type != affinity_none))) {
				2574	KMP_WARNING(AffIgnoreInvalidProcID, num);
				2575	}
				2576	} else {
				2577	KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
				2578	setSize++;
				2579	}
				2580	}
				2581	if (setSize > 0) {
				2582	ADD_MASK(sumMask);
				2583	}
				2584
				2585	SKIP_WS(next);
				2586	if (*next == ',') {
				2587	next++;
				2588	}
				2589	scan = next;
				2590	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2591	}
				2592
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2593	// Read the first integer.
				2594	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list");
				2595	SKIP_DIGITS(next);
				2596	start = __kmp_str_to_int(scan, *next);
				2597	KMP_ASSERT2(start >= 0, "bad explicit proc list");
				2598	SKIP_WS(next);
				2599
				2600	// If this isn't a range, then add a mask to the list and go on.
				2601	if (*next != '-') {
				2602	ADD_MASK_OSID(start, osId2Mask, maxOsId);
				2603
				2604	// Skip optional comma.
				2605	if (*next == ',') {
				2606	next++;
				2607	}
				2608	scan = next;
				2609	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2610	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2611
				2612	// This is a range. Skip over the '-' and read in the 2nd int.
				2613	next++; // skip '-'
				2614	SKIP_WS(next);
				2615	scan = next;
				2616	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list");
				2617	SKIP_DIGITS(next);
				2618	end = __kmp_str_to_int(scan, *next);
				2619	KMP_ASSERT2(end >= 0, "bad explicit proc list");
				2620
				2621	// Check for a stride parameter
				2622	stride = 1;
				2623	SKIP_WS(next);
				2624	if (*next == ':') {
				2625	// A stride is specified. Skip over the ':" and read the 3rd int.
				2626	int sign = +1;
				2627	next++; // skip ':'
				2628	SKIP_WS(next);
				2629	scan = next;
				2630	if (*next == '-') {
				2631	sign = -1;
				2632	next++;
				2633	SKIP_WS(next);
				2634	scan = next;
				2635	}
				2636	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list");
				2637	SKIP_DIGITS(next);
				2638	stride = __kmp_str_to_int(scan, *next);
				2639	KMP_ASSERT2(stride >= 0, "bad explicit proc list");
				2640	stride *= sign;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	2641	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2642
				2643	// Do some range checks.
				2644	KMP_ASSERT2(stride != 0, "bad explicit proc list");
				2645	if (stride > 0) {
				2646	KMP_ASSERT2(start <= end, "bad explicit proc list");
				2647	} else {
				2648	KMP_ASSERT2(start >= end, "bad explicit proc list");
				2649	}
				2650	KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
				2651
				2652	// Add the mask for each OS proc # to the list.
				2653	if (stride > 0) {
				2654	do {
				2655	ADD_MASK_OSID(start, osId2Mask, maxOsId);
				2656	start += stride;
				2657	} while (start <= end);
				2658	} else {
				2659	do {
				2660	ADD_MASK_OSID(start, osId2Mask, maxOsId);
				2661	start += stride;
				2662	} while (start >= end);
				2663	}
				2664
				2665	// Skip optional comma.
				2666	SKIP_WS(next);
				2667	if (*next == ',') {
				2668	next++;
				2669	}
				2670	scan = next;
				2671	}
				2672
				2673	*out_numMasks = nextNewMask;
				2674	if (nextNewMask == 0) {
				2675	*out_masks = NULL;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	2676	KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2677	return;
				2678	}
				2679	KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
				2680	for (i = 0; i < nextNewMask; i++) {
				2681	kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
				2682	kmp_affin_mask_t dest = KMP_CPU_INDEX((out_masks), i);
				2683	KMP_CPU_COPY(dest, src);
				2684	}
				2685	KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
				2686	KMP_CPU_FREE(sumMask);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2687	}
				2688
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2689	#if OMP_40_ENABLED
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2690
				2691	/*-----------------------------------------------------------------------------
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2692	Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
				2693	places. Again, here is the grammar:
				2694
				2695	place_list := place
				2696	place_list := place , place_list
				2697	place := num
				2698	place := place : num
				2699	place := place : num : signed
				2700	place := { subplacelist }
				2701	place := ! place // (lowest priority)
				2702	subplace_list := subplace
				2703	subplace_list := subplace , subplace_list
				2704	subplace := num
				2705	subplace := num : num
				2706	subplace := num : num : signed
				2707	signed := num
				2708	signed := + signed
				2709	signed := - signed
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2710	-----------------------------------------------------------------------------*/
				2711
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2712	static void __kmp_process_subplace_list(const char **scan,
				2713	kmp_affin_mask_t *osId2Mask,
				2714	int maxOsId, kmp_affin_mask_t *tempMask,
				2715	int *setSize) {
				2716	const char *next;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2717
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2718	for (;;) {
				2719	int start, count, stride, i;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2720
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2721	// Read in the starting proc id
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2722	SKIP_WS(*scan);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2723	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list");
				2724	next = *scan;
				2725	SKIP_DIGITS(next);
				2726	start = __kmp_str_to_int(scan, next);
				2727	KMP_ASSERT(start >= 0);
				2728	*scan = next;
				2729
				2730	// valid follow sets are ',' ':' and '}'
				2731	SKIP_WS(*scan);
				2732	if (scan == '}' \|\| scan == ',') {
				2733	if ((start > maxOsId) \|\|
				2734	(!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
				2735	if (__kmp_affinity_verbose \|\|
				2736	(__kmp_affinity_warnings &&
				2737	(__kmp_affinity_type != affinity_none))) {
				2738	KMP_WARNING(AffIgnoreInvalidProcID, start);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2739	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2740	} else {
				2741	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
				2742	(*setSize)++;
				2743	}
				2744	if (**scan == '}') {
				2745	break;
				2746	}
				2747	(*scan)++; // skip ','
				2748	continue;
				2749	}
				2750	KMP_ASSERT2(**scan == ':', "bad explicit places list");
				2751	(*scan)++; // skip ':'
				2752
				2753	// Read count parameter
				2754	SKIP_WS(*scan);
				2755	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list");
				2756	next = *scan;
				2757	SKIP_DIGITS(next);
				2758	count = __kmp_str_to_int(scan, next);
				2759	KMP_ASSERT(count >= 0);
				2760	*scan = next;
				2761
				2762	// valid follow sets are ',' ':' and '}'
				2763	SKIP_WS(*scan);
				2764	if (scan == '}' \|\| scan == ',') {
				2765	for (i = 0; i < count; i++) {
				2766	if ((start > maxOsId) \|\|
				2767	(!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
				2768	if (__kmp_affinity_verbose \|\|
				2769	(__kmp_affinity_warnings &&
				2770	(__kmp_affinity_type != affinity_none))) {
				2771	KMP_WARNING(AffIgnoreInvalidProcID, start);
				2772	}
				2773	break; // don't proliferate warnings for large count
				2774	} else {
				2775	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
				2776	start++;
				2777	(*setSize)++;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2778	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2779	}
				2780	if (**scan == '}') {
				2781	break;
				2782	}
				2783	(*scan)++; // skip ','
				2784	continue;
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	2785	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2786	KMP_ASSERT2(**scan == ':', "bad explicit places list");
				2787	(*scan)++; // skip ':'
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2788
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2789	// Read stride parameter
				2790	int sign = +1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2791	for (;;) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2792	SKIP_WS(*scan);
				2793	if (**scan == '+') {
				2794	(*scan)++; // skip '+'
				2795	continue;
				2796	}
				2797	if (**scan == '-') {
				2798	sign *= -1;
				2799	(*scan)++; // skip '-'
				2800	continue;
				2801	}
				2802	break;
				2803	}
				2804	SKIP_WS(*scan);
				2805	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list");
				2806	next = *scan;
				2807	SKIP_DIGITS(next);
				2808	stride = __kmp_str_to_int(scan, next);
				2809	KMP_ASSERT(stride >= 0);
				2810	*scan = next;
				2811	stride *= sign;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2812
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2813	// valid follow sets are ',' and '}'
				2814	SKIP_WS(*scan);
				2815	if (scan == '}' \|\| scan == ',') {
				2816	for (i = 0; i < count; i++) {
				2817	if ((start > maxOsId) \|\|
				2818	(!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
				2819	if (__kmp_affinity_verbose \|\|
				2820	(__kmp_affinity_warnings &&
				2821	(__kmp_affinity_type != affinity_none))) {
				2822	KMP_WARNING(AffIgnoreInvalidProcID, start);
				2823	}
				2824	break; // don't proliferate warnings for large count
				2825	} else {
				2826	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
				2827	start += stride;
				2828	(*setSize)++;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2829	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2830	}
				2831	if (**scan == '}') {
				2832	break;
				2833	}
				2834	(*scan)++; // skip ','
				2835	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2836	}
				2837
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2838	KMP_ASSERT2(0, "bad explicit places list");
				2839	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2840	}
				2841
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	2842	static void __kmp_process_place(const char *scan, kmp_affin_mask_t osId2Mask,
				2843	int maxOsId, kmp_affin_mask_t *tempMask,
				2844	int *setSize) {
				2845	const char *next;
				2846
				2847	// valid follow sets are '{' '!' and num
				2848	SKIP_WS(*scan);
				2849	if (**scan == '{') {
				2850	(*scan)++; // skip '{'
				2851	__kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask, setSize);
				2852	KMP_ASSERT2(**scan == '}', "bad explicit places list");
				2853	(*scan)++; // skip '}'
				2854	} else if (**scan == '!') {
				2855	(*scan)++; // skip '!'
				2856	__kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
				2857	KMP_CPU_COMPLEMENT(maxOsId, tempMask);
				2858	} else if ((scan >= '0') && (scan <= '9')) {
				2859	next = *scan;
				2860	SKIP_DIGITS(next);
				2861	int num = __kmp_str_to_int(scan, next);
				2862	KMP_ASSERT(num >= 0);
				2863	if ((num > maxOsId) \|\|
				2864	(!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
				2865	if (__kmp_affinity_verbose \|\|
				2866	(__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
				2867	KMP_WARNING(AffIgnoreInvalidProcID, num);
				2868	}
				2869	} else {
				2870	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
				2871	(*setSize)++;
				2872	}
				2873	*scan = next; // skip num
				2874	} else {
				2875	KMP_ASSERT2(0, "bad explicit places list");
				2876	}
				2877	}
				2878
				2879	// static void
				2880	void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
				2881	unsigned int *out_numMasks,
				2882	const char *placelist,
				2883	kmp_affin_mask_t *osId2Mask,
				2884	int maxOsId) {
				2885	int i, j, count, stride, sign;
				2886	const char *scan = placelist;
				2887	const char *next = placelist;
				2888
				2889	numNewMasks = 2;
				2890	KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
				2891	nextNewMask = 0;
				2892
				2893	// tempMask is modified based on the previous or initial
				2894	// place to form the current place
				2895	// previousMask contains the previous place
				2896	kmp_affin_mask_t *tempMask;
				2897	kmp_affin_mask_t *previousMask;
				2898	KMP_CPU_ALLOC(tempMask);
				2899	KMP_CPU_ZERO(tempMask);
				2900	KMP_CPU_ALLOC(previousMask);
				2901	KMP_CPU_ZERO(previousMask);
				2902	int setSize = 0;
				2903
				2904	for (;;) {
				2905	__kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
				2906
				2907	// valid follow sets are ',' ':' and EOL
				2908	SKIP_WS(scan);
				2909	if (scan == '\0' \|\| scan == ',') {
				2910	if (setSize > 0) {
				2911	ADD_MASK(tempMask);
				2912	}
				2913	KMP_CPU_ZERO(tempMask);
				2914	setSize = 0;
				2915	if (*scan == '\0') {
				2916	break;
				2917	}
				2918	scan++; // skip ','
				2919	continue;
				2920	}
				2921
				2922	KMP_ASSERT2(*scan == ':', "bad explicit places list");
				2923	scan++; // skip ':'
				2924
				2925	// Read count parameter
				2926	SKIP_WS(scan);
				2927	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list");
				2928	next = scan;
				2929	SKIP_DIGITS(next);
				2930	count = __kmp_str_to_int(scan, *next);
				2931	KMP_ASSERT(count >= 0);
				2932	scan = next;
				2933
				2934	// valid follow sets are ',' ':' and EOL
				2935	SKIP_WS(scan);
				2936	if (scan == '\0' \|\| scan == ',') {
				2937	stride = +1;
				2938	} else {
				2939	KMP_ASSERT2(*scan == ':', "bad explicit places list");
				2940	scan++; // skip ':'
				2941
				2942	// Read stride parameter
				2943	sign = +1;
				2944	for (;;) {
				2945	SKIP_WS(scan);
				2946	if (*scan == '+') {
				2947	scan++; // skip '+'
				2948	continue;
				2949	}
				2950	if (*scan == '-') {
				2951	sign *= -1;
				2952	scan++; // skip '-'
				2953	continue;
				2954	}
				2955	break;
				2956	}
				2957	SKIP_WS(scan);
				2958	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list");
				2959	next = scan;
				2960	SKIP_DIGITS(next);
				2961	stride = __kmp_str_to_int(scan, *next);
				2962	KMP_DEBUG_ASSERT(stride >= 0);
				2963	scan = next;
				2964	stride *= sign;
				2965	}
				2966
				2967	// Add places determined by initial_place : count : stride
				2968	for (i = 0; i < count; i++) {
				2969	if (setSize == 0) {
				2970	break;
				2971	}
				2972	// Add the current place, then build the next place (tempMask) from that
				2973	KMP_CPU_COPY(previousMask, tempMask);
				2974	ADD_MASK(previousMask);
				2975	KMP_CPU_ZERO(tempMask);
				2976	setSize = 0;
				2977	KMP_CPU_SET_ITERATE(j, previousMask) {
				2978	if (!KMP_CPU_ISSET(j, previousMask)) {
				2979	continue;
				2980	}
				2981	if ((j + stride > maxOsId) \|\| (j + stride < 0) \|\|
				2982	(!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) \|\|
				2983	(!KMP_CPU_ISSET(j + stride,
				2984	KMP_CPU_INDEX(osId2Mask, j + stride)))) {
				2985	if ((__kmp_affinity_verbose \|\|
				2986	(__kmp_affinity_warnings &&
				2987	(__kmp_affinity_type != affinity_none))) &&
				2988	i < count - 1) {
				2989	KMP_WARNING(AffIgnoreInvalidProcID, j + stride);
				2990	}
				2991	continue;
				2992	}
				2993	KMP_CPU_SET(j + stride, tempMask);
				2994	setSize++;
				2995	}
				2996	}
				2997	KMP_CPU_ZERO(tempMask);
				2998	setSize = 0;
				2999
				3000	// valid follow sets are ',' and EOL
				3001	SKIP_WS(scan);
				3002	if (*scan == '\0') {
				3003	break;
				3004	}
				3005	if (*scan == ',') {
				3006	scan++; // skip ','
				3007	continue;
				3008	}
				3009
				3010	KMP_ASSERT2(0, "bad explicit places list");
				3011	}
				3012
				3013	*out_numMasks = nextNewMask;
				3014	if (nextNewMask == 0) {
				3015	*out_masks = NULL;
				3016	KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
				3017	return;
				3018	}
				3019	KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
				3020	KMP_CPU_FREE(tempMask);
				3021	KMP_CPU_FREE(previousMask);
				3022	for (i = 0; i < nextNewMask; i++) {
				3023	kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
				3024	kmp_affin_mask_t dest = KMP_CPU_INDEX((out_masks), i);
				3025	KMP_CPU_COPY(dest, src);
				3026	}
				3027	KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
				3028	}
				3029
				3030	#endif /* OMP_40_ENABLED */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3031
				3032	#undef ADD_MASK
				3033	#undef ADD_MASK_OSID
				3034
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3035	#if KMP_USE_HWLOC
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3036	static int __kmp_hwloc_count_children_by_type(hwloc_topology_t t, hwloc_obj_t o,
				3037	hwloc_obj_type_t type,
				3038	hwloc_obj_t* f) {
				3039	if (!hwloc_compare_types(o->type, type)) {
				3040	if (*f == NULL)
				3041	*f = o; // output first descendant found
				3042	return 1;
				3043	}
				3044	int sum = 0;
				3045	for (unsigned i = 0; i < o->arity; i++)
				3046	sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f);
				3047	return sum; // will be 0 if no one found (as PU arity is 0)
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3048	}
				3049
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3050	static int __kmp_hwloc_count_children_by_depth(hwloc_topology_t t,
				3051	hwloc_obj_t o, unsigned depth,
				3052	hwloc_obj_t* f) {
				3053	if (o->depth == depth) {
				3054	if (*f == NULL)
				3055	*f = o; // output first descendant found
				3056	return 1;
				3057	}
				3058	int sum = 0;
				3059	for (unsigned i = 0; i < o->arity; i++)
				3060	sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f);
				3061	return sum; // will be 0 if no one found (as PU arity is 0)
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3062	}
				3063
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3064	static int __kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o) {
				3065	// skip PUs descendants of the object o
				3066	int skipped = 0;
				3067	hwloc_obj_t hT = NULL;
				3068	int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
				3069	for (int i = 0; i < N; ++i) {
				3070	KMP_DEBUG_ASSERT(hT);
				3071	unsigned idx = hT->os_index;
				3072	if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
				3073	KMP_CPU_CLR(idx, __kmp_affin_fullMask);
				3074	KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
				3075	++skipped;
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3076	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3077	hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
				3078	}
				3079	return skipped; // count number of skipped units
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3080	}
				3081
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3082	static int __kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o) {
				3083	// check if obj has PUs present in fullMask
				3084	hwloc_obj_t hT = NULL;
				3085	int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
				3086	for (int i = 0; i < N; ++i) {
				3087	KMP_DEBUG_ASSERT(hT);
				3088	unsigned idx = hT->os_index;
				3089	if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask))
				3090	return 1; // found PU
				3091	hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
				3092	}
				3093	return 0; // no PUs found
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3094	}
				3095	#endif // KMP_USE_HWLOC
				3096
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3097	static void __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) {
				3098	AddrUnsPair *newAddr;
				3099	if (__kmp_hws_requested == 0)
				3100	goto _exit; // no topology limiting actions requested, exit
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3101	#if KMP_USE_HWLOC
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3102	if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
				3103	// Number of subobjects calculated dynamically, this works fine for
				3104	// any non-uniform topology.
				3105	// L2 cache objects are determined by depth, other objects - by type.
				3106	hwloc_topology_t tp = __kmp_hwloc_topology;
				3107	int nS=0, nN=0, nL=0, nC=0, nT=0; // logical index including skipped
				3108	int nCr=0, nTr=0; // number of requested units
				3109	int nPkg=0, nCo=0, n_new=0, n_old = 0, nCpP=0, nTpC=0; // counters
				3110	hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
				3111	int L2depth, idx;
Jonathan Peyton	dd4aa9b	2015-10-08 17:55:54 +0000	[diff] [blame]	3112
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3113	// check support of extensions ----------------------------------
				3114	int numa_support = 0, tile_support = 0;
				3115	if (__kmp_pu_os_idx)
				3116	hT = hwloc_get_pu_obj_by_os_index(tp,
				3117	__kmp_pu_os_idx[__kmp_avail_proc - 1]);
				3118	else
				3119	hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1);
				3120	if (hT == NULL) { // something's gone wrong
				3121	KMP_WARNING(AffHWSubsetUnsupported);
				3122	goto _exit;
				3123	}
				3124	// check NUMA node
				3125	hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
				3126	hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
				3127	if (hN != NULL && hN->depth > hS->depth) {
				3128	numa_support = 1; // 1 in case socket includes node(s)
				3129	} else if (__kmp_hws_node.num > 0) {
				3130	// don't support sockets inside NUMA node (no such HW found for testing)
				3131	KMP_WARNING(AffHWSubsetUnsupported);
				3132	goto _exit;
				3133	}
				3134	// check L2 cahce, get object by depth because of multiple caches
				3135	L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
				3136	hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT);
				3137	if (hL != NULL && __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
				3138	&hC) > 1) {
				3139	tile_support = 1; // no sense to count L2 if it includes single core
				3140	} else if (__kmp_hws_tile.num > 0) {
				3141	if (__kmp_hws_core.num == 0) {
				3142	__kmp_hws_core = __kmp_hws_tile; // replace L2 with core
				3143	__kmp_hws_tile.num = 0;
				3144	} else {
				3145	// L2 and core are both requested, but represent same object
				3146	KMP_WARNING(AffHWSubsetInvalid);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3147	goto _exit;
				3148	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3149	}
				3150	// end of check of extensions -----------------------------------
				3151
				3152	// fill in unset items, validate settings -----------------------
				3153	if (__kmp_hws_socket.num == 0)
				3154	__kmp_hws_socket.num = nPackages; // use all available sockets
				3155	if (__kmp_hws_socket.offset >= nPackages) {
				3156	KMP_WARNING(AffHWSubsetManySockets);
				3157	goto _exit;
				3158	}
				3159	if (numa_support) {
				3160	int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE,
				3161	&hN); // num nodes in socket
				3162	if (__kmp_hws_node.num == 0)
				3163	__kmp_hws_node.num = NN; // use all available nodes
				3164	if (__kmp_hws_node.offset >= NN) {
				3165	KMP_WARNING(AffHWSubsetManyNodes);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3166	goto _exit;
				3167	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3168	if (tile_support) {
				3169	// get num tiles in node
				3170	int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
				3171	if (__kmp_hws_tile.num == 0) {
				3172	__kmp_hws_tile.num = NL + 1;
				3173	} // use all available tiles, some node may have more tiles, thus +1
				3174	if (__kmp_hws_tile.offset >= NL) {
				3175	KMP_WARNING(AffHWSubsetManyTiles);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3176	goto _exit;
				3177	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3178	int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
				3179	&hC); // num cores in tile
				3180	if (__kmp_hws_core.num == 0)
				3181	__kmp_hws_core.num = NC; // use all available cores
				3182	if (__kmp_hws_core.offset >= NC) {
				3183	KMP_WARNING(AffHWSubsetManyCores);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3184	goto _exit;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3185	}
				3186	} else { // tile_support
				3187	int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE,
				3188	&hC); // num cores in node
				3189	if (__kmp_hws_core.num == 0)
				3190	__kmp_hws_core.num = NC; // use all available cores
				3191	if (__kmp_hws_core.offset >= NC) {
				3192	KMP_WARNING(AffHWSubsetManyCores);
				3193	goto _exit;
				3194	}
				3195	} // tile_support
				3196	} else { // numa_support
				3197	if (tile_support) {
				3198	// get num tiles in socket
				3199	int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
				3200	if (__kmp_hws_tile.num == 0)
				3201	__kmp_hws_tile.num = NL; // use all available tiles
				3202	if (__kmp_hws_tile.offset >= NL) {
				3203	KMP_WARNING(AffHWSubsetManyTiles);
				3204	goto _exit;
				3205	}
				3206	int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
				3207	&hC); // num cores in tile
				3208	if (__kmp_hws_core.num == 0)
				3209	__kmp_hws_core.num = NC; // use all available cores
				3210	if (__kmp_hws_core.offset >= NC) {
				3211	KMP_WARNING(AffHWSubsetManyCores);
				3212	goto _exit;
				3213	}
				3214	} else { // tile_support
				3215	int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE,
				3216	&hC); // num cores in socket
				3217	if (__kmp_hws_core.num == 0)
				3218	__kmp_hws_core.num = NC; // use all available cores
				3219	if (__kmp_hws_core.offset >= NC) {
				3220	KMP_WARNING(AffHWSubsetManyCores);
				3221	goto _exit;
				3222	}
				3223	} // tile_support
				3224	}
				3225	if (__kmp_hws_proc.num == 0)
				3226	__kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all available procs
				3227	if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) {
				3228	KMP_WARNING(AffHWSubsetManyProcs);
				3229	goto _exit;
				3230	}
				3231	// end of validation --------------------------------------------
				3232
				3233	if (pAddr) // pAddr is NULL in case of affinity_none
				3234	newAddr = (AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair)
				3235	__kmp_avail_proc); // max size
				3236	// main loop to form HW subset ----------------------------------
				3237	hS = NULL;
				3238	int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE);
				3239	for (int s = 0; s < NP; ++s) {
				3240	// Check Socket -----------------------------------------------
				3241	hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS);
				3242	if (!__kmp_hwloc_obj_has_PUs(tp, hS))
				3243	continue; // skip socket if all PUs are out of fullMask
				3244	++nS; // only count objects those have PUs in affinity mask
				3245	if (nS <= __kmp_hws_socket.offset \|\|
				3246	nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) {
				3247	n_old += __kmp_hwloc_skip_PUs_obj(tp, hS); // skip socket
				3248	continue; // move to next socket
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3249	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3250	nCr = 0; // count number of cores per socket
				3251	// socket requested, go down the topology tree
				3252	// check 4 cases: (+NUMA+Tile), (+NUMA-Tile), (-NUMA+Tile), (-NUMA-Tile)
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3253	if (numa_support) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3254	nN = 0;
				3255	hN = NULL;
				3256	// num nodes in current socket
				3257	int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE,
				3258	&hN);
				3259	for (int n = 0; n < NN; ++n) {
				3260	// Check NUMA Node ----------------------------------------
				3261	if (!__kmp_hwloc_obj_has_PUs(tp, hN)) {
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3262	hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3263	continue; // skip node if all PUs are out of fullMask
				3264	}
				3265	++nN;
				3266	if (nN <= __kmp_hws_node.offset \|\|
				3267	nN > __kmp_hws_node.num + __kmp_hws_node.offset) {
				3268	// skip node as not requested
				3269	n_old += __kmp_hwloc_skip_PUs_obj(tp, hN); // skip node
				3270	hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
				3271	continue; // move to next node
				3272	}
				3273	// node requested, go down the topology tree
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3274	if (tile_support) {
				3275	nL = 0;
				3276	hL = NULL;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3277	int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3278	for (int l = 0; l < NL; ++l) {
				3279	// Check L2 (tile) ------------------------------------
				3280	if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
				3281	hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
				3282	continue; // skip tile if all PUs are out of fullMask
				3283	}
				3284	++nL;
				3285	if (nL <= __kmp_hws_tile.offset \|\|
				3286	nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
				3287	// skip tile as not requested
				3288	n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
				3289	hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
				3290	continue; // move to next tile
				3291	}
				3292	// tile requested, go down the topology tree
				3293	nC = 0;
				3294	hC = NULL;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3295	// num cores in current tile
				3296	int NC = __kmp_hwloc_count_children_by_type(tp, hL,
				3297	HWLOC_OBJ_CORE, &hC);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3298	for (int c = 0; c < NC; ++c) {
				3299	// Check Core ---------------------------------------
				3300	if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
				3301	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3302	continue; // skip core if all PUs are out of fullMask
				3303	}
				3304	++nC;
				3305	if (nC <= __kmp_hws_core.offset \|\|
				3306	nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
				3307	// skip node as not requested
				3308	n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
				3309	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3310	continue; // move to next node
				3311	}
				3312	// core requested, go down to PUs
				3313	nT = 0;
				3314	nTr = 0;
				3315	hT = NULL;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3316	// num procs in current core
				3317	int NT = __kmp_hwloc_count_children_by_type(tp, hC,
				3318	HWLOC_OBJ_PU, &hT);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3319	for (int t = 0; t < NT; ++t) {
				3320	// Check PU ---------------------------------------
				3321	idx = hT->os_index;
				3322	if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
				3323	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3324	continue; // skip PU if not in fullMask
				3325	}
				3326	++nT;
				3327	if (nT <= __kmp_hws_proc.offset \|\|
				3328	nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
				3329	// skip PU
				3330	KMP_CPU_CLR(idx, __kmp_affin_fullMask);
				3331	++n_old;
				3332	KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
				3333	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3334	continue; // move to next node
				3335	}
				3336	++nTr;
				3337	if (pAddr) // collect requested thread's data
				3338	newAddr[n_new] = (*pAddr)[n_old];
				3339	++n_new;
				3340	++n_old;
				3341	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3342	} // threads loop
				3343	if (nTr > 0) {
				3344	++nCr; // num cores per socket
				3345	++nCo; // total num cores
				3346	if (nTr > nTpC)
				3347	nTpC = nTr; // calc max threads per core
				3348	}
				3349	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3350	} // cores loop
				3351	hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
				3352	} // tiles loop
				3353	} else { // tile_support
				3354	// no tiles, check cores
				3355	nC = 0;
				3356	hC = NULL;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3357	// num cores in current node
				3358	int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE,
				3359	&hC);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3360	for (int c = 0; c < NC; ++c) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3361	// Check Core ---------------------------------------
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3362	if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
				3363	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3364	continue; // skip core if all PUs are out of fullMask
				3365	}
				3366	++nC;
				3367	if (nC <= __kmp_hws_core.offset \|\|
				3368	nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
				3369	// skip node as not requested
				3370	n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
				3371	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3372	continue; // move to next node
				3373	}
				3374	// core requested, go down to PUs
				3375	nT = 0;
				3376	nTr = 0;
				3377	hT = NULL;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3378	int NT = __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU,
				3379	&hT);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3380	for (int t = 0; t < NT; ++t) {
				3381	// Check PU ---------------------------------------
				3382	idx = hT->os_index;
				3383	if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
				3384	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3385	continue; // skip PU if not in fullMask
				3386	}
				3387	++nT;
				3388	if (nT <= __kmp_hws_proc.offset \|\|
				3389	nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
				3390	// skip PU
				3391	KMP_CPU_CLR(idx, __kmp_affin_fullMask);
				3392	++n_old;
				3393	KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
				3394	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3395	continue; // move to next node
				3396	}
				3397	++nTr;
				3398	if (pAddr) // collect requested thread's data
				3399	newAddr[n_new] = (*pAddr)[n_old];
				3400	++n_new;
				3401	++n_old;
				3402	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3403	} // threads loop
				3404	if (nTr > 0) {
				3405	++nCr; // num cores per socket
				3406	++nCo; // total num cores
				3407	if (nTr > nTpC)
				3408	nTpC = nTr; // calc max threads per core
				3409	}
				3410	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3411	} // cores loop
				3412	} // tiles support
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3413	hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
				3414	} // nodes loop
				3415	} else { // numa_support
				3416	// no NUMA support
				3417	if (tile_support) {
				3418	nL = 0;
				3419	hL = NULL;
				3420	// num tiles in current socket
				3421	int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
				3422	for (int l = 0; l < NL; ++l) {
				3423	// Check L2 (tile) ------------------------------------
				3424	if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
				3425	hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
				3426	continue; // skip tile if all PUs are out of fullMask
				3427	}
				3428	++nL;
				3429	if (nL <= __kmp_hws_tile.offset \|\|
				3430	nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
				3431	// skip tile as not requested
				3432	n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
				3433	hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
				3434	continue; // move to next tile
				3435	}
				3436	// tile requested, go down the topology tree
				3437	nC = 0;
				3438	hC = NULL;
				3439	// num cores per tile
				3440	int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
				3441	&hC);
				3442	for (int c = 0; c < NC; ++c) {
				3443	// Check Core ---------------------------------------
				3444	if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
				3445	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3446	continue; // skip core if all PUs are out of fullMask
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3447	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3448	++nC;
				3449	if (nC <= __kmp_hws_core.offset \|\|
				3450	nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
				3451	// skip node as not requested
				3452	n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
				3453	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3454	continue; // move to next node
				3455	}
				3456	// core requested, go down to PUs
				3457	nT = 0;
				3458	nTr = 0;
				3459	hT = NULL;
				3460	// num procs per core
				3461	int NT = __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU,
				3462	&hT);
				3463	for (int t = 0; t < NT; ++t) {
				3464	// Check PU ---------------------------------------
				3465	idx = hT->os_index;
				3466	if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
				3467	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3468	continue; // skip PU if not in fullMask
				3469	}
				3470	++nT;
				3471	if (nT <= __kmp_hws_proc.offset \|\|
				3472	nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
				3473	// skip PU
				3474	KMP_CPU_CLR(idx, __kmp_affin_fullMask);
				3475	++n_old;
				3476	KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
				3477	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3478	continue; // move to next node
				3479	}
				3480	++nTr;
				3481	if (pAddr) // collect requested thread's data
				3482	newAddr[n_new] = (*pAddr)[n_old];
				3483	++n_new;
				3484	++n_old;
				3485	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3486	} // threads loop
				3487	if (nTr > 0) {
				3488	++nCr; // num cores per socket
				3489	++nCo; // total num cores
				3490	if (nTr > nTpC)
				3491	nTpC = nTr; // calc max threads per core
				3492	}
				3493	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3494	} // cores loop
				3495	hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
				3496	} // tiles loop
				3497	} else { // tile_support
				3498	// no tiles, check cores
				3499	nC = 0;
				3500	hC = NULL;
				3501	// num cores in socket
				3502	int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE,
				3503	&hC);
				3504	for (int c = 0; c < NC; ++c) {
				3505	// Check Core -------------------------------------------
				3506	if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
				3507	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3508	continue; // skip core if all PUs are out of fullMask
				3509	}
				3510	++nC;
				3511	if (nC <= __kmp_hws_core.offset \|\|
				3512	nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
				3513	// skip node as not requested
				3514	n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
				3515	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3516	continue; // move to next node
				3517	}
				3518	// core requested, go down to PUs
				3519	nT = 0;
				3520	nTr = 0;
				3521	hT = NULL;
				3522	// num procs per core
				3523	int NT = __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU,
				3524	&hT);
				3525	for (int t = 0; t < NT; ++t) {
				3526	// Check PU ---------------------------------------
				3527	idx = hT->os_index;
				3528	if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
				3529	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3530	continue; // skip PU if not in fullMask
				3531	}
				3532	++nT;
				3533	if (nT <= __kmp_hws_proc.offset \|\|
				3534	nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
				3535	// skip PU
				3536	KMP_CPU_CLR(idx, __kmp_affin_fullMask);
				3537	++n_old;
				3538	KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
				3539	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3540	continue; // move to next node
				3541	}
				3542	++nTr;
				3543	if (pAddr) // collect requested thread's data
				3544	newAddr[n_new] = (*pAddr)[n_old];
				3545	++n_new;
				3546	++n_old;
				3547	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3548	} // threads loop
				3549	if (nTr > 0) {
				3550	++nCr; // num cores per socket
				3551	++nCo; // total num cores
				3552	if (nTr > nTpC)
				3553	nTpC = nTr; // calc max threads per core
				3554	}
				3555	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3556	} // cores loop
				3557	} // tiles support
				3558	} // numa_support
				3559	if (nCr > 0) { // found cores?
				3560	++nPkg; // num sockets
				3561	if (nCr > nCpP)
				3562	nCpP = nCr; // calc max cores per socket
				3563	}
				3564	} // sockets loop
				3565
				3566	// check the subset is valid
				3567	KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc);
				3568	KMP_DEBUG_ASSERT(nPkg > 0);
				3569	KMP_DEBUG_ASSERT(nCpP > 0);
				3570	KMP_DEBUG_ASSERT(nTpC > 0);
				3571	KMP_DEBUG_ASSERT(nCo > 0);
				3572	KMP_DEBUG_ASSERT(nPkg <= nPackages);
				3573	KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg);
				3574	KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore);
				3575	KMP_DEBUG_ASSERT(nCo <= __kmp_ncores);
				3576
				3577	nPackages = nPkg; // correct num sockets
				3578	nCoresPerPkg = nCpP; // correct num cores per socket
				3579	__kmp_nThreadsPerCore = nTpC; // correct num threads per core
				3580	__kmp_avail_proc = n_new; // correct num procs
				3581	__kmp_ncores = nCo; // correct num cores
				3582	// hwloc topology method end
				3583	} else
				3584	#endif // KMP_USE_HWLOC
				3585	{
				3586	int n_old = 0, n_new = 0, proc_num = 0;
				3587	if (__kmp_hws_node.num > 0 \|\| __kmp_hws_tile.num > 0) {
				3588	KMP_WARNING(AffHWSubsetNoHWLOC);
				3589	goto _exit;
				3590	}
				3591	if (__kmp_hws_socket.num == 0)
				3592	__kmp_hws_socket.num = nPackages; // use all available sockets
				3593	if (__kmp_hws_core.num == 0)
				3594	__kmp_hws_core.num = nCoresPerPkg; // use all available cores
				3595	if (__kmp_hws_proc.num == 0 \|\|
				3596	__kmp_hws_proc.num > __kmp_nThreadsPerCore)
				3597	__kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all HW contexts
				3598	if ( !__kmp_affinity_uniform_topology() ) {
				3599	KMP_WARNING( AffHWSubsetNonUniform );
				3600	goto _exit; // don't support non-uniform topology
				3601	}
				3602	if ( depth > 3 ) {
				3603	KMP_WARNING( AffHWSubsetNonThreeLevel );
				3604	goto _exit; // don't support not-3-level topology
				3605	}
				3606	if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) {
				3607	KMP_WARNING(AffHWSubsetManySockets);
				3608	goto _exit;
				3609	}
				3610	if ( __kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg ) {
				3611	KMP_WARNING( AffHWSubsetManyCores );
				3612	goto _exit;
				3613	}
				3614	// Form the requested subset
				3615	if (pAddr) // pAddr is NULL in case of affinity_none
				3616	newAddr = (AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair)
				3617	__kmp_hws_socket.num *
				3618	__kmp_hws_core.num *
				3619	__kmp_hws_proc.num);
				3620	for (int i = 0; i < nPackages; ++i) {
				3621	if (i < __kmp_hws_socket.offset \|\|
				3622	i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) {
				3623	// skip not-requested socket
				3624	n_old += nCoresPerPkg * __kmp_nThreadsPerCore;
				3625	if (__kmp_pu_os_idx != NULL) {
				3626	// walk through skipped socket
				3627	for (int j = 0; j < nCoresPerPkg; ++j) {
				3628	for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
				3629	KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
				3630	++proc_num;
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	3631	}
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3632	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3633	}
				3634	} else {
				3635	// walk through requested socket
				3636	for (int j = 0; j < nCoresPerPkg; ++j) {
				3637	if (j < __kmp_hws_core.offset \|\|
				3638	j >= __kmp_hws_core.offset + __kmp_hws_core.num)
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3639	{ // skip not-requested core
				3640	n_old += __kmp_nThreadsPerCore;
				3641	if (__kmp_pu_os_idx != NULL) {
				3642	for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
				3643	KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
				3644	++proc_num;
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	3645	}
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3646	}
				3647	} else {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3648	// walk through requested core
				3649	for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
				3650	if (k < __kmp_hws_proc.num) {
				3651	if (pAddr) // collect requested thread's data
				3652	newAddr[n_new] = (*pAddr)[n_old];
				3653	n_new++;
				3654	} else {
				3655	if (__kmp_pu_os_idx != NULL)
				3656	KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3657	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3658	n_old++;
				3659	++proc_num;
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	3660	}
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3661	}
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	3662	}
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3663	}
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3664	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3665	KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
				3666	KMP_DEBUG_ASSERT(n_new == __kmp_hws_socket.num * __kmp_hws_core.num *
				3667	__kmp_hws_proc.num);
				3668	nPackages = __kmp_hws_socket.num; // correct nPackages
				3669	nCoresPerPkg = __kmp_hws_core.num; // correct nCoresPerPkg
				3670	__kmp_nThreadsPerCore = __kmp_hws_proc.num; // correct __kmp_nThreadsPerCore
				3671	__kmp_avail_proc = n_new; // correct avail_proc
				3672	__kmp_ncores = nPackages * __kmp_hws_core.num; // correct ncores
				3673	} // non-hwloc topology method
				3674	if (pAddr) {
				3675	__kmp_free( *pAddr );
				3676	*pAddr = newAddr; // replace old topology with new one
				3677	}
				3678	if (__kmp_affinity_verbose) {
				3679	char m[KMP_AFFIN_MASK_PRINT_LEN];
				3680	__kmp_affinity_print_mask(m,KMP_AFFIN_MASK_PRINT_LEN,__kmp_affin_fullMask);
				3681	if (__kmp_affinity_respect_mask) {
				3682	KMP_INFORM(InitOSProcSetRespect, "KMP_HW_SUBSET", m);
Paul Osmialowski	ecbe2ea	2016-07-29 20:55:03 +0000	[diff] [blame]	3683	} else {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3684	KMP_INFORM(InitOSProcSetNotRespect, "KMP_HW_SUBSET", m);
Paul Osmialowski	ecbe2ea	2016-07-29 20:55:03 +0000	[diff] [blame]	3685	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3686	KMP_INFORM(AvailableOSProc, "KMP_HW_SUBSET", __kmp_avail_proc);
				3687	kmp_str_buf_t buf;
				3688	__kmp_str_buf_init(&buf);
				3689	__kmp_str_buf_print(&buf, "%d", nPackages);
				3690	KMP_INFORM(TopologyExtra, "KMP_HW_SUBSET", buf.str, nCoresPerPkg,
				3691	__kmp_nThreadsPerCore, __kmp_ncores);
				3692	__kmp_str_buf_free(&buf);
				3693	}
				3694	_exit:
				3695	if (__kmp_pu_os_idx != NULL) {
				3696	__kmp_free(__kmp_pu_os_idx);
				3697	__kmp_pu_os_idx = NULL;
				3698	}
				3699	}
				3700
				3701	// This function figures out the deepest level at which there is at least one
				3702	// cluster/core with more than one processing unit bound to it.
				3703	static int __kmp_affinity_find_core_level(const AddrUnsPair *address2os,
				3704	int nprocs, int bottom_level) {
				3705	int core_level = 0;
				3706
				3707	for (int i = 0; i < nprocs; i++) {
				3708	for (int j = bottom_level; j > 0; j--) {
				3709	if (address2os[i].first.labels[j] > 0) {
				3710	if (core_level < (j - 1)) {
				3711	core_level = j - 1;
				3712	}
				3713	}
				3714	}
				3715	}
				3716	return core_level;
				3717	}
				3718
				3719	// This function counts number of clusters/cores at given level.
				3720	static int __kmp_affinity_compute_ncores(const AddrUnsPair *address2os,
				3721	int nprocs, int bottom_level,
				3722	int core_level) {
				3723	int ncores = 0;
				3724	int i, j;
				3725
				3726	j = bottom_level;
				3727	for (i = 0; i < nprocs; i++) {
				3728	for (j = bottom_level; j > core_level; j--) {
				3729	if ((i + 1) < nprocs) {
				3730	if (address2os[i + 1].first.labels[j] > 0) {
				3731	break;
				3732	}
				3733	}
				3734	}
				3735	if (j == core_level) {
				3736	ncores++;
				3737	}
				3738	}
				3739	if (j > core_level) {
				3740	// In case of ( nprocs < __kmp_avail_proc ) we may end too deep and miss one
				3741	// core. May occur when called from __kmp_affinity_find_core().
				3742	ncores++;
				3743	}
				3744	return ncores;
				3745	}
				3746
				3747	// This function finds to which cluster/core given processing unit is bound.
				3748	static int __kmp_affinity_find_core(const AddrUnsPair *address2os, int proc,
				3749	int bottom_level, int core_level) {
				3750	return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level,
				3751	core_level) - 1;
				3752	}
				3753
				3754	// This function finds maximal number of processing units bound to a
				3755	// cluster/core at given level.
				3756	static int __kmp_affinity_max_proc_per_core(const AddrUnsPair *address2os,
				3757	int nprocs, int bottom_level,
				3758	int core_level) {
				3759	int maxprocpercore = 0;
				3760
				3761	if (core_level < bottom_level) {
				3762	for (int i = 0; i < nprocs; i++) {
				3763	int percore = address2os[i].first.labels[core_level + 1] + 1;
				3764
				3765	if (percore > maxprocpercore) {
				3766	maxprocpercore = percore;
				3767	}
				3768	}
				3769	} else {
				3770	maxprocpercore = 1;
				3771	}
				3772	return maxprocpercore;
Paul Osmialowski	ecbe2ea	2016-07-29 20:55:03 +0000	[diff] [blame]	3773	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3774
// Topology table filled in by the __kmp_affinity_create_*_map() routines that
// __kmp_aux_affinity_initialize() calls; stays NULL until one of them succeeds.
static AddrUnsPair *address2os = NULL;
// Per-core slot table allocated by __kmp_aux_affinity_initialize() for
// affinity_balanced on a non-uniform topology. Slot
// [core * maxprocpercore + k] holds the OS proc id of the k-th processing unit
// of that core, or -1 when the slot is unused.
static int *procarr = NULL;
// Topology depth saved by __kmp_aux_affinity_initialize() in that same
// non-uniform affinity_balanced case.
static int __kmp_aff_depth = 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3778
// Early exit used by __kmp_aux_affinity_initialize() when a topology map
// routine reports depth 0: checks that the affinity type is affinity_none and
// that no address2os table was produced, applies thread places with no
// topology, and returns from the enclosing function.
// NOTE: expands to several statements ending in a bare `return;`, so it must
// only be used inside a braced block of a function returning void.
#define KMP_EXIT_AFF_NONE                                                      \
  KMP_ASSERT(__kmp_affinity_type == affinity_none);                            \
  KMP_ASSERT(address2os == NULL);                                              \
  __kmp_apply_thread_places(NULL, 0);                                          \
  return;
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	3784
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3785	static int __kmp_affinity_cmp_Address_child_num(const void a, const void b) {
				3786	const Address aa = (const Address )&(((AddrUnsPair *)a)->first);
				3787	const Address bb = (const Address )&(((AddrUnsPair *)b)->first);
				3788	unsigned depth = aa->depth;
				3789	unsigned i;
				3790	KMP_DEBUG_ASSERT(depth == bb->depth);
				3791	KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
				3792	KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
				3793	for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
				3794	int j = depth - i - 1;
				3795	if (aa->childNums[j] < bb->childNums[j])
				3796	return -1;
				3797	if (aa->childNums[j] > bb->childNums[j])
				3798	return 1;
				3799	}
				3800	for (; i < depth; i++) {
				3801	int j = i - __kmp_affinity_compact;
				3802	if (aa->childNums[j] < bb->childNums[j])
				3803	return -1;
				3804	if (aa->childNums[j] > bb->childNums[j])
				3805	return 1;
				3806	}
				3807	return 0;
Jonathan Peyton	e6abe52	2016-09-02 20:54:58 +0000	[diff] [blame]	3808	}
				3809
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3810	static void __kmp_aux_affinity_initialize(void) {
				3811	if (__kmp_affinity_masks != NULL) {
				3812	KMP_ASSERT(__kmp_affin_fullMask != NULL);
				3813	return;
				3814	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3815
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3816	// Create the "full" mask - this defines all of the processors that we
				3817	// consider to be in the machine model. If respect is set, then it is the
				3818	// initialization thread's affinity mask. Otherwise, it is all processors that
				3819	// we know about on the machine.
				3820	if (__kmp_affin_fullMask == NULL) {
				3821	KMP_CPU_ALLOC(__kmp_affin_fullMask);
				3822	}
				3823	if (KMP_AFFINITY_CAPABLE()) {
				3824	if (__kmp_affinity_respect_mask) {
				3825	__kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3826
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3827	// Count the number of available processors.
				3828	unsigned i;
				3829	__kmp_avail_proc = 0;
				3830	KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
				3831	if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
				3832	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3833	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3834	__kmp_avail_proc++;
				3835	}
				3836	if (__kmp_avail_proc > __kmp_xproc) {
				3837	if (__kmp_affinity_verbose \|\|
				3838	(__kmp_affinity_warnings &&
				3839	(__kmp_affinity_type != affinity_none))) {
				3840	KMP_WARNING(ErrorInitializeAffinity);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3841	}
				3842	__kmp_affinity_type = affinity_none;
Andrey Churbanov	1f037e4	2015-03-10 09:15:26 +0000	[diff] [blame]	3843	KMP_AFFINITY_DISABLE();
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3844	return;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3845	}
				3846	} else {
				3847	__kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
				3848	__kmp_avail_proc = __kmp_xproc;
				3849	}
				3850	}
				3851
				3852	int depth = -1;
				3853	kmp_i18n_id_t msg_id = kmp_i18n_null;
				3854
				3855	// For backward compatibility, setting KMP_CPUINFO_FILE =>
				3856	// KMP_TOPOLOGY_METHOD=cpuinfo
				3857	if ((__kmp_cpuinfo_file != NULL) &&
				3858	(__kmp_affinity_top_method == affinity_top_method_all)) {
				3859	__kmp_affinity_top_method = affinity_top_method_cpuinfo;
				3860	}
				3861
				3862	if (__kmp_affinity_top_method == affinity_top_method_all) {
				3863	// In the default code path, errors are not fatal - we just try using
				3864	// another method. We only emit a warning message if affinity is on, or the
				3865	// verbose flag is set, an the nowarnings flag was not set.
				3866	const char *file_name = NULL;
				3867	int line = 0;
				3868	#if KMP_USE_HWLOC
				3869	if (depth < 0 &&
				3870	__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
				3871	if (__kmp_affinity_verbose) {
				3872	KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
				3873	}
				3874	if (!__kmp_hwloc_error) {
				3875	depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
				3876	if (depth == 0) {
				3877	KMP_EXIT_AFF_NONE;
				3878	} else if (depth < 0 && __kmp_affinity_verbose) {
				3879	KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
				3880	}
				3881	} else if (__kmp_affinity_verbose) {
				3882	KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
				3883	}
				3884	}
				3885	#endif
				3886
				3887	#if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
				3888
				3889	if (depth < 0) {
				3890	if (__kmp_affinity_verbose) {
				3891	KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
				3892	}
				3893
				3894	file_name = NULL;
				3895	depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
				3896	if (depth == 0) {
				3897	KMP_EXIT_AFF_NONE;
				3898	}
				3899
				3900	if (depth < 0) {
				3901	if (__kmp_affinity_verbose) {
				3902	if (msg_id != kmp_i18n_null) {
				3903	KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY",
				3904	__kmp_i18n_catgets(msg_id),
				3905	KMP_I18N_STR(DecodingLegacyAPIC));
				3906	} else {
				3907	KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
				3908	KMP_I18N_STR(DecodingLegacyAPIC));
				3909	}
				3910	}
				3911
				3912	file_name = NULL;
				3913	depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
				3914	if (depth == 0) {
				3915	KMP_EXIT_AFF_NONE;
				3916	}
				3917	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3918	}
				3919
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3920	#endif /* KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3921
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3922	#if KMP_OS_LINUX
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3923
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3924	if (depth < 0) {
				3925	if (__kmp_affinity_verbose) {
				3926	if (msg_id != kmp_i18n_null) {
				3927	KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY",
				3928	__kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3929	} else {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3930	KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3931	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3932	}
				3933
				3934	FILE *f = fopen("/proc/cpuinfo", "r");
				3935	if (f == NULL) {
				3936	msg_id = kmp_i18n_str_CantOpenCpuinfo;
				3937	} else {
				3938	file_name = "/proc/cpuinfo";
				3939	depth =
				3940	__kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
				3941	fclose(f);
				3942	if (depth == 0) {
				3943	KMP_EXIT_AFF_NONE;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3944	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3945	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3946	}
				3947
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	3948	#endif /* KMP_OS_LINUX */
				3949
				3950	#if KMP_GROUP_AFFINITY
				3951
				3952	if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
				3953	if (__kmp_affinity_verbose) {
				3954	KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
				3955	}
				3956
				3957	depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
				3958	KMP_ASSERT(depth != 0);
				3959	}
				3960
				3961	#endif /* KMP_GROUP_AFFINITY */
				3962
				3963	if (depth < 0) {
				3964	if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
				3965	if (file_name == NULL) {
				3966	KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
				3967	} else if (line == 0) {
				3968	KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
				3969	} else {
				3970	KMP_INFORM(UsingFlatOSFileLine, file_name, line,
				3971	__kmp_i18n_catgets(msg_id));
				3972	}
				3973	}
				3974	// FIXME - print msg if msg_id = kmp_i18n_null ???
				3975
				3976	file_name = "";
				3977	depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
				3978	if (depth == 0) {
				3979	KMP_EXIT_AFF_NONE;
				3980	}
				3981	KMP_ASSERT(depth > 0);
				3982	KMP_ASSERT(address2os != NULL);
				3983	}
				3984	}
				3985
				3986	// If the user has specified that a paricular topology discovery method is to be
				3987	// used, then we abort if that method fails. The exception is group affinity,
				3988	// which might have been implicitly set.
				3989
				3990	#if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
				3991
				3992	else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
				3993	if (__kmp_affinity_verbose) {
				3994	KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
				3995	}
				3996
				3997	depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
				3998	if (depth == 0) {
				3999	KMP_EXIT_AFF_NONE;
				4000	}
				4001	if (depth < 0) {
				4002	KMP_ASSERT(msg_id != kmp_i18n_null);
				4003	KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
				4004	}
				4005	} else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
				4006	if (__kmp_affinity_verbose) {
				4007	KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
				4008	}
				4009
				4010	depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
				4011	if (depth == 0) {
				4012	KMP_EXIT_AFF_NONE;
				4013	}
				4014	if (depth < 0) {
				4015	KMP_ASSERT(msg_id != kmp_i18n_null);
				4016	KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
				4017	}
				4018	}
				4019
				4020	#endif /* KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 */
				4021
				4022	else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
				4023	const char *filename;
				4024	if (__kmp_cpuinfo_file != NULL) {
				4025	filename = __kmp_cpuinfo_file;
				4026	} else {
				4027	filename = "/proc/cpuinfo";
				4028	}
				4029
				4030	if (__kmp_affinity_verbose) {
				4031	KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
				4032	}
				4033
				4034	FILE *f = fopen(filename, "r");
				4035	if (f == NULL) {
				4036	int code = errno;
				4037	if (__kmp_cpuinfo_file != NULL) {
				4038	__kmp_msg(kmp_ms_fatal, KMP_MSG(CantOpenFileForReading, filename),
				4039	KMP_ERR(code), KMP_HNT(NameComesFrom_CPUINFO_FILE),
				4040	__kmp_msg_null);
				4041	} else {
				4042	__kmp_msg(kmp_ms_fatal, KMP_MSG(CantOpenFileForReading, filename),
				4043	KMP_ERR(code), __kmp_msg_null);
				4044	}
				4045	}
				4046	int line = 0;
				4047	depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
				4048	fclose(f);
				4049	if (depth < 0) {
				4050	KMP_ASSERT(msg_id != kmp_i18n_null);
				4051	if (line > 0) {
				4052	KMP_FATAL(FileLineMsgExiting, filename, line,
				4053	__kmp_i18n_catgets(msg_id));
				4054	} else {
				4055	KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
				4056	}
				4057	}
				4058	if (__kmp_affinity_type == affinity_none) {
				4059	KMP_ASSERT(depth == 0);
				4060	KMP_EXIT_AFF_NONE;
				4061	}
				4062	}
				4063
				4064	#if KMP_GROUP_AFFINITY
				4065
				4066	else if (__kmp_affinity_top_method == affinity_top_method_group) {
				4067	if (__kmp_affinity_verbose) {
				4068	KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
				4069	}
				4070
				4071	depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
				4072	KMP_ASSERT(depth != 0);
				4073	if (depth < 0) {
				4074	KMP_ASSERT(msg_id != kmp_i18n_null);
				4075	KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
				4076	}
				4077	}
				4078
				4079	#endif /* KMP_GROUP_AFFINITY */
				4080
				4081	else if (__kmp_affinity_top_method == affinity_top_method_flat) {
				4082	if (__kmp_affinity_verbose) {
				4083	KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
				4084	}
				4085
				4086	depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
				4087	if (depth == 0) {
				4088	KMP_EXIT_AFF_NONE;
				4089	}
				4090	// should not fail
				4091	KMP_ASSERT(depth > 0);
				4092	KMP_ASSERT(address2os != NULL);
				4093	}
				4094
				4095	#if KMP_USE_HWLOC
				4096	else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
				4097	KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
				4098	if (__kmp_affinity_verbose) {
				4099	KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
				4100	}
				4101	depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
				4102	if (depth == 0) {
				4103	KMP_EXIT_AFF_NONE;
				4104	}
				4105	}
				4106	#endif // KMP_USE_HWLOC
				4107
				4108	if (address2os == NULL) {
				4109	if (KMP_AFFINITY_CAPABLE() &&
				4110	(__kmp_affinity_verbose \|\|
				4111	(__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none)))) {
				4112	KMP_WARNING(ErrorInitializeAffinity);
				4113	}
				4114	__kmp_affinity_type = affinity_none;
				4115	KMP_AFFINITY_DISABLE();
				4116	return;
				4117	}
				4118
				4119	__kmp_apply_thread_places(&address2os, depth);
				4120
				4121	// Create the table of masks, indexed by thread Id.
				4122	unsigned maxIndex;
				4123	unsigned numUnique;
				4124	kmp_affin_mask_t *osId2Mask =
				4125	__kmp_create_masks(&maxIndex, &numUnique, address2os, __kmp_avail_proc);
				4126	if (__kmp_affinity_gran_levels == 0) {
				4127	KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
				4128	}
				4129
				4130	// Set the childNums vector in all Address objects. This must be done before
				4131	// we can sort using __kmp_affinity_cmp_Address_child_num(), which takes into
				4132	// account the setting of __kmp_affinity_compact.
				4133	__kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
				4134
				4135	switch (__kmp_affinity_type) {
				4136
				4137	case affinity_explicit:
				4138	KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
				4139	#if OMP_40_ENABLED
				4140	if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
				4141	#endif
				4142	{
				4143	__kmp_affinity_process_proclist(
				4144	&__kmp_affinity_masks, &__kmp_affinity_num_masks,
				4145	__kmp_affinity_proclist, osId2Mask, maxIndex);
				4146	}
				4147	#if OMP_40_ENABLED
				4148	else {
				4149	__kmp_affinity_process_placelist(
				4150	&__kmp_affinity_masks, &__kmp_affinity_num_masks,
				4151	__kmp_affinity_proclist, osId2Mask, maxIndex);
				4152	}
				4153	#endif
				4154	if (__kmp_affinity_num_masks == 0) {
				4155	if (__kmp_affinity_verbose \|\|
				4156	(__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
				4157	KMP_WARNING(AffNoValidProcID);
				4158	}
				4159	__kmp_affinity_type = affinity_none;
				4160	return;
				4161	}
				4162	break;
				4163
				4164	// The other affinity types rely on sorting the Addresses according to some
				4165	// permutation of the machine topology tree. Set __kmp_affinity_compact and
				4166	// __kmp_affinity_offset appropriately, then jump to a common code fragment
				4167	// to do the sort and create the array of affinity masks.
				4168
				4169	case affinity_logical:
				4170	__kmp_affinity_compact = 0;
				4171	if (__kmp_affinity_offset) {
				4172	__kmp_affinity_offset =
				4173	__kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
				4174	}
				4175	goto sortAddresses;
				4176
				4177	case affinity_physical:
				4178	if (__kmp_nThreadsPerCore > 1) {
				4179	__kmp_affinity_compact = 1;
				4180	if (__kmp_affinity_compact >= depth) {
				4181	__kmp_affinity_compact = 0;
				4182	}
				4183	} else {
				4184	__kmp_affinity_compact = 0;
				4185	}
				4186	if (__kmp_affinity_offset) {
				4187	__kmp_affinity_offset =
				4188	__kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
				4189	}
				4190	goto sortAddresses;
				4191
				4192	case affinity_scatter:
				4193	if (__kmp_affinity_compact >= depth) {
				4194	__kmp_affinity_compact = 0;
				4195	} else {
				4196	__kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
				4197	}
				4198	goto sortAddresses;
				4199
				4200	case affinity_compact:
				4201	if (__kmp_affinity_compact >= depth) {
				4202	__kmp_affinity_compact = depth - 1;
				4203	}
				4204	goto sortAddresses;
				4205
				4206	case affinity_balanced:
				4207	if (depth <= 1) {
				4208	if (__kmp_affinity_verbose \|\| __kmp_affinity_warnings) {
				4209	KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
				4210	}
				4211	__kmp_affinity_type = affinity_none;
				4212	return;
				4213	} else if (__kmp_affinity_uniform_topology()) {
				4214	break;
				4215	} else { // Non-uniform topology
				4216
				4217	// Save the depth for further usage
				4218	__kmp_aff_depth = depth;
				4219
				4220	int core_level = __kmp_affinity_find_core_level(
				4221	address2os, __kmp_avail_proc, depth - 1);
				4222	int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
				4223	depth - 1, core_level);
				4224	int maxprocpercore = __kmp_affinity_max_proc_per_core(
				4225	address2os, __kmp_avail_proc, depth - 1, core_level);
				4226
				4227	int nproc = ncores * maxprocpercore;
				4228	if ((nproc < 2) \|\| (nproc < __kmp_avail_proc)) {
				4229	if (__kmp_affinity_verbose \|\| __kmp_affinity_warnings) {
				4230	KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
				4231	}
				4232	__kmp_affinity_type = affinity_none;
				4233	return;
				4234	}
				4235
				4236	procarr = (int )__kmp_allocate(sizeof(int) nproc);
				4237	for (int i = 0; i < nproc; i++) {
				4238	procarr[i] = -1;
				4239	}
				4240
				4241	int lastcore = -1;
				4242	int inlastcore = 0;
				4243	for (int i = 0; i < __kmp_avail_proc; i++) {
				4244	int proc = address2os[i].second;
				4245	int core =
				4246	__kmp_affinity_find_core(address2os, i, depth - 1, core_level);
				4247
				4248	if (core == lastcore) {
				4249	inlastcore++;
				4250	} else {
				4251	inlastcore = 0;
				4252	}
				4253	lastcore = core;
				4254
				4255	procarr[core * maxprocpercore + inlastcore] = proc;
				4256	}
				4257
				4258	break;
				4259	}
				4260
				4261	sortAddresses:
				4262	// Allocate the gtid->affinity mask table.
				4263	if (__kmp_affinity_dups) {
				4264	__kmp_affinity_num_masks = __kmp_avail_proc;
				4265	} else {
				4266	__kmp_affinity_num_masks = numUnique;
				4267	}
				4268
				4269	#if OMP_40_ENABLED
				4270	if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
				4271	(__kmp_affinity_num_places > 0) &&
				4272	((unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks)) {
				4273	__kmp_affinity_num_masks = __kmp_affinity_num_places;
				4274	}
				4275	#endif
				4276
				4277	KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
				4278
				4279	// Sort the address2os table according to the current setting of
				4280	// __kmp_affinity_compact, then fill out __kmp_affinity_masks.
				4281	qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
				4282	__kmp_affinity_cmp_Address_child_num);
				4283	{
				4284	int i;
				4285	unsigned j;
				4286	for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
				4287	if ((!__kmp_affinity_dups) && (!address2os[i].first.leader)) {
				4288	continue;
				4289	}
				4290	unsigned osId = address2os[i].second;
				4291	kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
				4292	kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j);
				4293	KMP_ASSERT(KMP_CPU_ISSET(osId, src));
				4294	KMP_CPU_COPY(dest, src);
				4295	if (++j >= __kmp_affinity_num_masks) {
				4296	break;
				4297	}
				4298	}
				4299	KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
				4300	}
				4301	break;
				4302
				4303	default:
				4304	KMP_ASSERT2(0, "Unexpected affinity setting");
				4305	}
				4306
				4307	KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex + 1);
				4308	machine_hierarchy.init(address2os, __kmp_avail_proc);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4309	}
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	4310	#undef KMP_EXIT_AFF_NONE
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4311
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4312	void __kmp_affinity_initialize(void) {
				4313	// Much of the code above was written assumming that if a machine was not
				4314	// affinity capable, then __kmp_affinity_type == affinity_none. We now
				4315	// explicitly represent this as __kmp_affinity_type == affinity_disabled.
				4316	// There are too many checks for __kmp_affinity_type == affinity_none
				4317	// in this code. Instead of trying to change them all, check if
				4318	// __kmp_affinity_type == affinity_disabled, and if so, slam it with
				4319	// affinity_none, call the real initialization routine, then restore
				4320	// __kmp_affinity_type to affinity_disabled.
				4321	int disabled = (__kmp_affinity_type == affinity_disabled);
				4322	if (!KMP_AFFINITY_CAPABLE()) {
				4323	KMP_ASSERT(disabled);
				4324	}
				4325	if (disabled) {
				4326	__kmp_affinity_type = affinity_none;
				4327	}
				4328	__kmp_aux_affinity_initialize();
				4329	if (disabled) {
				4330	__kmp_affinity_type = affinity_disabled;
				4331	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4332	}
				4333
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4334	void __kmp_affinity_uninitialize(void) {
				4335	if (__kmp_affinity_masks != NULL) {
				4336	KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
				4337	__kmp_affinity_masks = NULL;
				4338	}
				4339	if (__kmp_affin_fullMask != NULL) {
				4340	KMP_CPU_FREE(__kmp_affin_fullMask);
				4341	__kmp_affin_fullMask = NULL;
				4342	}
				4343	__kmp_affinity_num_masks = 0;
				4344	__kmp_affinity_type = affinity_default;
				4345	#if OMP_40_ENABLED
				4346	__kmp_affinity_num_places = 0;
				4347	#endif
				4348	if (__kmp_affinity_proclist != NULL) {
				4349	__kmp_free(__kmp_affinity_proclist);
				4350	__kmp_affinity_proclist = NULL;
				4351	}
				4352	if (address2os != NULL) {
				4353	__kmp_free(address2os);
				4354	address2os = NULL;
				4355	}
				4356	if (procarr != NULL) {
				4357	__kmp_free(procarr);
				4358	procarr = NULL;
				4359	}
				4360	#if KMP_USE_HWLOC
				4361	if (__kmp_hwloc_topology != NULL) {
				4362	hwloc_topology_destroy(__kmp_hwloc_topology);
				4363	__kmp_hwloc_topology = NULL;
				4364	}
				4365	#endif
				4366	KMPAffinity::destroy_api();
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4367	}
				4368
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4369	void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
				4370	if (!KMP_AFFINITY_CAPABLE()) {
				4371	return;
				4372	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4373
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4374	kmp_info_t th = (kmp_info_t )TCR_SYNC_PTR(__kmp_threads[gtid]);
				4375	if (th->th.th_affin_mask == NULL) {
				4376	KMP_CPU_ALLOC(th->th.th_affin_mask);
				4377	} else {
				4378	KMP_CPU_ZERO(th->th.th_affin_mask);
				4379	}
				4380
				4381	// Copy the thread mask to the kmp_info_t strucuture. If
				4382	// __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one that
				4383	// has all of the OS proc ids set, or if __kmp_affinity_respect_mask is set,
				4384	// then the full mask is the same as the mask of the initialization thread.
				4385	kmp_affin_mask_t *mask;
				4386	int i;
				4387
				4388	#if OMP_40_ENABLED
				4389	if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
				4390	#endif
				4391	{
				4392	if ((__kmp_affinity_type == affinity_none) \|\|
				4393	(__kmp_affinity_type == affinity_balanced)) {
				4394	#if KMP_GROUP_AFFINITY
				4395	if (__kmp_num_proc_groups > 1) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4396	return;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4397	}
				4398	#endif
				4399	KMP_ASSERT(__kmp_affin_fullMask != NULL);
				4400	i = KMP_PLACE_ALL;
				4401	mask = __kmp_affin_fullMask;
				4402	} else {
				4403	KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
				4404	i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
				4405	mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4406	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4407	}
				4408	#if OMP_40_ENABLED
				4409	else {
				4410	if ((!isa_root) \|\|
				4411	(__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
				4412	#if KMP_GROUP_AFFINITY
				4413	if (__kmp_num_proc_groups > 1) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4414	return;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4415	}
				4416	#endif
				4417	KMP_ASSERT(__kmp_affin_fullMask != NULL);
				4418	i = KMP_PLACE_ALL;
				4419	mask = __kmp_affin_fullMask;
				4420	} else {
				4421	// int i = some hash function or just a counter that doesn't
				4422	// always start at 0. Use gtid for now.
				4423	KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
				4424	i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
				4425	mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4426	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4427	}
				4428	#endif
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4429
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4430	#if OMP_40_ENABLED
				4431	th->th.th_current_place = i;
				4432	if (isa_root) {
				4433	th->th.th_new_place = i;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4434	th->th.th_first_place = 0;
				4435	th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4436	}
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	4437
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4438	if (i == KMP_PLACE_ALL) {
				4439	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
				4440	gtid));
				4441	} else {
				4442	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
				4443	gtid, i));
				4444	}
				4445	#else
				4446	if (i == -1) {
				4447	KA_TRACE(
				4448	100,
				4449	("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n",
				4450	gtid));
				4451	} else {
				4452	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
				4453	gtid, i));
				4454	}
				4455	#endif /* OMP_40_ENABLED */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4456
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4457	KMP_CPU_COPY(th->th.th_affin_mask, mask);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4458
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4459	if (__kmp_affinity_verbose) {
				4460	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4461	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4462	th->th.th_affin_mask);
				4463	KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
				4464	__kmp_gettid(), gtid, buf);
				4465	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4466
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4467	#if KMP_OS_WINDOWS
				4468	// On Windows* OS, the process affinity mask might have changed. If the user
				4469	// didn't request affinity and this call fails, just continue silently.
				4470	// See CQ171393.
				4471	if (__kmp_affinity_type == affinity_none) {
				4472	__kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
				4473	} else
Jonathan Peyton	7c465a5	2016-09-12 19:02:53 +0000	[diff] [blame]	4474	#endif
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4475	__kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
Jonathan Peyton	7c465a5	2016-09-12 19:02:53 +0000	[diff] [blame]	4476	}
				4477
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4478	#if OMP_40_ENABLED
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4479
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4480	void __kmp_affinity_set_place(int gtid) {
				4481	int retval;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4482
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4483	if (!KMP_AFFINITY_CAPABLE()) {
				4484	return;
				4485	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4486
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4487	kmp_info_t th = (kmp_info_t )TCR_SYNC_PTR(__kmp_threads[gtid]);
				4488
				4489	KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current "
				4490	"place = %d)\n",
				4491	gtid, th->th.th_new_place, th->th.th_current_place));
				4492
				4493	// Check that the new place is within this thread's partition.
				4494	KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
				4495	KMP_ASSERT(th->th.th_new_place >= 0);
				4496	KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
				4497	if (th->th.th_first_place <= th->th.th_last_place) {
				4498	KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&
				4499	(th->th.th_new_place <= th->th.th_last_place));
				4500	} else {
				4501	KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) \|\|
				4502	(th->th.th_new_place >= th->th.th_last_place));
				4503	}
				4504
				4505	// Copy the thread mask to the kmp_info_t strucuture,
				4506	// and set this thread's affinity.
				4507	kmp_affin_mask_t *mask =
				4508	KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place);
				4509	KMP_CPU_COPY(th->th.th_affin_mask, mask);
				4510	th->th.th_current_place = th->th.th_new_place;
				4511
				4512	if (__kmp_affinity_verbose) {
				4513	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4514	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4515	th->th.th_affin_mask);
				4516	KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
				4517	__kmp_gettid(), gtid, buf);
				4518	}
				4519	__kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
				4520	}
				4521
				4522	#endif /* OMP_40_ENABLED */
				4523
				4524	int __kmp_aux_set_affinity(void **mask) {
				4525	int gtid;
				4526	kmp_info_t *th;
				4527	int retval;
				4528
				4529	if (!KMP_AFFINITY_CAPABLE()) {
				4530	return -1;
				4531	}
				4532
				4533	gtid = __kmp_entry_gtid();
				4534	KA_TRACE(1000, ; {
				4535	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4536	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4537	(kmp_affin_mask_t )(mask));
				4538	__kmp_debug_printf(
				4539	"kmp_set_affinity: setting affinity mask for thread %d = %s\n", gtid,
				4540	buf);
				4541	});
				4542
				4543	if (__kmp_env_consistency_check) {
				4544	if ((mask == NULL) \|\| (*mask == NULL)) {
				4545	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
				4546	} else {
				4547	unsigned proc;
				4548	int num_procs = 0;
				4549
				4550	KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t )(mask))) {
				4551	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
				4552	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4553	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4554	if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t )(mask))) {
				4555	continue;
				4556	}
				4557	num_procs++;
				4558	}
				4559	if (num_procs == 0) {
				4560	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
				4561	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4562
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4563	#if KMP_GROUP_AFFINITY
				4564	if (__kmp_get_proc_group((kmp_affin_mask_t )(mask)) < 0) {
				4565	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
				4566	}
				4567	#endif /* KMP_GROUP_AFFINITY */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4568	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4569	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4570
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4571	th = __kmp_threads[gtid];
				4572	KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
				4573	retval = __kmp_set_system_affinity((kmp_affin_mask_t )(mask), FALSE);
				4574	if (retval == 0) {
				4575	KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t )(mask));
				4576	}
				4577
				4578	#if OMP_40_ENABLED
				4579	th->th.th_current_place = KMP_PLACE_UNDEFINED;
				4580	th->th.th_new_place = KMP_PLACE_UNDEFINED;
				4581	th->th.th_first_place = 0;
				4582	th->th.th_last_place = __kmp_affinity_num_masks - 1;
				4583
				4584	// Turn off 4.0 affinity for the current tread at this parallel level.
				4585	th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
				4586	#endif
				4587
				4588	return retval;
				4589	}
				4590
				4591	int __kmp_aux_get_affinity(void **mask) {
				4592	int gtid;
				4593	int retval;
				4594	kmp_info_t *th;
				4595
				4596	if (!KMP_AFFINITY_CAPABLE()) {
				4597	return -1;
				4598	}
				4599
				4600	gtid = __kmp_entry_gtid();
				4601	th = __kmp_threads[gtid];
				4602	KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
				4603
				4604	KA_TRACE(1000, ; {
				4605	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4606	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4607	th->th.th_affin_mask);
				4608	__kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n",
				4609	gtid, buf);
				4610	});
				4611
				4612	if (__kmp_env_consistency_check) {
				4613	if ((mask == NULL) \|\| (*mask == NULL)) {
				4614	KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
				4615	}
				4616	}
				4617
				4618	#if !KMP_OS_WINDOWS
				4619
				4620	retval = __kmp_get_system_affinity((kmp_affin_mask_t )(mask), FALSE);
				4621	KA_TRACE(1000, ; {
				4622	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4623	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4624	(kmp_affin_mask_t )(mask));
				4625	__kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n",
				4626	gtid, buf);
				4627	});
				4628	return retval;
				4629
				4630	#else
				4631
				4632	KMP_CPU_COPY((kmp_affin_mask_t )(mask), th->th.th_affin_mask);
				4633	return 0;
				4634
				4635	#endif /* KMP_OS_WINDOWS */
				4636	}
				4637
				4638	int __kmp_aux_get_affinity_max_proc() {
				4639	if (!KMP_AFFINITY_CAPABLE()) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4640	return 0;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4641	}
				4642	#if KMP_GROUP_AFFINITY
				4643	if (__kmp_num_proc_groups > 1) {
				4644	return (int)(__kmp_num_proc_groups * sizeof(DWORD_PTR) * CHAR_BIT);
				4645	}
				4646	#endif
				4647	return __kmp_xproc;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4648	}
				4649
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4650	int __kmp_aux_set_affinity_mask_proc(int proc, void **mask) {
				4651	int retval;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4652
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4653	if (!KMP_AFFINITY_CAPABLE()) {
				4654	return -1;
				4655	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4656
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4657	KA_TRACE(1000, ; {
				4658	int gtid = __kmp_entry_gtid();
				4659	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4660	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4661	(kmp_affin_mask_t )(mask));
				4662	__kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in "
				4663	"affinity mask for thread %d = %s\n",
				4664	proc, gtid, buf);
				4665	});
				4666
				4667	if (__kmp_env_consistency_check) {
				4668	if ((mask == NULL) \|\| (*mask == NULL)) {
				4669	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4670	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4671	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4672
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4673	if ((proc < 0) \|\| (proc >= __kmp_aux_get_affinity_max_proc())) {
				4674	return -1;
				4675	}
				4676	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
				4677	return -2;
				4678	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4679
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4680	KMP_CPU_SET(proc, (kmp_affin_mask_t )(mask));
				4681	return 0;
				4682	}
				4683
				4684	int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask) {
				4685	int retval;
				4686
				4687	if (!KMP_AFFINITY_CAPABLE()) {
				4688	return -1;
				4689	}
				4690
				4691	KA_TRACE(1000, ; {
				4692	int gtid = __kmp_entry_gtid();
				4693	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4694	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4695	(kmp_affin_mask_t )(mask));
				4696	__kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in "
				4697	"affinity mask for thread %d = %s\n",
				4698	proc, gtid, buf);
				4699	});
				4700
				4701	if (__kmp_env_consistency_check) {
				4702	if ((mask == NULL) \|\| (*mask == NULL)) {
				4703	KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4704	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4705	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4706
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4707	if ((proc < 0) \|\| (proc >= __kmp_aux_get_affinity_max_proc())) {
				4708	return -1;
				4709	}
				4710	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
				4711	return -2;
				4712	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4713
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4714	KMP_CPU_CLR(proc, (kmp_affin_mask_t )(mask));
				4715	return 0;
				4716	}
				4717
				4718	int __kmp_aux_get_affinity_mask_proc(int proc, void **mask) {
				4719	int retval;
				4720
				4721	if (!KMP_AFFINITY_CAPABLE()) {
				4722	return -1;
				4723	}
				4724
				4725	KA_TRACE(1000, ; {
				4726	int gtid = __kmp_entry_gtid();
				4727	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4728	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4729	(kmp_affin_mask_t )(mask));
				4730	__kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in "
				4731	"affinity mask for thread %d = %s\n",
				4732	proc, gtid, buf);
				4733	});
				4734
				4735	if (__kmp_env_consistency_check) {
				4736	if ((mask == NULL) \|\| (*mask == NULL)) {
				4737	KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
				4738	}
				4739	}
				4740
				4741	if ((proc < 0) \|\| (proc >= __kmp_aux_get_affinity_max_proc())) {
				4742	return -1;
				4743	}
				4744	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4745	return 0;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4746	}
				4747
				4748	return KMP_CPU_ISSET(proc, (kmp_affin_mask_t )(mask));
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4749	}
				4750
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4751	// Dynamic affinity settings - Affinity balanced
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4752	void __kmp_balanced_affinity(int tid, int nthreads) {
				4753	bool fine_gran = true;
Paul Osmialowski	ecbe2ea	2016-07-29 20:55:03 +0000	[diff] [blame]	4754
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4755	switch (__kmp_affinity_gran) {
				4756	case affinity_gran_fine:
				4757	case affinity_gran_thread:
				4758	break;
				4759	case affinity_gran_core:
				4760	if (__kmp_nThreadsPerCore > 1) {
				4761	fine_gran = false;
				4762	}
				4763	break;
				4764	case affinity_gran_package:
				4765	if (nCoresPerPkg > 1) {
				4766	fine_gran = false;
				4767	}
				4768	break;
				4769	default:
				4770	fine_gran = false;
				4771	}
				4772
				4773	if (__kmp_affinity_uniform_topology()) {
				4774	int coreID;
				4775	int threadID;
				4776	// Number of hyper threads per core in HT machine
				4777	int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
				4778	// Number of cores
				4779	int ncores = __kmp_ncores;
				4780	if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) {
				4781	__kmp_nth_per_core = __kmp_avail_proc / nPackages;
				4782	ncores = nPackages;
				4783	}
				4784	// How many threads will be bound to each core
				4785	int chunk = nthreads / ncores;
				4786	// How many cores will have an additional thread bound to it - "big cores"
				4787	int big_cores = nthreads % ncores;
				4788	// Number of threads on the big cores
				4789	int big_nth = (chunk + 1) * big_cores;
				4790	if (tid < big_nth) {
				4791	coreID = tid / (chunk + 1);
				4792	threadID = (tid % (chunk + 1)) % __kmp_nth_per_core;
				4793	} else { // tid >= big_nth
				4794	coreID = (tid - big_cores) / chunk;
				4795	threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core;
Paul Osmialowski	ecbe2ea	2016-07-29 20:55:03 +0000	[diff] [blame]	4796	}
				4797
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4798	KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
				4799	"Illegal set affinity operation when not capable");
				4800
				4801	kmp_affin_mask_t *mask;
				4802	KMP_CPU_ALLOC_ON_STACK(mask);
				4803	KMP_CPU_ZERO(mask);
				4804
				4805	if (fine_gran) {
				4806	int osID = address2os[coreID * __kmp_nth_per_core + threadID].second;
				4807	KMP_CPU_SET(osID, mask);
				4808	} else {
				4809	for (int i = 0; i < __kmp_nth_per_core; i++) {
				4810	int osID;
				4811	osID = address2os[coreID * __kmp_nth_per_core + i].second;
				4812	KMP_CPU_SET(osID, mask);
				4813	}
				4814	}
				4815	if (__kmp_affinity_verbose) {
				4816	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4817	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
				4818	KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
				4819	__kmp_gettid(), tid, buf);
				4820	}
				4821	__kmp_set_system_affinity(mask, TRUE);
				4822	KMP_CPU_FREE_FROM_STACK(mask);
				4823	} else { // Non-uniform topology
				4824
				4825	kmp_affin_mask_t *mask;
				4826	KMP_CPU_ALLOC_ON_STACK(mask);
				4827	KMP_CPU_ZERO(mask);
				4828
				4829	int core_level = __kmp_affinity_find_core_level(
				4830	address2os, __kmp_avail_proc, __kmp_aff_depth - 1);
				4831	int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
				4832	__kmp_aff_depth - 1, core_level);
				4833	int nth_per_core = __kmp_affinity_max_proc_per_core(
				4834	address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level);
				4835
				4836	// For performance gain consider the special case nthreads ==
				4837	// __kmp_avail_proc
				4838	if (nthreads == __kmp_avail_proc) {
				4839	if (fine_gran) {
				4840	int osID = address2os[tid].second;
				4841	KMP_CPU_SET(osID, mask);
				4842	} else {
				4843	int core = __kmp_affinity_find_core(address2os, tid,
				4844	__kmp_aff_depth - 1, core_level);
				4845	for (int i = 0; i < __kmp_avail_proc; i++) {
				4846	int osID = address2os[i].second;
				4847	if (__kmp_affinity_find_core(address2os, i, __kmp_aff_depth - 1,
				4848	core_level) == core) {
				4849	KMP_CPU_SET(osID, mask);
				4850	}
Paul Osmialowski	ecbe2ea	2016-07-29 20:55:03 +0000	[diff] [blame]	4851	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4852	}
				4853	} else if (nthreads <= ncores) {
				4854
				4855	int core = 0;
				4856	for (int i = 0; i < ncores; i++) {
				4857	// Check if this core from procarr[] is in the mask
				4858	int in_mask = 0;
				4859	for (int j = 0; j < nth_per_core; j++) {
				4860	if (procarr[i * nth_per_core + j] != -1) {
				4861	in_mask = 1;
				4862	break;
				4863	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4864	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4865	if (in_mask) {
				4866	if (tid == core) {
				4867	for (int j = 0; j < nth_per_core; j++) {
				4868	int osID = procarr[i * nth_per_core + j];
				4869	if (osID != -1) {
				4870	KMP_CPU_SET(osID, mask);
				4871	// For fine granularity it is enough to set the first available
				4872	// osID for this core
				4873	if (fine_gran) {
				4874	break;
				4875	}
				4876	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4877	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4878	break;
				4879	} else {
				4880	core++;
				4881	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4882	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4883	}
				4884	} else { // nthreads > ncores
				4885	// Array to save the number of processors at each core
				4886	int nproc_at_core = (int )KMP_ALLOCA(sizeof(int) * ncores);
				4887	// Array to save the number of cores with "x" available processors;
				4888	int *ncores_with_x_procs =
				4889	(int )KMP_ALLOCA(sizeof(int) (nth_per_core + 1));
				4890	// Array to save the number of cores with # procs from x to nth_per_core
				4891	int *ncores_with_x_to_max_procs =
				4892	(int )KMP_ALLOCA(sizeof(int) (nth_per_core + 1));
				4893
				4894	for (int i = 0; i <= nth_per_core; i++) {
				4895	ncores_with_x_procs[i] = 0;
				4896	ncores_with_x_to_max_procs[i] = 0;
				4897	}
				4898
				4899	for (int i = 0; i < ncores; i++) {
				4900	int cnt = 0;
				4901	for (int j = 0; j < nth_per_core; j++) {
				4902	if (procarr[i * nth_per_core + j] != -1) {
				4903	cnt++;
				4904	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4905	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4906	nproc_at_core[i] = cnt;
				4907	ncores_with_x_procs[cnt]++;
				4908	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4909
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4910	for (int i = 0; i <= nth_per_core; i++) {
				4911	for (int j = i; j <= nth_per_core; j++) {
				4912	ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j];
				4913	}
				4914	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4915
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4916	// Max number of processors
				4917	int nproc = nth_per_core * ncores;
				4918	// An array to keep number of threads per each context
				4919	int newarr = (int )__kmp_allocate(sizeof(int) * nproc);
				4920	for (int i = 0; i < nproc; i++) {
				4921	newarr[i] = 0;
				4922	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4923
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4924	int nth = nthreads;
				4925	int flag = 0;
				4926	while (nth > 0) {
				4927	for (int j = 1; j <= nth_per_core; j++) {
				4928	int cnt = ncores_with_x_to_max_procs[j];
				4929	for (int i = 0; i < ncores; i++) {
				4930	// Skip the core with 0 processors
				4931	if (nproc_at_core[i] == 0) {
				4932	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4933	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4934	for (int k = 0; k < nth_per_core; k++) {
				4935	if (procarr[i * nth_per_core + k] != -1) {
				4936	if (newarr[i * nth_per_core + k] == 0) {
				4937	newarr[i * nth_per_core + k] = 1;
				4938	cnt--;
				4939	nth--;
				4940	break;
				4941	} else {
				4942	if (flag != 0) {
				4943	newarr[i * nth_per_core + k]++;
				4944	cnt--;
				4945	nth--;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4946	break;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4947	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4948	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4949	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4950	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4951	if (cnt == 0 \|\| nth == 0) {
				4952	break;
				4953	}
				4954	}
				4955	if (nth == 0) {
				4956	break;
				4957	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4958	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4959	flag = 1;
				4960	}
				4961	int sum = 0;
				4962	for (int i = 0; i < nproc; i++) {
				4963	sum += newarr[i];
				4964	if (sum > tid) {
				4965	if (fine_gran) {
				4966	int osID = procarr[i];
				4967	KMP_CPU_SET(osID, mask);
				4968	} else {
				4969	int coreID = i / nth_per_core;
				4970	for (int ii = 0; ii < nth_per_core; ii++) {
				4971	int osID = procarr[coreID * nth_per_core + ii];
				4972	if (osID != -1) {
				4973	KMP_CPU_SET(osID, mask);
				4974	}
				4975	}
				4976	}
				4977	break;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4978	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4979	}
				4980	__kmp_free(newarr);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4981	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame^]	4982
				4983	if (__kmp_affinity_verbose) {
				4984	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4985	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
				4986	KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
				4987	__kmp_gettid(), tid, buf);
				4988	}
				4989	__kmp_set_system_affinity(mask, TRUE);
				4990	KMP_CPU_FREE_FROM_STACK(mask);
				4991	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4992	}
				4993
#if KMP_OS_LINUX
// Bind the calling OpenMP thread to the full affinity mask
// (__kmp_affin_fullMask).
//
// No such entry is needed for Windows because there is the
// GetProcessAffinityMask() api.
//
// Intended usage:
//   1) The user gets the current affinity mask
//   2) Then sets the affinity by calling this function
//   3) Error check the return value
//   4) Use non-OpenMP parallelization
//   5) Reset the affinity to what was stored in step 1)
//
// Return value:
//    0  on success,
//   -1  if we cannot bind thread,
//   >0  (errno) if an error happened during binding.
#ifdef __cplusplus
extern "C"
#endif
    int
    kmp_set_thread_affinity_mask_initial() {
  const int gtid = __kmp_get_gtid();

  // Do not touch non-omp threads
  if (gtid < 0) {
    KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: non-omp thread, "
                  "returning\n"));
    return -1;
  }

  // Both affinity capability and middle initialization are required.
  const bool affinity_ready = KMP_AFFINITY_CAPABLE() && __kmp_init_middle;
  if (!affinity_ready) {
    KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: affinity not "
                  "initialized, returning\n"));
    return -1;
  }

  KA_TRACE(30,
           ("kmp_set_thread_affinity_mask_initial: set full mask for thread "
            "%d\n",
            gtid));
  KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
  return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
}
#endif
				5032
Alp Toker	763b939	2014-02-28 09:42:41 +0000	[diff] [blame]	5033	#endif // KMP_AFFINITY_SUPPORTED