/*
 * kmp_affinity.cpp -- affinity management
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_affinity.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_str.h"
#include "kmp_wrapper_getpid.h"

// Store the real or imagined machine hierarchy here
static hierarchy_info machine_hierarchy;

void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
  kmp_uint32 depth;
  // The test below is true if affinity is available, but set to "none". Need to
  // init on first use of hierarchical barrier.
  if (TCR_1(machine_hierarchy.uninitialized))
    machine_hierarchy.init(NULL, nproc);

  // Adjust the hierarchy in case num threads exceeds original
  if (nproc > machine_hierarchy.base_num_threads)
    machine_hierarchy.resize(nproc);

  depth = machine_hierarchy.depth;
  KMP_DEBUG_ASSERT(depth > 0);

  thr_bar->depth = depth;
  thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1;
  thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}
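
// Illustrative example: if machine_hierarchy models 16 threads with a leaf
// branching factor numPerLevel[0] == 4, then base_leaf_kids == 3 (each
// leaf-level parent gathers and releases 3 children), and skip_per_level
// gives the stride, in thread ids, between adjacent subtrees at each level.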

#if KMP_AFFINITY_SUPPORTED

bool KMPAffinity::picked_api = false;

void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
void *KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); }
void KMPAffinity::Mask::operator delete(void *p) { __kmp_free(p); }
void KMPAffinity::Mask::operator delete[](void *p) { __kmp_free(p); }
void *KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); }
void KMPAffinity::operator delete(void *p) { __kmp_free(p); }

void KMPAffinity::pick_api() {
  KMPAffinity *affinity_dispatch;
  if (picked_api)
    return;
#if KMP_USE_HWLOC
  // Only use Hwloc if affinity isn't explicitly disabled and
  // the user requests the Hwloc topology method
  if (__kmp_affinity_top_method == affinity_top_method_hwloc &&
      __kmp_affinity_type != affinity_disabled) {
    affinity_dispatch = new KMPHwlocAffinity();
  } else
#endif
  {
    affinity_dispatch = new KMPNativeAffinity();
  }
  __kmp_affinity_dispatch = affinity_dispatch;
  picked_api = true;
}

void KMPAffinity::destroy_api() {
  if (__kmp_affinity_dispatch != NULL) {
    delete __kmp_affinity_dispatch;
    __kmp_affinity_dispatch = NULL;
    picked_api = false;
  }
}

// Print the affinity mask to the character array in a pretty format.
char *__kmp_affinity_print_mask(char *buf, int buf_len,
                                kmp_affin_mask_t *mask) {
  KMP_ASSERT(buf_len >= 40);
  char *scan = buf;
  char *end = buf + buf_len - 1;

  // Find first element / check for empty set.
  size_t i;
  i = mask->begin();
  if (i == mask->end()) {
    KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}");
    while (*scan != '\0')
      scan++;
    KMP_ASSERT(scan <= end);
    return buf;
  }

  KMP_SNPRINTF(scan, end - scan + 1, "{%ld", (long)i);
  while (*scan != '\0')
    scan++;
  i++;
  for (; i != mask->end(); i = mask->next(i)) {
    if (!KMP_CPU_ISSET(i, mask)) {
      continue;
    }

    // Check for buffer overflow. A string of the form ",<n>" will have at most
    // 10 characters, plus we want to leave room to print ",...}" if the set is
    // too large to print for a total of 15 characters. We already left room for
    // '\0' in setting end.
    if (end - scan < 15) {
      break;
    }
    KMP_SNPRINTF(scan, end - scan + 1, ",%-ld", (long)i);
    while (*scan != '\0')
      scan++;
  }
  if (i != mask->end()) {
    KMP_SNPRINTF(scan, end - scan + 1, ",...");
    while (*scan != '\0')
      scan++;
  }
  KMP_SNPRINTF(scan, end - scan + 1, "}");
  while (*scan != '\0')
    scan++;
  KMP_ASSERT(scan <= end);
  return buf;
}
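
// Example: for a mask containing OS procs 0-3 and a sufficiently large
// buffer, this produces "{0,1,2,3}". If fewer than 15 bytes remain while
// scanning, the tail is elided, e.g. "{0,1,2,...}". The buf_len >= 40
// assertion guarantees room for the worst cases "{<empty>}" and ",...}".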

void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
  KMP_CPU_ZERO(mask);

#if KMP_GROUP_AFFINITY

  if (__kmp_num_proc_groups > 1) {
    int group;
    KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
    for (group = 0; group < __kmp_num_proc_groups; group++) {
      int i;
      int num = __kmp_GetActiveProcessorCount(group);
      for (i = 0; i < num; i++) {
        KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
      }
    }
  } else

#endif /* KMP_GROUP_AFFINITY */

  {
    int proc;
    for (proc = 0; proc < __kmp_xproc; proc++) {
      KMP_CPU_SET(proc, mask);
    }
  }
}
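
// Example: on a 64-bit Windows build, CHAR_BIT * sizeof(DWORD_PTR) == 64, so
// proc 5 of processor group 2 occupies bit 2 * 64 + 5 == 133 in the mask.
// Group g always starts at bit g * 64, even when a group has fewer than 64
// active processors.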

// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the child_num
// vector of the address object. This is done in case the labels used for
// the children at one node of the hierarchy differ from those used for
// another node at the same level. Example: suppose the machine has 2 nodes
// with 2 packages each. The first node contains packages 601 and 602, and
// the second node contains packages 603 and 604. If we try to sort the table
// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
// because we are paying attention to the labels themselves, not the ordinal
// child numbers. By using the child numbers in the sort, the result is
// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
static void __kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
                                             int numAddrs) {
  KMP_DEBUG_ASSERT(numAddrs > 0);
  int depth = address2os->first.depth;
  unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *lastLabel = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  int labCt;
  for (labCt = 0; labCt < depth; labCt++) {
    address2os[0].first.childNums[labCt] = counts[labCt] = 0;
    lastLabel[labCt] = address2os[0].first.labels[labCt];
  }
  int i;
  for (i = 1; i < numAddrs; i++) {
    for (labCt = 0; labCt < depth; labCt++) {
      if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
        int labCt2;
        for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
          counts[labCt2] = 0;
          lastLabel[labCt2] = address2os[i].first.labels[labCt2];
        }
        counts[labCt]++;
        lastLabel[labCt] = address2os[i].first.labels[labCt];
        break;
      }
    }
    for (labCt = 0; labCt < depth; labCt++) {
      address2os[i].first.childNums[labCt] = counts[labCt];
    }
    for (; labCt < (int)Address::maxDepth; labCt++) {
      address2os[i].first.childNums[labCt] = 0;
    }
  }
  __kmp_free(lastLabel);
  __kmp_free(counts);
}

// All of the __kmp_affinity_create_*_map() routines should set
// __kmp_affinity_masks to a vector of affinity mask objects of length
// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
// return the number of levels in the machine topology tree (zero if
// __kmp_affinity_type == affinity_none).
//
// All of the __kmp_affinity_create_*_map() routines should set
// *__kmp_affin_fullMask to the affinity mask for the initialization thread.
// They need to save and restore the mask, and it could be needed later, so
// saving it is just an optimization to avoid calling kmp_get_system_affinity()
// again.
kmp_affin_mask_t *__kmp_affin_fullMask = NULL;

static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif
static int *__kmp_pu_os_idx = NULL;

// __kmp_affinity_uniform_topology() doesn't work when called from
// places which support arbitrarily many levels in the machine topology
// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
// or __kmp_affinity_create_x2apicid_map().
inline static bool __kmp_affinity_uniform_topology() {
  return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}
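
// Example: a machine with 2 packages, 8 cores per package, and 2 hardware
// threads per core is uniform when all 2 * 8 * 2 == 32 OS procs are
// available; if some procs are masked out (__kmp_avail_proc < 32), the
// topology is reported as non-uniform.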

// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc.
static void __kmp_affinity_print_topology(AddrUnsPair *address2os, int len,
                                          int depth, int pkgLevel,
                                          int coreLevel, int threadLevel) {
  int proc;

  KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
  for (proc = 0; proc < len; proc++) {
    int level;
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    for (level = 0; level < depth; level++) {
      if (level == threadLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
      } else if (level == coreLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
      } else if (level == pkgLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
      } else if (level > pkgLevel) {
        __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                            level - pkgLevel - 1);
      } else {
        __kmp_str_buf_print(&buf, "L%d ", level);
      }
      __kmp_str_buf_print(&buf, "%d ", address2os[proc].first.labels[level]);
    }
    KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
               buf.str);
    __kmp_str_buf_free(&buf);
  }
}
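
// With a depth-3 map where pkgLevel == 0, coreLevel == 1, and
// threadLevel == 2, each line of output pairs an OS proc with a location
// string of the form "Package 0 Core 1 Thread 0 " (the exact wording comes
// from the i18n message catalog).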

#if KMP_USE_HWLOC

// This function removes the topology levels that are radix 1 and don't offer
// further information about the topology. The most common example is when
// there is one thread context per core: the extra thread-context level offers
// no unique labels, so it is removed.
// return value: the new depth of address2os
static int __kmp_affinity_remove_radix_one_levels(AddrUnsPair *address2os,
                                                  int nActiveThreads, int depth,
                                                  int *pkgLevel, int *coreLevel,
                                                  int *threadLevel) {
  int level;
  int i;
  int radix1_detected;

  for (level = depth - 1; level >= 0; --level) {
    // Always keep the package level
    if (level == *pkgLevel)
      continue;
    // Detect if this level is radix 1
    radix1_detected = 1;
    for (i = 1; i < nActiveThreads; ++i) {
      if (address2os[0].first.labels[level] !=
          address2os[i].first.labels[level]) {
        // There are differing label values for this level so it stays
        radix1_detected = 0;
        break;
      }
    }
    if (!radix1_detected)
      continue;
    // Radix 1 was detected
    if (level == *threadLevel) {
      // If only one thread per core, then just decrement
      // the depth which removes the threadlevel from address2os
      for (i = 0; i < nActiveThreads; ++i) {
        address2os[i].first.depth--;
      }
      *threadLevel = -1;
    } else if (level == *coreLevel) {
      // For core level, we move the thread labels over if they are still
      // valid (*threadLevel != -1), and also reduce the depth another level
      for (i = 0; i < nActiveThreads; ++i) {
        if (*threadLevel != -1) {
          address2os[i].first.labels[*coreLevel] =
              address2os[i].first.labels[*threadLevel];
        }
        address2os[i].first.depth--;
      }
      *coreLevel = -1;
    }
  }
  return address2os[0].first.depth;
}
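
// Example: on a machine with one hardware thread per core, every address
// carries the same thread label, so the thread level is radix 1; each
// address's depth drops from 3 to 2 and *threadLevel becomes -1. The package
// level is never removed, even on a single-package machine.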

// Returns the number of objects of type 'type' below 'obj' within the topology
// tree structure. e.g., if obj is a HWLOC_OBJ_PACKAGE object, and type is
// HWLOC_OBJ_PU, then this will return the number of PU's under the package
// object.
static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj,
                                           hwloc_obj_type_t type) {
  int retval = 0;
  hwloc_obj_t first;
  for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type,
                                           obj->logical_index, type, 0);
       first != NULL &&
       hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) ==
           obj;
       first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type,
                                          first)) {
    ++retval;
  }
  return retval;
}

static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
                                           kmp_i18n_id_t *const msg_id) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  __kmp_get_system_affinity(oldMask, TRUE);

  int depth = 3;
  int pkgLevel = 0;
  int coreLevel = 1;
  int threadLevel = 2;

  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from hwloc on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity_type == affinity_none);

    nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(
        hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, 0),
        HWLOC_OBJ_CORE);
    __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(
        hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0),
        HWLOC_OBJ_PU);
    __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    KMP_CPU_FREE(oldMask);
    return 0;
  }

  // Allocate the data structure to be returned.
  AddrUnsPair *retval =
      (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore,
  // nCoresPerPkg, & nPackages. Make sure all these vars are set
  // correctly, and return if affinity is not enabled.

  hwloc_obj_t pu;
  hwloc_obj_t core;
  hwloc_obj_t socket;
  int nActiveThreads = 0;
  int socket_identifier = 0;
  // re-calculate globals to count only accessible resources
  __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
  for (socket =
           hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, 0);
       socket != NULL; socket = hwloc_get_next_obj_by_type(
                           __kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, socket),
      socket_identifier++) {
    int core_identifier = 0;
    int num_active_cores = 0;
    for (core = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, socket->type,
                                            socket->logical_index,
                                            HWLOC_OBJ_CORE, 0);
         core != NULL &&
         hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, socket->type,
                                        core) == socket;
         core = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE,
                                           core),
        core_identifier++) {
      int pu_identifier = 0;
      int num_active_threads = 0;
      for (pu = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, core->type,
                                            core->logical_index, HWLOC_OBJ_PU,
                                            0);
           pu != NULL &&
           hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, core->type,
                                          pu) == core;
           pu = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU,
                                           pu),
          pu_identifier++) {
        Address addr(3);
        if (!KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
          continue; // skip inactive (inaccessible) unit
        KA_TRACE(20,
                 ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
                  socket->os_index, socket->logical_index, core->os_index,
                  core->logical_index, pu->os_index, pu->logical_index));
        addr.labels[0] = socket_identifier; // package
        addr.labels[1] = core_identifier; // core
        addr.labels[2] = pu_identifier; // pu
        retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
        __kmp_pu_os_idx[nActiveThreads] =
            pu->os_index; // keep os index for each active pu
        nActiveThreads++;
        ++num_active_threads; // count active threads per core
      }
      if (num_active_threads) { // were there any active threads on the core?
        ++__kmp_ncores; // count total active cores
        ++num_active_cores; // count active cores per socket
        if (num_active_threads > __kmp_nThreadsPerCore)
          __kmp_nThreadsPerCore = num_active_threads; // calc maximum
      }
    }
    if (num_active_cores) { // were there any active cores on the socket?
      ++nPackages; // count total active packages
      if (num_active_cores > nCoresPerPkg)
        nCoresPerPkg = num_active_cores; // calc maximum
    }
  }

  // If there's only one thread context to bind to, return now.
  KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
  KMP_ASSERT(nActiveThreads > 0);
  if (nActiveThreads == 1) {
    __kmp_ncores = nPackages = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

      KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(retval);
      KMP_CPU_FREE(oldMask);
      return 0;
    }

    // Form an Address object which only includes the package level.
    Address addr(1);
    addr.labels[0] = retval[0].first.labels[pkgLevel];
    retval[0].first = addr;

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
    }

    *address2os = retval;
    KMP_CPU_FREE(oldMask);
    return 1;
  }

  // Sort the table by physical Id.
  qsort(retval, nActiveThreads, sizeof(*retval),
        __kmp_affinity_cmp_Address_labels);

  // Check to see if the machine topology is uniform
  unsigned uniform =
      (nPackages * nCoresPerPkg * __kmp_nThreadsPerCore == nActiveThreads);

  // Print the machine topology summary.
  if (__kmp_affinity_verbose) {
    char mask[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

    KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (uniform) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }

    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);

    __kmp_str_buf_print(&buf, "%d", nPackages);
    // for (level = 1; level <= pkgLevel; level++) {
    //   __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
    // }
    KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);

    __kmp_str_buf_free(&buf);
  }

  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(retval);
    KMP_CPU_FREE(oldMask);
    return 0;
  }

  // Find any levels with radix 1, and remove them from the map
  // (except for the package level).
  depth = __kmp_affinity_remove_radix_one_levels(
      retval, nActiveThreads, depth, &pkgLevel, &coreLevel, &threadLevel);

  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled
    // in the machine topology map.
    __kmp_affinity_gran_levels = 0;
    if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
      __kmp_affinity_gran_levels++;
    }
    if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
      __kmp_affinity_gran_levels++;
    }
    if (__kmp_affinity_gran > affinity_gran_package) {
      __kmp_affinity_gran_levels++;
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(retval, nActiveThreads, depth, pkgLevel,
                                  coreLevel, threadLevel);
  }

  KMP_CPU_FREE(oldMask);
  *address2os = retval;
  return depth;
}
#endif // KMP_USE_HWLOC

// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread id's <-> processor id's.
static int __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
                                          kmp_i18n_id_t *const msg_id) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Even if __kmp_affinity_type == affinity_none, this routine might still
  // be called to set __kmp_ncores, as well as
  // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  if (!KMP_AFFINITY_CAPABLE()) {
    KMP_ASSERT(__kmp_affinity_type == affinity_none);
    __kmp_ncores = nPackages = __kmp_xproc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    return 0;
  }

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  // Make sure all these vars are set correctly, and return now if affinity is
  // not enabled.
  __kmp_ncores = nPackages = __kmp_avail_proc;
  __kmp_nThreadsPerCore = nCoresPerPkg = 1;
  if (__kmp_affinity_verbose) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              __kmp_affin_fullMask);

    KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    KMP_INFORM(Uniform, "KMP_AFFINITY");
    KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
  }
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  if (__kmp_affinity_type == affinity_none) {
    int avail_ct = 0;
    int i;
    KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
      if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask))
        continue;
      __kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat
    }
    return 0;
  }

  // Construct the data structure to be returned.
  *address2os =
      (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
  int avail_ct = 0;
  unsigned int i;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
    Address addr(1);
    addr.labels[0] = i;
    (*address2os)[avail_ct++] = AddrUnsPair(addr, i);
  }
  if (__kmp_affinity_verbose) {
    KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Only the package level is modeled in the machine topology map,
    // so the #levels of granularity is either 0 or 1.
    if (__kmp_affinity_gran > affinity_gran_package) {
      __kmp_affinity_gran_levels = 1;
    } else {
      __kmp_affinity_gran_levels = 0;
    }
  }
  return 1;
}
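
// Example: with an initial mask of {0,1,2,3}, the flat map treats each OS
// proc as its own package: the returned table is {0}->0, {1}->1, {2}->2,
// {3}->3 with depth 1, and __kmp_ncores == nPackages == 4.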

#if KMP_GROUP_AFFINITY

// If multiple Windows* OS processor groups exist, we can create a 2-level
// topology map with the groups at level 0 and the individual procs at level 1.
// This facilitates letting the threads float among all procs in a group,
// if granularity=group (the default when there are multiple groups).
static int __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
                                                kmp_i18n_id_t *const msg_id) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // If we aren't affinity capable, then return now.
  // The flat mapping will be used.
  if (!KMP_AFFINITY_CAPABLE()) {
    // FIXME set *msg_id
    return -1;
  }

  // Construct the data structure to be returned.
  *address2os =
      (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  int avail_ct = 0;
  int i;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
    Address addr(2);
    addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
    addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
    (*address2os)[avail_ct++] = AddrUnsPair(addr, i);

    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
                 addr.labels[1]);
    }
  }

  if (__kmp_affinity_gran_levels < 0) {
    if (__kmp_affinity_gran == affinity_gran_group) {
      __kmp_affinity_gran_levels = 1;
    } else if ((__kmp_affinity_gran == affinity_gran_fine) ||
               (__kmp_affinity_gran == affinity_gran_thread)) {
      __kmp_affinity_gran_levels = 0;
    } else {
      const char *gran_str = NULL;
      if (__kmp_affinity_gran == affinity_gran_core) {
        gran_str = "core";
      } else if (__kmp_affinity_gran == affinity_gran_package) {
        gran_str = "package";
      } else if (__kmp_affinity_gran == affinity_gran_node) {
        gran_str = "node";
      } else {
        KMP_ASSERT(0);
      }

      // Warning: can't use affinity granularity \"gran\" with group topology
      // method, using "thread"
      __kmp_affinity_gran_levels = 0;
    }
  }
  return 2;
}

#endif /* KMP_GROUP_AFFINITY */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

static int __kmp_cpuid_mask_width(int count) {
  int r = 0;

  while ((1 << r) < count)
    ++r;
  return r;
}
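
// Example: __kmp_cpuid_mask_width(6) == 3, since 1 << 3 == 8 is the smallest
// power of two >= 6; __kmp_cpuid_mask_width(8) == 3 as well. This is the
// number of APIC id bits needed to encode 'count' distinct values.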

class apicThreadInfo {
public:
  unsigned osId; // param to __kmp_affinity_bind_thread
  unsigned apicId; // from cpuid after binding
  unsigned maxCoresPerPkg; // ""
  unsigned maxThreadsPerPkg; // ""
  unsigned pkgId; // inferred from above values
  unsigned coreId; // ""
  unsigned threadId; // ""
};

static int __kmp_affinity_cmp_apicThreadInfo_os_id(const void *a,
                                                   const void *b) {
  const apicThreadInfo *aa = (const apicThreadInfo *)a;
  const apicThreadInfo *bb = (const apicThreadInfo *)b;
  if (aa->osId < bb->osId)
    return -1;
  if (aa->osId > bb->osId)
    return 1;
  return 0;
}

static int __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a,
                                                     const void *b) {
  const apicThreadInfo *aa = (const apicThreadInfo *)a;
  const apicThreadInfo *bb = (const apicThreadInfo *)b;
  if (aa->pkgId < bb->pkgId)
    return -1;
  if (aa->pkgId > bb->pkgId)
    return 1;
  if (aa->coreId < bb->coreId)
    return -1;
  if (aa->coreId > bb->coreId)
    return 1;
  if (aa->threadId < bb->threadId)
    return -1;
  if (aa->threadId > bb->threadId)
    return 1;
  return 0;
}

// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
// an algorithm which cycles through the available os threads, setting
// the current thread's affinity mask to that thread, and then retrieves
// the Apic Id for each thread context using the cpuid instruction.
static int __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
                                            kmp_i18n_id_t *const msg_id) {
  kmp_cpuid buf;
  int rc;
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Check if cpuid leaf 4 is supported.
  __kmp_x86_cpuid(0, 0, &buf);
  if (buf.eax < 4) {
    *msg_id = kmp_i18n_str_NoLeaf4Support;
    return -1;
  }

  // The algorithm used starts by setting the affinity to each available thread
  // and retrieving info from the cpuid instruction, so if we are not capable of
  // calling __kmp_get_system_affinity() and __kmp_set_system_affinity(), then
  // we need to do something else - use the defaults that we calculated from
  // issuing cpuid without binding to each proc.
  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from cpuid on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity_type == affinity_none);

    // Get an upper bound on the number of threads per package using cpuid(1).
    // On some OS/chip combinations where HT is supported by the chip but is
    // disabled, this value will be 2 on a single core chip. Usually, it will be
    // 2 if HT is enabled and 1 if HT is disabled.
    __kmp_x86_cpuid(1, 0, &buf);
    int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
    if (maxThreadsPerPkg == 0) {
      maxThreadsPerPkg = 1;
    }

    // The num cores per pkg comes from cpuid(4). 1 must be added to the
    // encoded value.
    //
    // The author of cpu_count.cpp treated this as only an upper bound on the
    // number of cores, but I haven't seen any cases where it was greater than
    // the actual number of cores, so we will treat it as exact in this block
    // of code.
    //
    // First, we need to check if cpuid(4) is supported on this chip. To see if
    // cpuid(n) is supported, issue cpuid(0) and check if eax has the value n or
    // greater.
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax >= 4) {
      __kmp_x86_cpuid(4, 0, &buf);
      nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
    } else {
      nCoresPerPkg = 1;
    }

    // There is no way to reliably tell if HT is enabled without issuing the
    // cpuid instruction from every thread and correlating the cpuid info, so
    // if the machine is not affinity capable, we assume that HT is off. We
    // have seen quite a few machines where maxThreadsPerPkg is 2, yet the
    // machine does not support HT.
    //
    // - Older OSes are usually found on machines with older chips, which do
    //   not support HT.
    // - The performance penalty for mistakenly identifying a machine as HT
    //   when it isn't (which results in blocktime being incorrectly set to 0)
    //   is greater than the penalty for mistakenly identifying a machine as
    //   being 1 thread/core when it is really HT enabled (which results in
    //   blocktime being incorrectly set to a positive value).
    __kmp_ncores = __kmp_xproc;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    __kmp_nThreadsPerCore = 1;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    return 0;
  }

  // From here on, we can assume that it is safe to call
  // __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
  // __kmp_affinity_type = affinity_none.

  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  KMP_ASSERT(oldMask != NULL);
  __kmp_get_system_affinity(oldMask, TRUE);

  // Run through each of the available contexts, binding the current thread
  // to it, and obtaining the pertinent information using the cpuid instr.
  //
  // The relevant information is:
  // - Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
  //   has a unique Apic Id, which is of the form pkg# : core# : thread#.
  // - Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The value
  //   of this field determines the width of the core# + thread# fields in the
  //   Apic Id. It is also an upper bound on the number of threads per
  //   package, but it has been verified that situations happen where it is not
  //   exact. In particular, on certain OS/chip combinations where Intel(R)
  //   Hyper-Threading Technology is supported by the chip but has been
  //   disabled, the value of this field will be 2 (for a single core chip).
  //   On other OS/chip combinations supporting Intel(R) Hyper-Threading
  //   Technology, the value of this field will be 1 when Intel(R)
  //   Hyper-Threading Technology is disabled and 2 when it is enabled.
  // - Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The value
  //   of this field (+1) determines the width of the core# field in the Apic
  //   Id. The comments in "cpucount.cpp" say that this value is an upper
  //   bound, but the IA-32 architecture manual says that it is exactly the
  //   number of cores per package, and I haven't seen any case where it
  //   wasn't.
  //
  // From this information, deduce the package Id, core Id, and thread Id,
  // and set the corresponding fields in the apicThreadInfo struct.
  unsigned i;
  apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
  unsigned nApics = 0;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

    __kmp_affinity_dispatch->bind_thread(i);
    threadInfo[nApics].osId = i;

    // The apic id and max threads per pkg come from cpuid(1).
    __kmp_x86_cpuid(1, 0, &buf);
    if (((buf.edx >> 9) & 1) == 0) {
      __kmp_set_system_affinity(oldMask, TRUE);
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_ApicNotPresent;
      return -1;
    }
    threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
    threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
    if (threadInfo[nApics].maxThreadsPerPkg == 0) {
      threadInfo[nApics].maxThreadsPerPkg = 1;
    }

    // Max cores per pkg comes from cpuid(4). 1 must be added to the encoded
    // value.
    //
    // First, we need to check if cpuid(4) is supported on this chip. To see if
    // cpuid(n) is supported, issue cpuid(0) and check if eax has the value n
    // or greater.
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax >= 4) {
      __kmp_x86_cpuid(4, 0, &buf);
      threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
    } else {
      threadInfo[nApics].maxCoresPerPkg = 1;
    }

    // Infer the pkgId / coreId / threadId using only the info obtained locally.
    int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg);
    threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

    int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg);
    int widthT = widthCT - widthC;
    if (widthT < 0) {
      // I've never seen this one happen, but I suppose it could, if the cpuid
      // instruction on a chip was really screwed up. Make sure to restore the
      // affinity mask before the tail call.
      __kmp_set_system_affinity(oldMask, TRUE);
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_InvalidCpuidInfo;
      return -1;
    }

    int maskC = (1 << widthC) - 1;
    threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC;

    int maskT = (1 << widthT) - 1;
    threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

    nApics++;
  }
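
  // Worked example of the decomposition above: with maxThreadsPerPkg == 16
  // (widthCT == 4) and maxCoresPerPkg == 8 (widthC == 3, so widthT == 1), an
  // apicId of 0x2B (binary 101011) yields pkgId == 0x2B >> 4 == 2,
  // coreId == (0x2B >> 1) & 7 == 5, and threadId == 0x2B & 1 == 1.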

  // We've collected all the info we need.
  // Restore the old affinity mask for this thread.
  __kmp_set_system_affinity(oldMask, TRUE);

  // If there's only one thread context to bind to, form an Address object
  // with depth 1 and return immediately (or, if affinity is off, set
  // address2os to NULL and return).
  //
  // If it is configured to omit the package level when there is only a single
  // package, the logic at the end of this routine won't work if there is only
  // a single thread - it would try to form an Address object with depth 0.
  KMP_ASSERT(nApics > 0);
  if (nApics == 1) {
    __kmp_ncores = nPackages = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

      KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      return 0;
    }

    *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair));
    Address addr(1);
    addr.labels[0] = threadInfo[0].pkgId;
    (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return 1;
  }

  // Sort the threadInfo table by physical Id.
  qsort(threadInfo, nApics, sizeof(*threadInfo),
        __kmp_affinity_cmp_apicThreadInfo_phys_id);

  // The table is now sorted by pkgId / coreId / threadId, but we really don't
  // know the radix of any of the fields. pkgId's may be sparsely assigned among
  // the chips on a system. Although coreId's are usually assigned
  // [0 .. coresPerPkg-1] and threadId's are usually assigned
  // [0..threadsPerCore-1], we don't want to make any such assumptions.
  //
  // For that matter, we don't know what coresPerPkg and threadsPerCore (or the
  // total # packages) are at this point - we want to determine that now. We
  // only have an upper bound on the first two figures.
  //
  // We also perform a consistency check at this point: the values returned by
  // the cpuid instruction for any thread bound to a given package had better
  // return the same info for maxThreadsPerPkg and maxCoresPerPkg.
  nPackages = 1;
  nCoresPerPkg = 1;
  __kmp_nThreadsPerCore = 1;
  unsigned nCores = 1;

  unsigned pkgCt = 1; // to determine radii
  unsigned lastPkgId = threadInfo[0].pkgId;
  unsigned coreCt = 1;
  unsigned lastCoreId = threadInfo[0].coreId;
  unsigned threadCt = 1;
  unsigned lastThreadId = threadInfo[0].threadId;

  // intra-pkg consist checks
  unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
  unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

  for (i = 1; i < nApics; i++) {
    if (threadInfo[i].pkgId != lastPkgId) {
      nCores++;
      pkgCt++;
      lastPkgId = threadInfo[i].pkgId;
      if ((int)coreCt > nCoresPerPkg)
        nCoresPerPkg = coreCt;
      coreCt = 1;
      lastCoreId = threadInfo[i].coreId;
      if ((int)threadCt > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = threadCt;
      threadCt = 1;
      lastThreadId = threadInfo[i].threadId;

      // This is a different package, so go on to the next iteration without
      // doing any consistency checks. Reset the consistency check vars, though.
      prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
      prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
      continue;
    }

    if (threadInfo[i].coreId != lastCoreId) {
      nCores++;
      coreCt++;
      lastCoreId = threadInfo[i].coreId;
      if ((int)threadCt > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = threadCt;
      threadCt = 1;
      lastThreadId = threadInfo[i].threadId;
    } else if (threadInfo[i].threadId != lastThreadId) {
      threadCt++;
      lastThreadId = threadInfo[i].threadId;
    } else {
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
      return -1;
    }

    // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
    // fields agree between all the threads bound to a given package.
    if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) ||
        (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
      return -1;
    }
  }
  nPackages = pkgCt;
  if ((int)coreCt > nCoresPerPkg)
    nCoresPerPkg = coreCt;
  if ((int)threadCt > __kmp_nThreadsPerCore)
    __kmp_nThreadsPerCore = threadCt;

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  // Make sure all these vars are set correctly, and return now if affinity is
  // not enabled.
  __kmp_ncores = nCores;
  if (__kmp_affinity_verbose) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

    KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (__kmp_affinity_uniform_topology()) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }
    KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
  }
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  for (i = 0; i < nApics; ++i) {
    __kmp_pu_os_idx[i] = threadInfo[i].osId;
  }
  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return 0;
  }

  // Now that we've determined the number of packages, the number of cores per
  // package, and the number of threads per core, we can construct the data
  // structure that is to be returned.
  int pkgLevel = 0;
  int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
  int threadLevel =
      (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
  unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

  KMP_ASSERT(depth > 0);
  *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

  for (i = 0; i < nApics; ++i) {
    Address addr(depth);
    unsigned os = threadInfo[i].osId;
    int d = 0;

    if (pkgLevel >= 0) {
      addr.labels[d++] = threadInfo[i].pkgId;
    }
    if (coreLevel >= 0) {
      addr.labels[d++] = threadInfo[i].coreId;
    }
    if (threadLevel >= 0) {
      addr.labels[d++] = threadInfo[i].threadId;
    }
    (*address2os)[i] = AddrUnsPair(addr, os);
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled in the
    // machine topology map.
    __kmp_affinity_gran_levels = 0;
    if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
      __kmp_affinity_gran_levels++;
    }
    if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
      __kmp_affinity_gran_levels++;
    }
    if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
      __kmp_affinity_gran_levels++;
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
                                  coreLevel, threadLevel);
  }

  __kmp_free(threadInfo);
  KMP_CPU_FREE(oldMask);
  return depth;
}
1220
Jim Cownie5e8470a2013-09-27 10:38:44 +00001221// Intel(R) microarchitecture code name Nehalem, Dunnington and later
1222// architectures support a newer interface for specifying the x2APIC Ids,
1223// based on cpuid leaf 11.
Jonathan Peyton30419822017-05-12 18:01:32 +00001224static int __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
1225 kmp_i18n_id_t *const msg_id) {
1226 kmp_cpuid buf;
1227 *address2os = NULL;
1228 *msg_id = kmp_i18n_null;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001229
Jonathan Peyton30419822017-05-12 18:01:32 +00001230 // Check to see if cpuid leaf 11 is supported.
1231 __kmp_x86_cpuid(0, 0, &buf);
1232 if (buf.eax < 11) {
1233 *msg_id = kmp_i18n_str_NoLeaf11Support;
1234 return -1;
1235 }
1236 __kmp_x86_cpuid(11, 0, &buf);
1237 if (buf.ebx == 0) {
1238 *msg_id = kmp_i18n_str_NoLeaf11Support;
1239 return -1;
1240 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001241
  // Find the number of levels in the machine topology. While we're at it, get
  // the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will try
  // to get more accurate values later by explicitly counting them, but get
  // reasonable defaults now, in case we return early.
  int level;
  int threadLevel = -1;
  int coreLevel = -1;
  int pkgLevel = -1;
  __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

  for (level = 0;; level++) {
    if (level > 31) {
      // FIXME: Hack for DPD200163180
      //
      // If level is big then something went wrong -> exiting
      //
      // There could actually be 32 valid levels in the machine topology, but
      // so far, the only machine we have seen which does not exit this loop
      // before iteration 32 has fubar x2APIC settings.
      //
      // For now, just reject this case based upon loop trip count.
      *msg_id = kmp_i18n_str_InvalidCpuidInfo;
      return -1;
    }
    __kmp_x86_cpuid(11, level, &buf);
    if (buf.ebx == 0) {
      if (pkgLevel < 0) {
        // Will infer nPackages from __kmp_xproc
        pkgLevel = level;
        level++;
      }
      break;
    }
    int kind = (buf.ecx >> 8) & 0xff;
    if (kind == 1) {
      // SMT level
      threadLevel = level;
      coreLevel = -1;
      pkgLevel = -1;
      __kmp_nThreadsPerCore = buf.ebx & 0xffff;
      if (__kmp_nThreadsPerCore == 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
    } else if (kind == 2) {
      // core level
      coreLevel = level;
      pkgLevel = -1;
      nCoresPerPkg = buf.ebx & 0xffff;
      if (nCoresPerPkg == 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
    } else {
      if (level <= 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
      if (pkgLevel >= 0) {
        continue;
      }
      pkgLevel = level;
      nPackages = buf.ebx & 0xffff;
      if (nPackages == 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
    }
  }
  int depth = level;

  // In the above loop, "level" was counted from the finest level (usually
  // thread) to the coarsest. The caller expects that we will place the labels
  // in (*address2os)[].first.labels[] in the inverse order, so we need to
  // invert the vars saying which level means what.
  if (threadLevel >= 0) {
    threadLevel = depth - threadLevel - 1;
  }
  if (coreLevel >= 0) {
    coreLevel = depth - coreLevel - 1;
  }
  KMP_DEBUG_ASSERT(pkgLevel >= 0);
  pkgLevel = depth - pkgLevel - 1;
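  // For example (hypothetical machine): if the walk above found the thread
  // level at 0, the core level at 1, and the package level at 2 (depth == 3),
  // the inversion yields threadLevel == 2, coreLevel == 1, and pkgLevel == 0,
  // so labels[0] holds the package id, as the sorting code expects.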

  // The algorithm used starts by setting the affinity to each available thread
  // and retrieving info from the cpuid instruction, so if we are not capable
  // of calling __kmp_get_system_affinity() and __kmp_set_system_affinity(),
  // then we need to do something else - use the defaults that we calculated
  // from issuing cpuid without binding to each proc.
  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from cpuid on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity_type == affinity_none);

    __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    return 0;
  }

  // From here on, we can assume that it is safe to call
  // __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
  // __kmp_affinity_type = affinity_none.

  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  __kmp_get_system_affinity(oldMask, TRUE);

  // Allocate the data structure to be returned.
  AddrUnsPair *retval =
      (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);

  // Run through each of the available contexts, binding the current thread
  // to it, and obtaining the pertinent information using the cpuid instr.
  unsigned int proc;
  int nApics = 0;
  KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
      continue;
    }
    KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);

    __kmp_affinity_dispatch->bind_thread(proc);

    // Extract labels for each level in the machine topology map from Apic ID.
    Address addr(depth);
    int prev_shift = 0;

    for (level = 0; level < depth; level++) {
      __kmp_x86_cpuid(11, level, &buf);
      unsigned apicId = buf.edx;
      if (buf.ebx == 0) {
        if (level != depth - 1) {
          KMP_CPU_FREE(oldMask);
          *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
          return -1;
        }
        addr.labels[depth - level - 1] = apicId >> prev_shift;
        level++;
        break;
      }
      int shift = buf.eax & 0x1f;
      int mask = (1 << shift) - 1;
      addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
      prev_shift = shift;
    }
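    // Example (hypothetical ids): with an SMT shift of 1 and a core shift of
    // 4, an x2APIC id of 0x13 decodes as thread 1 (0x13 & 0x1), core 1
    // ((0x13 & 0xf) >> 1), and package 1 (0x13 >> 4).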
    if (level != depth) {
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
      return -1;
    }

    retval[nApics] = AddrUnsPair(addr, proc);
    nApics++;
  }

  // We've collected all the info we need.
  // Restore the old affinity mask for this thread.
  __kmp_set_system_affinity(oldMask, TRUE);

  // If there's only one thread context to bind to, return now.
  KMP_ASSERT(nApics > 0);
  if (nApics == 1) {
    __kmp_ncores = nPackages = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

      KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(retval);
      KMP_CPU_FREE(oldMask);
      return 0;
    }

    // Form an Address object which only includes the package level.
    Address addr(1);
    addr.labels[0] = retval[0].first.labels[pkgLevel];
    retval[0].first = addr;

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
    }

    *address2os = retval;
    KMP_CPU_FREE(oldMask);
    return 1;
  }

  // Sort the table by physical Id.
  qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

  // Find the radix at each of the levels.
  unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
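  // totals[l] counts all distinct nodes seen at level l, counts[l] counts the
  // nodes under the current parent, maxCt[l] tracks the widest parent seen so
  // far, and last[l] holds the previous label, used to detect transitions.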
  for (level = 0; level < depth; level++) {
    totals[level] = 1;
    maxCt[level] = 1;
    counts[level] = 1;
    last[level] = retval[0].first.labels[level];
  }

  // From here on, the iteration variable "level" runs from the finest level to
  // the coarsest, i.e. we iterate forward through
  // (*address2os)[].first.labels[] - in the previous loops, we iterated
  // backwards.
  for (proc = 1; (int)proc < nApics; proc++) {
    int level;
    for (level = 0; level < depth; level++) {
      if (retval[proc].first.labels[level] != last[level]) {
        int j;
        for (j = level + 1; j < depth; j++) {
          totals[j]++;
          counts[j] = 1;
          // The line below causes printing of incorrect topology information
          // when the max value for some level (maxCt[level]) is encountered
          // earlier than a smaller value while going through the array. For
          // example, if pkg0 has 4 cores and pkg1 has 2 cores, then
          // maxCt[1] == 2, whereas it must be 4.
          // TODO!!! Check if it can be commented safely
          // maxCt[j] = 1;
          last[j] = retval[proc].first.labels[j];
        }
        totals[level]++;
        counts[level]++;
        if (counts[level] > maxCt[level]) {
          maxCt[level] = counts[level];
        }
        last[level] = retval[proc].first.labels[level];
        break;
      } else if (level == depth - 1) {
        __kmp_free(last);
        __kmp_free(maxCt);
        __kmp_free(counts);
        __kmp_free(totals);
        __kmp_free(retval);
        KMP_CPU_FREE(oldMask);
        *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
        return -1;
      }
    }
  }

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  // Make sure all these vars are set correctly, and return if affinity is not
  // enabled.
  if (threadLevel >= 0) {
    __kmp_nThreadsPerCore = maxCt[threadLevel];
  } else {
    __kmp_nThreadsPerCore = 1;
  }
  nPackages = totals[pkgLevel];

  if (coreLevel >= 0) {
    __kmp_ncores = totals[coreLevel];
    nCoresPerPkg = maxCt[coreLevel];
  } else {
    __kmp_ncores = nPackages;
    nCoresPerPkg = 1;
  }

  // Check to see if the machine topology is uniform
  unsigned prod = maxCt[0];
  for (level = 1; level < depth; level++) {
    prod *= maxCt[level];
  }
  bool uniform = (prod == totals[level - 1]);
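  // E.g. (hypothetical) 2 packages x 4 cores x 2 threads is uniform: the
  // product of the per-level maxima (16) equals the leaf count. If one
  // package had 4 cores and the other only 2, the product would exceed the
  // leaf count and the topology would be reported as non-uniform.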

  // Print the machine topology summary.
  if (__kmp_affinity_verbose) {
    char mask[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

    KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (uniform) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }

    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);

    __kmp_str_buf_print(&buf, "%d", totals[0]);
    for (level = 1; level <= pkgLevel; level++) {
      __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
    }
    KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);

    __kmp_str_buf_free(&buf);
  }
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  for (proc = 0; (int)proc < nApics; ++proc) {
    __kmp_pu_os_idx[proc] = retval[proc].second;
  }
  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(last);
    __kmp_free(maxCt);
    __kmp_free(counts);
    __kmp_free(totals);
    __kmp_free(retval);
    KMP_CPU_FREE(oldMask);
    return 0;
  }

  // Find any levels with radix 1, and remove them from the map
  // (except for the package level).
  int new_depth = 0;
  for (level = 0; level < depth; level++) {
    if ((maxCt[level] == 1) && (level != pkgLevel)) {
      continue;
    }
    new_depth++;
  }

  // If we are removing any levels, allocate a new vector to return,
  // and copy the relevant information to it.
  if (new_depth != depth) {
    AddrUnsPair *new_retval =
        (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
    for (proc = 0; (int)proc < nApics; proc++) {
      Address addr(new_depth);
      new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
    }
    int new_level = 0;
    int newPkgLevel = -1;
    int newCoreLevel = -1;
    int newThreadLevel = -1;
    int i;
    for (level = 0; level < depth; level++) {
      if ((maxCt[level] == 1) && (level != pkgLevel)) {
        // Remove this level. Never remove the package level
        continue;
      }
      if (level == pkgLevel) {
        newPkgLevel = level;
      }
      if (level == coreLevel) {
        newCoreLevel = level;
      }
      if (level == threadLevel) {
        newThreadLevel = level;
      }
      for (proc = 0; (int)proc < nApics; proc++) {
        new_retval[proc].first.labels[new_level] =
            retval[proc].first.labels[level];
      }
      new_level++;
    }

    __kmp_free(retval);
    retval = new_retval;
    depth = new_depth;
    pkgLevel = newPkgLevel;
    coreLevel = newCoreLevel;
    threadLevel = newThreadLevel;
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled
    // in the machine topology map.
    __kmp_affinity_gran_levels = 0;
    if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
      __kmp_affinity_gran_levels++;
    }
    if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
      __kmp_affinity_gran_levels++;
    }
    if (__kmp_affinity_gran > affinity_gran_package) {
      __kmp_affinity_gran_levels++;
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel, coreLevel,
                                  threadLevel);
  }

  __kmp_free(last);
  __kmp_free(maxCt);
  __kmp_free(counts);
  __kmp_free(totals);
  KMP_CPU_FREE(oldMask);
  *address2os = retval;
  return depth;
}

#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#define osIdIndex 0
#define threadIdIndex 1
#define coreIdIndex 2
#define pkgIdIndex 3
#define nodeIdIndex 4

typedef unsigned *ProcCpuInfo;
static unsigned maxIndex = pkgIdIndex;

static int __kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b) {
  const unsigned *aa = (const unsigned *)a;
  const unsigned *bb = (const unsigned *)b;
  if (aa[osIdIndex] < bb[osIdIndex])
    return -1;
  if (aa[osIdIndex] > bb[osIdIndex])
    return 1;
  return 0;
}

static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a,
                                                  const void *b) {
  unsigned i;
  const unsigned *aa = *(RCAST(unsigned **, CCAST(void *, a)));
  const unsigned *bb = *(RCAST(unsigned **, CCAST(void *, b)));
  for (i = maxIndex;; i--) {
    if (aa[i] < bb[i])
      return -1;
    if (aa[i] > bb[i])
      return 1;
    if (i == osIdIndex)
      break;
  }
  return 0;
}

// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
// affinity map.
static int __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os,
                                             int *line,
                                             kmp_i18n_id_t *const msg_id,
                                             FILE *f) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

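  // A /proc/cpuinfo record looks like (values hypothetical):
  //   processor       : 0
  //   physical id     : 0
  //   core id         : 0
  // with records separated by blank lines, so counting "processor" fields
  // tells us how many per-proc rows to allocate.
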
  // Scan the file once, counting the number of "processor" (osId) fields
  // and finding the highest value of <n> for a node_<n> field.
  char buf[256];
  unsigned num_records = 0;
  while (!feof(f)) {
    buf[sizeof(buf) - 1] = 1;
    if (!fgets(buf, sizeof(buf), f)) {
      // Read errors presumably because of EOF
      break;
    }

    char s1[] = "processor";
    if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
      num_records++;
      continue;
    }

    // FIXME - this will match "node_<n> <garbage>"
    unsigned level;
    if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
      if (nodeIdIndex + level >= maxIndex) {
        maxIndex = nodeIdIndex + level;
      }
      continue;
    }
  }

  // Check for empty file / no valid processor records, or too many. The number
  // of records can't exceed the number of valid bits in the affinity mask.
  if (num_records == 0) {
    *line = 0;
    *msg_id = kmp_i18n_str_NoProcRecords;
    return -1;
  }
  if (num_records > (unsigned)__kmp_xproc) {
    *line = 0;
    *msg_id = kmp_i18n_str_TooManyProcRecords;
    return -1;
  }

  // Set the file pointer back to the beginning, so that we can scan the file
  // again, this time performing a full parse of the data. Allocate a vector of
  // ProcCpuInfo objects, where we will place the data. Adding an extra element
  // at the end allows us to remove a lot of extra checks for termination
  // conditions.
  if (fseek(f, 0, SEEK_SET) != 0) {
    *line = 0;
    *msg_id = kmp_i18n_str_CantRewindCpuinfo;
    return -1;
  }

  // Allocate the array of records to store the proc info in. The dummy
  // element at the end makes the logic in filling them out easier to code.
  unsigned **threadInfo =
      (unsigned **)__kmp_allocate((num_records + 1) * sizeof(unsigned *));
  unsigned i;
  for (i = 0; i <= num_records; i++) {
    threadInfo[i] =
        (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  }

#define CLEANUP_THREAD_INFO                                                    \
  for (i = 0; i <= num_records; i++) {                                         \
    __kmp_free(threadInfo[i]);                                                 \
  }                                                                            \
  __kmp_free(threadInfo);

  // A value of UINT_MAX means that we didn't find the field
  unsigned __index;

#define INIT_PROC_INFO(p)                                                      \
  for (__index = 0; __index <= maxIndex; __index++) {                          \
    (p)[__index] = UINT_MAX;                                                   \
  }

  for (i = 0; i <= num_records; i++) {
    INIT_PROC_INFO(threadInfo[i]);
  }

  unsigned num_avail = 0;
  *line = 0;
  while (!feof(f)) {
    // Create an inner scoping level, so that all the goto targets at the end
    // of the loop appear in an outer scoping level. This avoids warnings about
    // jumping past an initialization to a target in the same block.
    {
      buf[sizeof(buf) - 1] = 1;
      bool long_line = false;
      if (!fgets(buf, sizeof(buf), f)) {
        // Read errors presumably because of EOF
        // If there is valid data in threadInfo[num_avail], then fake
        // a blank line to ensure that the last address gets parsed.
        bool valid = false;
        for (i = 0; i <= maxIndex; i++) {
          if (threadInfo[num_avail][i] != UINT_MAX) {
            valid = true;
          }
        }
        if (!valid) {
          break;
        }
        buf[0] = 0;
      } else if (!buf[sizeof(buf) - 1]) {
        // The line is longer than the buffer. Set a flag and don't
        // emit an error if we were going to ignore the line, anyway.
        long_line = true;

#define CHECK_LINE                                                             \
  if (long_line) {                                                             \
    CLEANUP_THREAD_INFO;                                                       \
    *msg_id = kmp_i18n_str_LongLineCpuinfo;                                    \
    return -1;                                                                 \
  }
      }
      (*line)++;

      char s1[] = "processor";
      if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s1) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][osIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][osIdIndex] = val;
#if KMP_OS_LINUX && USE_SYSFS_INFO
        char path[256];
        KMP_SNPRINTF(
            path, sizeof(path),
            "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
            threadInfo[num_avail][osIdIndex]);
        __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);

        KMP_SNPRINTF(path, sizeof(path),
                     "/sys/devices/system/cpu/cpu%u/topology/core_id",
                     threadInfo[num_avail][osIdIndex]);
        __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
        continue;
#else
      }
      char s2[] = "physical id";
      if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s2) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][pkgIdIndex] = val;
        continue;
      }
      char s3[] = "core id";
      if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s3) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][coreIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][coreIdIndex] = val;
        continue;
#endif // KMP_OS_LINUX && USE_SYSFS_INFO
      }
      char s4[] = "thread id";
      if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s4) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][threadIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][threadIdIndex] = val;
        continue;
      }
      unsigned level;
      if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s4) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        KMP_ASSERT(nodeIdIndex + level <= maxIndex);
        if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][nodeIdIndex + level] = val;
        continue;
      }

      // We didn't recognize the leading token on the line. There are lots of
      // leading tokens that we don't recognize - if the line isn't empty, go
      // on to the next line.
      if ((*buf != 0) && (*buf != '\n')) {
        // If the line is longer than the buffer, read characters
        // until we find a newline.
        if (long_line) {
          int ch;
          while (((ch = fgetc(f)) != EOF) && (ch != '\n'))
            ;
        }
        continue;
      }

      // A newline has signalled the end of the processor record.
      // Check that there aren't too many procs specified.
      if ((int)num_avail == __kmp_xproc) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_TooManyEntries;
        return -1;
      }

      // Check for missing fields. The osId field must be there, and we
      // currently require that the physical id field is specified, also.
      if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_MissingProcField;
        return -1;
      }
      if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_MissingPhysicalIDField;
        return -1;
      }

      // Skip this proc if it is not included in the machine model.
      if (!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex],
                         __kmp_affin_fullMask)) {
        INIT_PROC_INFO(threadInfo[num_avail]);
        continue;
      }

      // We have a successful parse of this proc's info.
      // Increment the counter, and prepare for the next proc.
      num_avail++;
      KMP_ASSERT(num_avail <= num_records);
      INIT_PROC_INFO(threadInfo[num_avail]);
    }
    continue;

  no_val:
    CLEANUP_THREAD_INFO;
    *msg_id = kmp_i18n_str_MissingValCpuinfo;
    return -1;

  dup_field:
    CLEANUP_THREAD_INFO;
    *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
    return -1;
  }
  *line = 0;

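  // At this point threadInfo holds one row per usable OS proc; a row might
  // (hypothetically) read {osId = 3, threadId = 1, coreId = 1, pkgId = 0},
  // with UINT_MAX left in any field that was absent from the file.
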
#if KMP_MIC && REDUCE_TEAM_SIZE
  unsigned teamSize = 0;
#endif // KMP_MIC && REDUCE_TEAM_SIZE

  // check for num_records == __kmp_xproc ???

  // If there's only one thread context to bind to, form an Address object with
  // depth 1 and return immediately (or, if affinity is off, set address2os to
  // NULL and return).
  //
  // If it is configured to omit the package level when there is only a single
  // package, the logic at the end of this routine won't work if there is only
  // a single thread - it would try to form an Address object with depth 0.
  KMP_ASSERT(num_avail > 0);
  KMP_ASSERT(num_avail <= num_records);
  if (num_avail == 1) {
    __kmp_ncores = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
    if (__kmp_affinity_verbose) {
      if (!KMP_AFFINITY_CAPABLE()) {
        KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                  __kmp_affin_fullMask);
        KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
          KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
          KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      }
      int index;
      kmp_str_buf_t buf;
      __kmp_str_buf_init(&buf);
      __kmp_str_buf_print(&buf, "1");
      for (index = maxIndex - 1; index > pkgIdIndex; index--) {
        __kmp_str_buf_print(&buf, " x 1");
      }
      KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
      __kmp_str_buf_free(&buf);
    }

    if (__kmp_affinity_type == affinity_none) {
      CLEANUP_THREAD_INFO;
      return 0;
    }

    *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair));
    Address addr(1);
    addr.labels[0] = threadInfo[0][pkgIdIndex];
    (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
    }

    CLEANUP_THREAD_INFO;
    return 1;
  }

  // Sort the threadInfo table by physical Id.
  qsort(threadInfo, num_avail, sizeof(*threadInfo),
        __kmp_affinity_cmp_ProcCpuInfo_phys_id);

  // The table is now sorted by pkgId / coreId / threadId, but we really don't
  // know the radix of any of the fields. pkgId's may be sparsely assigned among
  // the chips on a system. Although coreId's are usually assigned
  // [0 .. coresPerPkg-1] and threadId's are usually assigned
  // [0..threadsPerCore-1], we don't want to make any such assumptions.
  //
  // For that matter, we don't know what coresPerPkg and threadsPerCore (or the
  // total # packages) are at this point - we want to determine that now. We
  // only have an upper bound on the first two figures.
  unsigned *counts =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *maxCt =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *totals =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *lastId =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));

  bool assign_thread_ids = false;
  unsigned threadIdCt;
  unsigned index;

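  // The radix check below walks the sorted records and, at each index level,
  // tracks how many distinct ids appear under the current parent (counts),
  // the widest parent seen so far (maxCt), and the grand total (totals). If
  // duplicate rows show up because the optional "thread id" field was
  // missing, we restart once with assign_thread_ids == true and synthesize
  // the thread ids ourselves.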
restart_radix_check:
  threadIdCt = 0;

  // Initialize the counter arrays with data from threadInfo[0].
  if (assign_thread_ids) {
    if (threadInfo[0][threadIdIndex] == UINT_MAX) {
      threadInfo[0][threadIdIndex] = threadIdCt++;
    } else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
      threadIdCt = threadInfo[0][threadIdIndex] + 1;
    }
  }
  for (index = 0; index <= maxIndex; index++) {
    counts[index] = 1;
    maxCt[index] = 1;
    totals[index] = 1;
    lastId[index] = threadInfo[0][index];
  }

  // Run through the rest of the OS procs.
  for (i = 1; i < num_avail; i++) {
    // Find the most significant index whose id differs from the id for the
    // previous OS proc.
    for (index = maxIndex; index >= threadIdIndex; index--) {
      if (assign_thread_ids && (index == threadIdIndex)) {
        // Auto-assign the thread id field if it wasn't specified.
        if (threadInfo[i][threadIdIndex] == UINT_MAX) {
          threadInfo[i][threadIdIndex] = threadIdCt++;
        }
        // Apparently the thread id field was specified for some entries and
        // not others. Start the thread id counter off at the next higher
        // thread id.
        else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
          threadIdCt = threadInfo[i][threadIdIndex] + 1;
        }
      }
      if (threadInfo[i][index] != lastId[index]) {
        // Run through all indices which are less significant, and reset the
        // counts to 1. At all levels up to and including index, we need to
        // increment the totals and record the last id.
        unsigned index2;
        for (index2 = threadIdIndex; index2 < index; index2++) {
          totals[index2]++;
          if (counts[index2] > maxCt[index2]) {
            maxCt[index2] = counts[index2];
          }
          counts[index2] = 1;
          lastId[index2] = threadInfo[i][index2];
        }
        counts[index]++;
        totals[index]++;
        lastId[index] = threadInfo[i][index];

        if (assign_thread_ids && (index > threadIdIndex)) {

#if KMP_MIC && REDUCE_TEAM_SIZE
          // The default team size is the total #threads in the machine
          // minus 1 thread for every core that has 3 or more threads.
          teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
#endif // KMP_MIC && REDUCE_TEAM_SIZE

          // Restart the thread counter, as we are on a new core.
          threadIdCt = 0;

          // Auto-assign the thread id field if it wasn't specified.
          if (threadInfo[i][threadIdIndex] == UINT_MAX) {
            threadInfo[i][threadIdIndex] = threadIdCt++;
          }

          // Apparently the thread id field was specified for some entries
          // and not others. Start the thread id counter off at the next
          // higher thread id.
          else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
            threadIdCt = threadInfo[i][threadIdIndex] + 1;
          }
        }
        break;
      }
    }
    if (index < threadIdIndex) {
      // If thread ids were specified, it is an error if they are not unique.
      // Also, check that we haven't already restarted the loop (to be safe -
      // shouldn't need to).
      if ((threadInfo[i][threadIdIndex] != UINT_MAX) || assign_thread_ids) {
        __kmp_free(lastId);
        __kmp_free(totals);
        __kmp_free(maxCt);
        __kmp_free(counts);
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
        return -1;
      }

      // If the thread ids were not specified and we see entries that are
      // duplicates, start the loop over and assign the thread ids manually.
      assign_thread_ids = true;
      goto restart_radix_check;
    }
  }

#if KMP_MIC && REDUCE_TEAM_SIZE
  // The default team size is the total #threads in the machine
  // minus 1 thread for every core that has 3 or more threads.
  teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
#endif // KMP_MIC && REDUCE_TEAM_SIZE

  for (index = threadIdIndex; index <= maxIndex; index++) {
    if (counts[index] > maxCt[index]) {
      maxCt[index] = counts[index];
    }
  }

  __kmp_nThreadsPerCore = maxCt[threadIdIndex];
  nCoresPerPkg = maxCt[coreIdIndex];
  nPackages = totals[pkgIdIndex];

  // Check to see if the machine topology is uniform
  unsigned prod = totals[maxIndex];
  for (index = threadIdIndex; index < maxIndex; index++) {
    prod *= maxCt[index];
  }
  bool uniform = (prod == totals[threadIdIndex]);

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  // Make sure all these vars are set correctly, and return now if affinity is
  // not enabled.
  __kmp_ncores = totals[coreIdIndex];

  if (__kmp_affinity_verbose) {
    if (!KMP_AFFINITY_CAPABLE()) {
      KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (uniform) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
    } else {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                __kmp_affin_fullMask);
      KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (uniform) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
    }
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);

    __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
    for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
      __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
    }
    KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
               maxCt[threadIdIndex], __kmp_ncores);

    __kmp_str_buf_free(&buf);
  }

#if KMP_MIC && REDUCE_TEAM_SIZE
  // Set the default team size.
  if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
    __kmp_dflt_team_nth = teamSize;
    KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting "
                  "__kmp_dflt_team_nth = %d\n",
                  __kmp_dflt_team_nth));
  }
#endif // KMP_MIC && REDUCE_TEAM_SIZE

  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(num_avail == __kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  for (i = 0; i < num_avail; ++i) { // fill the os indices
    __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
  }

  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(lastId);
    __kmp_free(totals);
    __kmp_free(maxCt);
    __kmp_free(counts);
    CLEANUP_THREAD_INFO;
    return 0;
  }

  // Count the number of levels which have more nodes at that level than at the
  // parent's level (with there being an implicit root node of the top level).
  // This is equivalent to saying that there is at least one node at this level
  // which has a sibling. These levels are in the map, and the package level is
  // always in the map.
  bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
  int level = 0;
  for (index = threadIdIndex; index < maxIndex; index++) {
    KMP_ASSERT(totals[index] >= totals[index + 1]);
    inMap[index] = (totals[index] > totals[index + 1]);
  }
  inMap[maxIndex] = (totals[maxIndex] > 1);
  inMap[pkgIdIndex] = true;
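  // Example (hypothetical): a single package with 4 cores and no
  // hyperthreading gives totals[threadIdIndex] == totals[coreIdIndex] == 4
  // and totals[pkgIdIndex] == 1, so only the core and (forced) package
  // levels survive into the map, for a depth of 2.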

  int depth = 0;
  for (index = threadIdIndex; index <= maxIndex; index++) {
    if (inMap[index]) {
      depth++;
    }
  }
  KMP_ASSERT(depth > 0);

  // Construct the data structure that is to be returned.
  *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * num_avail);
  int pkgLevel = -1;
  int coreLevel = -1;
  int threadLevel = -1;

  for (i = 0; i < num_avail; ++i) {
    Address addr(depth);
    unsigned os = threadInfo[i][osIdIndex];
    int src_index;
    int dst_index = 0;

    for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
      if (!inMap[src_index]) {
        continue;
      }
      addr.labels[dst_index] = threadInfo[i][src_index];
      if (src_index == pkgIdIndex) {
        pkgLevel = dst_index;
      } else if (src_index == coreIdIndex) {
        coreLevel = dst_index;
      } else if (src_index == threadIdIndex) {
        threadLevel = dst_index;
      }
      dst_index++;
    }
    (*address2os)[i] = AddrUnsPair(addr, os);
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled
    // in the machine topology map.
    unsigned src_index;
    __kmp_affinity_gran_levels = 0;
    for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
      if (!inMap[src_index]) {
        continue;
      }
      switch (src_index) {
      case threadIdIndex:
        if (__kmp_affinity_gran > affinity_gran_thread) {
          __kmp_affinity_gran_levels++;
        }
        break;
      case coreIdIndex:
        if (__kmp_affinity_gran > affinity_gran_core) {
          __kmp_affinity_gran_levels++;
        }
        break;
      case pkgIdIndex:
        if (__kmp_affinity_gran > affinity_gran_package) {
          __kmp_affinity_gran_levels++;
        }
        break;
      }
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
                                  coreLevel, threadLevel);
  }

  __kmp_free(inMap);
  __kmp_free(lastId);
  __kmp_free(totals);
  __kmp_free(maxCt);
  __kmp_free(counts);
  CLEANUP_THREAD_INFO;
  return depth;
}

// Create and return a table of affinity masks, indexed by OS thread ID.
// This routine handles OR'ing together all the affinity masks of threads
// that are sufficiently close, if granularity > fine.
static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex,
                                            unsigned *numUnique,
                                            AddrUnsPair *address2os,
                                            unsigned numAddrs) {
  // First form a table of affinity masks in order of OS thread id.
  unsigned depth;
  unsigned maxOsId;
  unsigned i;

  KMP_ASSERT(numAddrs > 0);
  depth = address2os[0].first.depth;

  maxOsId = 0;
  for (i = 0; i < numAddrs; i++) {
    unsigned osId = address2os[i].second;
    if (osId > maxOsId) {
      maxOsId = osId;
    }
  }
  kmp_affin_mask_t *osId2Mask;
  KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId + 1));

  // Sort the address2os table according to physical order. Doing so will put
  // all threads on the same core/package/node in consecutive locations.
  qsort(address2os, numAddrs, sizeof(*address2os),
        __kmp_affinity_cmp_Address_labels);

  KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
  if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
    KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
  }
  if (__kmp_affinity_gran_levels >= (int)depth) {
    if (__kmp_affinity_verbose ||
        (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
      KMP_WARNING(AffThreadsMayMigrate);
    }
  }

  // Run through the table, forming the masks for all threads on each core.
  // Threads on the same core will have identical "Address" objects, not
  // considering the last level, which must be the thread id. All threads on a
  // core will appear consecutively.
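  // E.g. with granularity=core (one granularity level), two SMT threads on
  // the same core differ only in their last label, so isClose() below groups
  // them and their OS proc bits get OR'ed into a single shared mask.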
  unsigned unique = 0;
  unsigned j = 0; // index of 1st thread on core
  unsigned leader = 0;
  Address *leaderAddr = &(address2os[0].first);
  kmp_affin_mask_t *sum;
  KMP_CPU_ALLOC_ON_STACK(sum);
  KMP_CPU_ZERO(sum);
  KMP_CPU_SET(address2os[0].second, sum);
  for (i = 1; i < numAddrs; i++) {
    // If this thread is sufficiently close to the leader (within the
    // granularity setting), then set the bit for this os thread in the
    // affinity mask for this group, and go on to the next thread.
    if (leaderAddr->isClose(address2os[i].first, __kmp_affinity_gran_levels)) {
      KMP_CPU_SET(address2os[i].second, sum);
      continue;
    }

    // For every thread in this group, copy the mask to the thread's entry in
    // the osId2Mask table. Mark the first address as a leader.
    for (; j < i; j++) {
      unsigned osId = address2os[j].second;
      KMP_DEBUG_ASSERT(osId <= maxOsId);
      kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
      KMP_CPU_COPY(mask, sum);
      address2os[j].first.leader = (j == leader);
    }
    unique++;

    // Start a new mask.
    leader = i;
    leaderAddr = &(address2os[i].first);
    KMP_CPU_ZERO(sum);
    KMP_CPU_SET(address2os[i].second, sum);
  }

  // For every thread in last group, copy the mask to the thread's
  // entry in the osId2Mask table.
  for (; j < i; j++) {
    unsigned osId = address2os[j].second;
    KMP_DEBUG_ASSERT(osId <= maxOsId);
    kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
    KMP_CPU_COPY(mask, sum);
    address2os[j].first.leader = (j == leader);
  }
  unique++;
  KMP_CPU_FREE_FROM_STACK(sum);

  *maxIndex = maxOsId;
  *numUnique = unique;
  return osId2Mask;
}

// Stuff for the affinity proclist parsers. It's easier to declare these vars
// as file-static than to try and pass them through the calling sequence of
// the recursive-descent OMP_PLACES parser.
static kmp_affin_mask_t *newMasks;
static int numNewMasks;
static int nextNewMask;

#define ADD_MASK(_mask)                                                        \
  {                                                                            \
    if (nextNewMask >= numNewMasks) {                                          \
      int i;                                                                   \
      numNewMasks *= 2;                                                        \
      kmp_affin_mask_t *temp;                                                  \
      KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks);                         \
      for (i = 0; i < numNewMasks / 2; i++) {                                  \
        kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);                    \
        kmp_affin_mask_t *dest = KMP_CPU_INDEX(temp, i);                       \
        KMP_CPU_COPY(dest, src);                                               \
      }                                                                        \
      KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks / 2);                  \
      newMasks = temp;                                                         \
    }                                                                          \
    KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask));               \
    nextNewMask++;                                                             \
  }

#define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId)                             \
  {                                                                            \
    if (((_osId) > _maxOsId) ||                                                \
        (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) {     \
      if (__kmp_affinity_verbose ||                                            \
          (__kmp_affinity_warnings &&                                          \
           (__kmp_affinity_type != affinity_none))) {                          \
        KMP_WARNING(AffIgnoreInvalidProcID, _osId);                            \
      }                                                                        \
    } else {                                                                   \
      ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId)));                            \
    }                                                                          \
  }

// Re-parse the proclist (for the explicit affinity type), and form the list
// of affinity newMasks indexed by gtid.
static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
                                            unsigned int *out_numMasks,
                                            const char *proclist,
                                            kmp_affin_mask_t *osId2Mask,
                                            int maxOsId) {
  int i;
  const char *scan = proclist;
  const char *next = proclist;

  // Use the internal allocator for the temporary mask vector; ADD_MASK
  // doubles the array on demand, playing the role realloc() would.
  numNewMasks = 2;
  KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
  nextNewMask = 0;
  kmp_affin_mask_t *sumMask;
  KMP_CPU_ALLOC(sumMask);
  int setSize = 0;

2512 int start, end, stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002513
Jonathan Peyton30419822017-05-12 18:01:32 +00002514 SKIP_WS(scan);
2515 next = scan;
2516 if (*next == '\0') {
2517 break;
2518 }
2519
2520 if (*next == '{') {
2521 int num;
2522 setSize = 0;
2523 next++; // skip '{'
2524 SKIP_WS(next);
2525 scan = next;
2526
2527 // Read the first integer in the set.
2528 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad proclist");
2529 SKIP_DIGITS(next);
2530 num = __kmp_str_to_int(scan, *next);
2531 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2532
2533 // Copy the mask for that osId to the sum (union) mask.
2534 if ((num > maxOsId) ||
2535 (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2536 if (__kmp_affinity_verbose ||
2537 (__kmp_affinity_warnings &&
2538 (__kmp_affinity_type != affinity_none))) {
2539 KMP_WARNING(AffIgnoreInvalidProcID, num);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002540 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002541 KMP_CPU_ZERO(sumMask);
2542 } else {
2543 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2544 setSize = 1;
2545 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002546
Jonathan Peyton30419822017-05-12 18:01:32 +00002547 for (;;) {
2548 // Check for end of set.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002549 SKIP_WS(next);
Jonathan Peyton30419822017-05-12 18:01:32 +00002550 if (*next == '}') {
2551 next++; // skip '}'
2552 break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002553 }
2554
Jim Cownie5e8470a2013-09-27 10:38:44 +00002555 // Skip optional comma.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002556 if (*next == ',') {
Jonathan Peyton30419822017-05-12 18:01:32 +00002557 next++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002558 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002559 SKIP_WS(next);
2560
2561 // Read the next integer in the set.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002562 scan = next;
Jonathan Peyton30419822017-05-12 18:01:32 +00002563 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2564
2565 SKIP_DIGITS(next);
2566 num = __kmp_str_to_int(scan, *next);
2567 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2568
2569 // Add the mask for that osId to the sum mask.
2570 if ((num > maxOsId) ||
2571 (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2572 if (__kmp_affinity_verbose ||
2573 (__kmp_affinity_warnings &&
2574 (__kmp_affinity_type != affinity_none))) {
2575 KMP_WARNING(AffIgnoreInvalidProcID, num);
2576 }
2577 } else {
2578 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2579 setSize++;
2580 }
2581 }
2582 if (setSize > 0) {
2583 ADD_MASK(sumMask);
2584 }
2585
2586 SKIP_WS(next);
2587 if (*next == ',') {
2588 next++;
2589 }
2590 scan = next;
2591 continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002592 }
2593
Jonathan Peyton30419822017-05-12 18:01:32 +00002594 // Read the first integer.
2595 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2596 SKIP_DIGITS(next);
2597 start = __kmp_str_to_int(scan, *next);
2598 KMP_ASSERT2(start >= 0, "bad explicit proc list");
2599 SKIP_WS(next);
2600
2601 // If this isn't a range, then add a mask to the list and go on.
2602 if (*next != '-') {
2603 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2604
2605 // Skip optional comma.
2606 if (*next == ',') {
2607 next++;
2608 }
2609 scan = next;
2610 continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002611 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002612
2613 // This is a range. Skip over the '-' and read in the 2nd int.
2614 next++; // skip '-'
2615 SKIP_WS(next);
2616 scan = next;
2617 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2618 SKIP_DIGITS(next);
2619 end = __kmp_str_to_int(scan, *next);
2620 KMP_ASSERT2(end >= 0, "bad explicit proc list");
2621
2622 // Check for a stride parameter
2623 stride = 1;
2624 SKIP_WS(next);
2625 if (*next == ':') {
2626 // A stride is specified. Skip over the ':" and read the 3rd int.
2627 int sign = +1;
2628 next++; // skip ':'
2629 SKIP_WS(next);
2630 scan = next;
2631 if (*next == '-') {
2632 sign = -1;
2633 next++;
2634 SKIP_WS(next);
2635 scan = next;
2636 }
2637 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2638 SKIP_DIGITS(next);
2639 stride = __kmp_str_to_int(scan, *next);
2640 KMP_ASSERT2(stride >= 0, "bad explicit proc list");
2641 stride *= sign;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002642 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002643
2644 // Do some range checks.
2645 KMP_ASSERT2(stride != 0, "bad explicit proc list");
2646 if (stride > 0) {
2647 KMP_ASSERT2(start <= end, "bad explicit proc list");
2648 } else {
2649 KMP_ASSERT2(start >= end, "bad explicit proc list");
2650 }
2651 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
2652
2653 // Add the mask for each OS proc # to the list.
2654 if (stride > 0) {
2655 do {
2656 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2657 start += stride;
2658 } while (start <= end);
2659 } else {
2660 do {
2661 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2662 start += stride;
2663 } while (start >= end);
2664 }
2665
2666 // Skip optional comma.
2667 SKIP_WS(next);
2668 if (*next == ',') {
2669 next++;
2670 }
2671 scan = next;
2672 }
2673
2674 *out_numMasks = nextNewMask;
2675 if (nextNewMask == 0) {
2676 *out_masks = NULL;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002677 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
Jonathan Peyton30419822017-05-12 18:01:32 +00002678 return;
2679 }
2680 KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
2681 for (i = 0; i < nextNewMask; i++) {
2682 kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
2683 kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
2684 KMP_CPU_COPY(dest, src);
2685 }
2686 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
2687 KMP_CPU_FREE(sumMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002688}
2689
Jonathan Peyton30419822017-05-12 18:01:32 +00002690#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002691
2692/*-----------------------------------------------------------------------------
Jim Cownie5e8470a2013-09-27 10:38:44 +00002693Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
2694places. Again, Here is the grammar:
2695
2696place_list := place
2697place_list := place , place_list
2698place := num
2699place := place : num
2700place := place : num : signed
2701place := { subplacelist }
2702place := ! place // (lowest priority)
2703subplace_list := subplace
2704subplace_list := subplace , subplace_list
2705subplace := num
2706subplace := num : num
2707subplace := num : num : signed
2708signed := num
2709signed := + signed
2710signed := - signed
Jim Cownie5e8470a2013-09-27 10:38:44 +00002711-----------------------------------------------------------------------------*/
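
// For example (hypothetical): "{0,1},{2,3}" describes two places of two OS
// procs each, while "{0:4}:2:4" expands the base place {0,1,2,3} into two
// places, the second one shifted up by 4 (i.e. {4,5,6,7}).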

static void __kmp_process_subplace_list(const char **scan,
                                        kmp_affin_mask_t *osId2Mask,
                                        int maxOsId, kmp_affin_mask_t *tempMask,
                                        int *setSize) {
  const char *next;

  for (;;) {
    int start, count, stride, i;

    // Read in the starting proc id
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    start = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(start >= 0);
    *scan = next;

    // valid follow sets are ',' ':' and '}'
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      if ((start > maxOsId) ||
          (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none))) {
          KMP_WARNING(AffIgnoreInvalidProcID, start);
        }
      } else {
        KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
        (*setSize)++;
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }
    KMP_ASSERT2(**scan == ':', "bad explicit places list");
    (*scan)++; // skip ':'

    // Read count parameter
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    count = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(count >= 0);
    *scan = next;

    // valid follow sets are ',' ':' and '}'
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      for (i = 0; i < count; i++) {
        if ((start > maxOsId) ||
            (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
          if (__kmp_affinity_verbose ||
              (__kmp_affinity_warnings &&
               (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffIgnoreInvalidProcID, start);
          }
          break; // don't proliferate warnings for large count
        } else {
          KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
          start++;
          (*setSize)++;
        }
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }
    KMP_ASSERT2(**scan == ':', "bad explicit places list");
    (*scan)++; // skip ':'

    // Read stride parameter
    int sign = +1;
    for (;;) {
      SKIP_WS(*scan);
      if (**scan == '+') {
        (*scan)++; // skip '+'
        continue;
      }
      if (**scan == '-') {
        sign *= -1;
        (*scan)++; // skip '-'
        continue;
      }
      break;
    }
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    stride = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(stride >= 0);
    *scan = next;
    stride *= sign;

    // valid follow sets are ',' and '}'
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      for (i = 0; i < count; i++) {
        if ((start > maxOsId) ||
            (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
          if (__kmp_affinity_verbose ||
              (__kmp_affinity_warnings &&
               (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffIgnoreInvalidProcID, start);
          }
          break; // don't proliferate warnings for large count
        } else {
          KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
          start += stride;
          (*setSize)++;
        }
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }

    KMP_ASSERT2(0, "bad explicit places list");
  }
}
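
// For example (illustrative): the subplace "0:4:2" denotes start 0, count 4,
// stride 2, so this routine adds OS procs 0, 2, 4 and 6 to tempMask (warning
// about, and stopping at, any id that is not in the machine model).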

static void __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
                                int maxOsId, kmp_affin_mask_t *tempMask,
                                int *setSize) {
  const char *next;

  // valid follow sets are '{' '!' and num
  SKIP_WS(*scan);
  if (**scan == '{') {
    (*scan)++; // skip '{'
    __kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask, setSize);
    KMP_ASSERT2(**scan == '}', "bad explicit places list");
    (*scan)++; // skip '}'
  } else if (**scan == '!') {
    (*scan)++; // skip '!'
    __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
    KMP_CPU_COMPLEMENT(maxOsId, tempMask);
  } else if ((**scan >= '0') && (**scan <= '9')) {
    next = *scan;
    SKIP_DIGITS(next);
    int num = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(num >= 0);
    if ((num > maxOsId) ||
        (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
      if (__kmp_affinity_verbose ||
          (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
        KMP_WARNING(AffIgnoreInvalidProcID, num);
      }
    } else {
      KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
      (*setSize)++;
    }
    *scan = next; // skip num
  } else {
    KMP_ASSERT2(0, "bad explicit places list");
  }
}

// static void
void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
                                      unsigned int *out_numMasks,
                                      const char *placelist,
                                      kmp_affin_mask_t *osId2Mask,
                                      int maxOsId) {
  int i, j, count, stride, sign;
  const char *scan = placelist;
  const char *next = placelist;

  numNewMasks = 2;
  KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
  nextNewMask = 0;

  // tempMask is modified based on the previous or initial
  // place to form the current place.
  // previousMask contains the previous place.
  kmp_affin_mask_t *tempMask;
  kmp_affin_mask_t *previousMask;
  KMP_CPU_ALLOC(tempMask);
  KMP_CPU_ZERO(tempMask);
  KMP_CPU_ALLOC(previousMask);
  KMP_CPU_ZERO(previousMask);
  int setSize = 0;

  for (;;) {
    __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);

    // valid follow sets are ',' ':' and EOL
    SKIP_WS(scan);
    if (*scan == '\0' || *scan == ',') {
      if (setSize > 0) {
        ADD_MASK(tempMask);
      }
      KMP_CPU_ZERO(tempMask);
      setSize = 0;
      if (*scan == '\0') {
        break;
      }
      scan++; // skip ','
      continue;
    }

    KMP_ASSERT2(*scan == ':', "bad explicit places list");
    scan++; // skip ':'

    // Read count parameter
    SKIP_WS(scan);
    KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
    next = scan;
    SKIP_DIGITS(next);
    count = __kmp_str_to_int(scan, *next);
    KMP_ASSERT(count >= 0);
    scan = next;

    // valid follow sets are ',' ':' and EOL
    SKIP_WS(scan);
    if (*scan == '\0' || *scan == ',') {
      stride = +1;
    } else {
      KMP_ASSERT2(*scan == ':', "bad explicit places list");
      scan++; // skip ':'

      // Read stride parameter
      sign = +1;
      for (;;) {
        SKIP_WS(scan);
        if (*scan == '+') {
          scan++; // skip '+'
          continue;
        }
        if (*scan == '-') {
          sign *= -1;
          scan++; // skip '-'
          continue;
        }
        break;
      }
      SKIP_WS(scan);
      KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
      next = scan;
      SKIP_DIGITS(next);
      stride = __kmp_str_to_int(scan, *next);
      KMP_DEBUG_ASSERT(stride >= 0);
      scan = next;
      stride *= sign;
    }

    // Add places determined by initial_place : count : stride
    for (i = 0; i < count; i++) {
      if (setSize == 0) {
        break;
      }
      // Add the current place, then build the next place (tempMask) from that
      KMP_CPU_COPY(previousMask, tempMask);
      ADD_MASK(previousMask);
      KMP_CPU_ZERO(tempMask);
      setSize = 0;
      KMP_CPU_SET_ITERATE(j, previousMask) {
        if (!KMP_CPU_ISSET(j, previousMask)) {
          continue;
        }
        if ((j + stride > maxOsId) || (j + stride < 0) ||
            (!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) ||
            (!KMP_CPU_ISSET(j + stride,
                            KMP_CPU_INDEX(osId2Mask, j + stride)))) {
          if ((__kmp_affinity_verbose ||
               (__kmp_affinity_warnings &&
                (__kmp_affinity_type != affinity_none))) &&
              i < count - 1) {
            KMP_WARNING(AffIgnoreInvalidProcID, j + stride);
          }
          continue;
        }
        KMP_CPU_SET(j + stride, tempMask);
        setSize++;
      }
    }
    KMP_CPU_ZERO(tempMask);
    setSize = 0;

    // valid follow sets are ',' and EOL
    SKIP_WS(scan);
    if (*scan == '\0') {
      break;
    }
    if (*scan == ',') {
      scan++; // skip ','
      continue;
    }

    KMP_ASSERT2(0, "bad explicit places list");
  }

  *out_numMasks = nextNewMask;
  if (nextNewMask == 0) {
    *out_masks = NULL;
    KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
    KMP_CPU_FREE(tempMask); // release the scratch masks on the early return too
    KMP_CPU_FREE(previousMask);
    return;
  }
  KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
  KMP_CPU_FREE(tempMask);
  KMP_CPU_FREE(previousMask);
  for (i = 0; i < nextNewMask; i++) {
    kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
    kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
    KMP_CPU_COPY(dest, src);
  }
  KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
}
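
// For example (illustrative): the place list "{0,1}:3:2" yields the three
// places {0,1}, {2,3} and {4,5}; each iteration of the count loop above adds
// the current place and then builds the next one by shifting every proc in
// it by the stride.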

#endif /* OMP_40_ENABLED */

#undef ADD_MASK
#undef ADD_MASK_OSID

#if KMP_USE_HWLOC
static int __kmp_hwloc_count_children_by_type(hwloc_topology_t t, hwloc_obj_t o,
                                              hwloc_obj_type_t type,
                                              hwloc_obj_t *f) {
  if (!hwloc_compare_types(o->type, type)) {
    if (*f == NULL)
      *f = o; // output first descendant found
    return 1;
  }
  int sum = 0;
  for (unsigned i = 0; i < o->arity; i++)
    sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f);
  return sum; // will be 0 if none found (as PU arity is 0)
}
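
// Example use (illustrative, with hypothetical tp/package_obj variables):
//   hwloc_obj_t first_pu = NULL;
//   int npus = __kmp_hwloc_count_children_by_type(tp, package_obj,
//                                                 HWLOC_OBJ_PU, &first_pu);
// counts the PUs below package_obj and leaves the first one found in
// first_pu, which callers then walk with hwloc_get_next_obj_by_type().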

static int __kmp_hwloc_count_children_by_depth(hwloc_topology_t t,
                                               hwloc_obj_t o, unsigned depth,
                                               hwloc_obj_t *f) {
  if (o->depth == depth) {
    if (*f == NULL)
      *f = o; // output first descendant found
    return 1;
  }
  int sum = 0;
  for (unsigned i = 0; i < o->arity; i++)
    sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f);
  return sum; // will be 0 if none found (as PU arity is 0)
}

static int __kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o) {
  // skip PU descendants of the object o
  int skipped = 0;
  hwloc_obj_t hT = NULL;
  int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
  for (int i = 0; i < N; ++i) {
    KMP_DEBUG_ASSERT(hT);
    unsigned idx = hT->os_index;
    if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
      KMP_CPU_CLR(idx, __kmp_affin_fullMask);
      KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
      ++skipped;
    }
    hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
  }
  return skipped; // count of skipped units
}

static int __kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o) {
  // check if obj has PUs present in fullMask
  hwloc_obj_t hT = NULL;
  int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
  for (int i = 0; i < N; ++i) {
    KMP_DEBUG_ASSERT(hT);
    unsigned idx = hT->os_index;
    if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask))
      return 1; // found PU
    hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
  }
  return 0; // no PUs found
}
#endif // KMP_USE_HWLOC

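// A sketch of the KMP_HW_SUBSET values this routine applies (illustrative
// examples only; see the library documentation for the exact accepted
// syntax):
//   KMP_HW_SUBSET=2s,8c,2t   -- keep 2 sockets, 8 cores per socket,
//                               2 threads per core
//   KMP_HW_SUBSET=1s@1,4c    -- skip one socket, then keep the next socket
//                               and 4 cores per socket
// The parsed values arrive here in __kmp_hws_socket, __kmp_hws_node,
// __kmp_hws_tile, __kmp_hws_core and __kmp_hws_proc (num/offset pairs).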
static void __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) {
  AddrUnsPair *newAddr;
  if (__kmp_hws_requested == 0)
    goto _exit; // no topology limiting actions requested, exit
#if KMP_USE_HWLOC
  if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
    // Number of subobjects calculated dynamically, this works fine for
    // any non-uniform topology.
    // L2 cache objects are determined by depth, other objects - by type.
    hwloc_topology_t tp = __kmp_hwloc_topology;
    int nS = 0, nN = 0, nL = 0, nC = 0,
        nT = 0; // logical index including skipped
    int nCr = 0, nTr = 0; // number of requested units
    int nPkg = 0, nCo = 0, n_new = 0, n_old = 0, nCpP = 0, nTpC = 0; // counters
    hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
    int L2depth, idx;

    // check support of extensions ----------------------------------
    int numa_support = 0, tile_support = 0;
    if (__kmp_pu_os_idx)
      hT = hwloc_get_pu_obj_by_os_index(tp,
                                        __kmp_pu_os_idx[__kmp_avail_proc - 1]);
    else
      hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1);
    if (hT == NULL) { // something's gone wrong
      KMP_WARNING(AffHWSubsetUnsupported);
      goto _exit;
    }
    // check NUMA node
    hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
    hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
    if (hN != NULL && hN->depth > hS->depth) {
      numa_support = 1; // 1 in case socket includes node(s)
    } else if (__kmp_hws_node.num > 0) {
      // don't support sockets inside NUMA node (no such HW found for testing)
      KMP_WARNING(AffHWSubsetUnsupported);
      goto _exit;
    }
    // check L2 cache, get object by depth because of multiple caches
    L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
    hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT);
    if (hL != NULL &&
        __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1) {
      tile_support = 1; // no sense to count L2 if it includes single core
    } else if (__kmp_hws_tile.num > 0) {
      if (__kmp_hws_core.num == 0) {
        __kmp_hws_core = __kmp_hws_tile; // replace L2 with core
        __kmp_hws_tile.num = 0;
      } else {
        // L2 and core are both requested, but represent same object
        KMP_WARNING(AffHWSubsetInvalid);
        goto _exit;
      }
    }
    // end of check of extensions -----------------------------------

    // fill in unset items, validate settings -----------------------
    if (__kmp_hws_socket.num == 0)
      __kmp_hws_socket.num = nPackages; // use all available sockets
    if (__kmp_hws_socket.offset >= nPackages) {
      KMP_WARNING(AffHWSubsetManySockets);
      goto _exit;
    }
    if (numa_support) {
      int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE,
                                                  &hN); // num nodes in socket
      if (__kmp_hws_node.num == 0)
        __kmp_hws_node.num = NN; // use all available nodes
      if (__kmp_hws_node.offset >= NN) {
        KMP_WARNING(AffHWSubsetManyNodes);
        goto _exit;
      }
      if (tile_support) {
        // get num tiles in node
        int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
        if (__kmp_hws_tile.num == 0) {
          __kmp_hws_tile.num = NL + 1;
        } // use all available tiles, some node may have more tiles, thus +1
        if (__kmp_hws_tile.offset >= NL) {
          KMP_WARNING(AffHWSubsetManyTiles);
          goto _exit;
        }
        int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
                                                    &hC); // num cores in tile
        if (__kmp_hws_core.num == 0)
          __kmp_hws_core.num = NC; // use all available cores
        if (__kmp_hws_core.offset >= NC) {
          KMP_WARNING(AffHWSubsetManyCores);
          goto _exit;
        }
      } else { // tile_support
        int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE,
                                                    &hC); // num cores in node
        if (__kmp_hws_core.num == 0)
          __kmp_hws_core.num = NC; // use all available cores
        if (__kmp_hws_core.offset >= NC) {
          KMP_WARNING(AffHWSubsetManyCores);
          goto _exit;
        }
      } // tile_support
    } else { // numa_support
      if (tile_support) {
        // get num tiles in socket
        int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
        if (__kmp_hws_tile.num == 0)
          __kmp_hws_tile.num = NL; // use all available tiles
        if (__kmp_hws_tile.offset >= NL) {
          KMP_WARNING(AffHWSubsetManyTiles);
          goto _exit;
        }
        int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
                                                    &hC); // num cores in tile
        if (__kmp_hws_core.num == 0)
          __kmp_hws_core.num = NC; // use all available cores
        if (__kmp_hws_core.offset >= NC) {
          KMP_WARNING(AffHWSubsetManyCores);
          goto _exit;
        }
      } else { // tile_support
        int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE,
                                                    &hC); // num cores in socket
        if (__kmp_hws_core.num == 0)
          __kmp_hws_core.num = NC; // use all available cores
        if (__kmp_hws_core.offset >= NC) {
          KMP_WARNING(AffHWSubsetManyCores);
          goto _exit;
        }
      } // tile_support
    }
    if (__kmp_hws_proc.num == 0)
      __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all available procs
    if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) {
      KMP_WARNING(AffHWSubsetManyProcs);
      goto _exit;
    }
    // end of validation --------------------------------------------

    if (pAddr) // pAddr is NULL in case of affinity_none
      newAddr = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) *
                                              __kmp_avail_proc); // max size
    // main loop to form HW subset ----------------------------------
    hS = NULL;
    int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE);
    for (int s = 0; s < NP; ++s) {
      // Check Socket -----------------------------------------------
      hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS);
      if (!__kmp_hwloc_obj_has_PUs(tp, hS))
        continue; // skip socket if all PUs are out of fullMask
      ++nS; // only count objects that have PUs in affinity mask
      if (nS <= __kmp_hws_socket.offset ||
          nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) {
        n_old += __kmp_hwloc_skip_PUs_obj(tp, hS); // skip socket
        continue; // move to next socket
      }
      nCr = 0; // count number of cores per socket
      // socket requested, go down the topology tree
      // check 4 cases: (+NUMA+Tile), (+NUMA-Tile), (-NUMA+Tile), (-NUMA-Tile)
      if (numa_support) {
        nN = 0;
        hN = NULL;
        // num nodes in current socket
        int NN =
            __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE, &hN);
        for (int n = 0; n < NN; ++n) {
          // Check NUMA Node ----------------------------------------
          if (!__kmp_hwloc_obj_has_PUs(tp, hN)) {
            hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
            continue; // skip node if all PUs are out of fullMask
          }
          ++nN;
          if (nN <= __kmp_hws_node.offset ||
              nN > __kmp_hws_node.num + __kmp_hws_node.offset) {
            // skip node as not requested
            n_old += __kmp_hwloc_skip_PUs_obj(tp, hN); // skip node
            hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
            continue; // move to next node
          }
          // node requested, go down the topology tree
          if (tile_support) {
            nL = 0;
            hL = NULL;
            int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
            for (int l = 0; l < NL; ++l) {
              // Check L2 (tile) ------------------------------------
              if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
                hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
                continue; // skip tile if all PUs are out of fullMask
              }
              ++nL;
              if (nL <= __kmp_hws_tile.offset ||
                  nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
                // skip tile as not requested
                n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
                hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
                continue; // move to next tile
              }
              // tile requested, go down the topology tree
              nC = 0;
              hC = NULL;
              // num cores in current tile
              int NC = __kmp_hwloc_count_children_by_type(tp, hL,
                                                          HWLOC_OBJ_CORE, &hC);
              for (int c = 0; c < NC; ++c) {
                // Check Core ---------------------------------------
                if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
                  hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                  continue; // skip core if all PUs are out of fullMask
                }
                ++nC;
                if (nC <= __kmp_hws_core.offset ||
                    nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
                  // skip core as not requested
                  n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
                  hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                  continue; // move to next core
                }
                // core requested, go down to PUs
                nT = 0;
                nTr = 0;
                hT = NULL;
                // num procs in current core
                int NT = __kmp_hwloc_count_children_by_type(tp, hC,
                                                            HWLOC_OBJ_PU, &hT);
                for (int t = 0; t < NT; ++t) {
                  // Check PU ---------------------------------------
                  idx = hT->os_index;
                  if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
                    hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                    continue; // skip PU if not in fullMask
                  }
                  ++nT;
                  if (nT <= __kmp_hws_proc.offset ||
                      nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
                    // skip PU
                    KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                    ++n_old;
                    KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
                    hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                    continue; // move to next PU
                  }
                  ++nTr;
                  if (pAddr) // collect requested thread's data
                    newAddr[n_new] = (*pAddr)[n_old];
                  ++n_new;
                  ++n_old;
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                } // threads loop
                if (nTr > 0) {
                  ++nCr; // num cores per socket
                  ++nCo; // total num cores
                  if (nTr > nTpC)
                    nTpC = nTr; // calc max threads per core
                }
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
              } // cores loop
              hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
            } // tiles loop
          } else { // tile_support
            // no tiles, check cores
            nC = 0;
            hC = NULL;
            // num cores in current node
            int NC =
                __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE, &hC);
            for (int c = 0; c < NC; ++c) {
              // Check Core ---------------------------------------
              if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                continue; // skip core if all PUs are out of fullMask
              }
              ++nC;
              if (nC <= __kmp_hws_core.offset ||
                  nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
                // skip core as not requested
                n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                continue; // move to next core
              }
              // core requested, go down to PUs
              nT = 0;
              nTr = 0;
              hT = NULL;
              int NT =
                  __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
              for (int t = 0; t < NT; ++t) {
                // Check PU ---------------------------------------
                idx = hT->os_index;
                if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                  continue; // skip PU if not in fullMask
                }
                ++nT;
                if (nT <= __kmp_hws_proc.offset ||
                    nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
                  // skip PU
                  KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                  ++n_old;
                  KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                  continue; // move to next PU
                }
                ++nTr;
                if (pAddr) // collect requested thread's data
                  newAddr[n_new] = (*pAddr)[n_old];
                ++n_new;
                ++n_old;
                hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
              } // threads loop
              if (nTr > 0) {
                ++nCr; // num cores per socket
                ++nCo; // total num cores
                if (nTr > nTpC)
                  nTpC = nTr; // calc max threads per core
              }
              hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
            } // cores loop
          } // tiles support
          hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
        } // nodes loop
      } else { // numa_support
        // no NUMA support
        if (tile_support) {
          nL = 0;
          hL = NULL;
          // num tiles in current socket
          int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
          for (int l = 0; l < NL; ++l) {
            // Check L2 (tile) ------------------------------------
            if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
              hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
              continue; // skip tile if all PUs are out of fullMask
            }
            ++nL;
            if (nL <= __kmp_hws_tile.offset ||
                nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
              // skip tile as not requested
              n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
              hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
              continue; // move to next tile
            }
            // tile requested, go down the topology tree
            nC = 0;
            hC = NULL;
            // num cores per tile
            int NC =
                __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC);
            for (int c = 0; c < NC; ++c) {
              // Check Core ---------------------------------------
              if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                continue; // skip core if all PUs are out of fullMask
              }
              ++nC;
              if (nC <= __kmp_hws_core.offset ||
                  nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
                // skip core as not requested
                n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
                hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
                continue; // move to next core
              }
              // core requested, go down to PUs
              nT = 0;
              nTr = 0;
              hT = NULL;
              // num procs per core
              int NT =
                  __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
              for (int t = 0; t < NT; ++t) {
                // Check PU ---------------------------------------
                idx = hT->os_index;
                if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                  continue; // skip PU if not in fullMask
                }
                ++nT;
                if (nT <= __kmp_hws_proc.offset ||
                    nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
                  // skip PU
                  KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                  ++n_old;
                  KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
                  hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                  continue; // move to next PU
                }
                ++nTr;
                if (pAddr) // collect requested thread's data
                  newAddr[n_new] = (*pAddr)[n_old];
                ++n_new;
                ++n_old;
                hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
              } // threads loop
              if (nTr > 0) {
                ++nCr; // num cores per socket
                ++nCo; // total num cores
                if (nTr > nTpC)
                  nTpC = nTr; // calc max threads per core
              }
              hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
            } // cores loop
            hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
          } // tiles loop
        } else { // tile_support
          // no tiles, check cores
          nC = 0;
          hC = NULL;
          // num cores in socket
          int NC =
              __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE, &hC);
          for (int c = 0; c < NC; ++c) {
            // Check Core -------------------------------------------
            if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
              hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
              continue; // skip core if all PUs are out of fullMask
            }
            ++nC;
            if (nC <= __kmp_hws_core.offset ||
                nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
              // skip core as not requested
              n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
              hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
              continue; // move to next core
            }
            // core requested, go down to PUs
            nT = 0;
            nTr = 0;
            hT = NULL;
            // num procs per core
            int NT =
                __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
            for (int t = 0; t < NT; ++t) {
              // Check PU ---------------------------------------
              idx = hT->os_index;
              if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
                hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                continue; // skip PU if not in fullMask
              }
              ++nT;
              if (nT <= __kmp_hws_proc.offset ||
                  nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
                // skip PU
                KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                ++n_old;
                KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
                hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
                continue; // move to next PU
              }
              ++nTr;
              if (pAddr) // collect requested thread's data
                newAddr[n_new] = (*pAddr)[n_old];
              ++n_new;
              ++n_old;
              hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
            } // threads loop
            if (nTr > 0) {
              ++nCr; // num cores per socket
              ++nCo; // total num cores
              if (nTr > nTpC)
                nTpC = nTr; // calc max threads per core
            }
            hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
          } // cores loop
        } // tiles support
      } // numa_support
      if (nCr > 0) { // found cores?
        ++nPkg; // num sockets
        if (nCr > nCpP)
          nCpP = nCr; // calc max cores per socket
      }
    } // sockets loop

    // check the subset is valid
    KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc);
    KMP_DEBUG_ASSERT(nPkg > 0);
    KMP_DEBUG_ASSERT(nCpP > 0);
    KMP_DEBUG_ASSERT(nTpC > 0);
    KMP_DEBUG_ASSERT(nCo > 0);
    KMP_DEBUG_ASSERT(nPkg <= nPackages);
    KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg);
    KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore);
    KMP_DEBUG_ASSERT(nCo <= __kmp_ncores);

    nPackages = nPkg; // correct num sockets
    nCoresPerPkg = nCpP; // correct num cores per socket
    __kmp_nThreadsPerCore = nTpC; // correct num threads per core
    __kmp_avail_proc = n_new; // correct num procs
    __kmp_ncores = nCo; // correct num cores
    // hwloc topology method end
  } else
#endif // KMP_USE_HWLOC
  {
    int n_old = 0, n_new = 0, proc_num = 0;
    if (__kmp_hws_node.num > 0 || __kmp_hws_tile.num > 0) {
      KMP_WARNING(AffHWSubsetNoHWLOC);
      goto _exit;
    }
    if (__kmp_hws_socket.num == 0)
      __kmp_hws_socket.num = nPackages; // use all available sockets
    if (__kmp_hws_core.num == 0)
      __kmp_hws_core.num = nCoresPerPkg; // use all available cores
    if (__kmp_hws_proc.num == 0 || __kmp_hws_proc.num > __kmp_nThreadsPerCore)
      __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all HW contexts
    if (!__kmp_affinity_uniform_topology()) {
      KMP_WARNING(AffHWSubsetNonUniform);
      goto _exit; // don't support non-uniform topology
    }
    if (depth > 3) {
      KMP_WARNING(AffHWSubsetNonThreeLevel);
      goto _exit; // don't support more than 3-level topology
    }
    if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) {
      KMP_WARNING(AffHWSubsetManySockets);
      goto _exit;
    }
    if (__kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg) {
      KMP_WARNING(AffHWSubsetManyCores);
      goto _exit;
    }
    // Form the requested subset
    if (pAddr) // pAddr is NULL in case of affinity_none
      newAddr = (AddrUnsPair *)__kmp_allocate(
          sizeof(AddrUnsPair) * __kmp_hws_socket.num * __kmp_hws_core.num *
          __kmp_hws_proc.num);
    for (int i = 0; i < nPackages; ++i) {
      if (i < __kmp_hws_socket.offset ||
          i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) {
        // skip not-requested socket
        n_old += nCoresPerPkg * __kmp_nThreadsPerCore;
        if (__kmp_pu_os_idx != NULL) {
          // walk through skipped socket
          for (int j = 0; j < nCoresPerPkg; ++j) {
            for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
              KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
              ++proc_num;
            }
          }
        }
      } else {
        // walk through requested socket
        for (int j = 0; j < nCoresPerPkg; ++j) {
          if (j < __kmp_hws_core.offset ||
              j >= __kmp_hws_core.offset +
                       __kmp_hws_core.num) { // skip not-requested core
            n_old += __kmp_nThreadsPerCore;
            if (__kmp_pu_os_idx != NULL) {
              for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
                KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
                ++proc_num;
              }
            }
          } else {
            // walk through requested core
            for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
              if (k < __kmp_hws_proc.num) {
                if (pAddr) // collect requested thread's data
                  newAddr[n_new] = (*pAddr)[n_old];
                n_new++;
              } else {
                if (__kmp_pu_os_idx != NULL)
                  KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
              }
              n_old++;
              ++proc_num;
            }
          }
        }
      }
    }
    KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
    KMP_DEBUG_ASSERT(n_new ==
                     __kmp_hws_socket.num * __kmp_hws_core.num *
                         __kmp_hws_proc.num);
    nPackages = __kmp_hws_socket.num; // correct nPackages
    nCoresPerPkg = __kmp_hws_core.num; // correct nCoresPerPkg
    __kmp_nThreadsPerCore = __kmp_hws_proc.num; // correct __kmp_nThreadsPerCore
    __kmp_avail_proc = n_new; // correct avail_proc
    __kmp_ncores = nPackages * __kmp_hws_core.num; // correct ncores
  } // non-hwloc topology method
  if (pAddr) {
    __kmp_free(*pAddr);
    *pAddr = newAddr; // replace old topology with new one
  }
  if (__kmp_affinity_verbose) {
    char m[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(m, KMP_AFFIN_MASK_PRINT_LEN,
                              __kmp_affin_fullMask);
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_HW_SUBSET", m);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_HW_SUBSET", m);
    }
    KMP_INFORM(AvailableOSProc, "KMP_HW_SUBSET", __kmp_avail_proc);
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_str_buf_print(&buf, "%d", nPackages);
    KMP_INFORM(TopologyExtra, "KMP_HW_SUBSET", buf.str, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
    __kmp_str_buf_free(&buf);
  }
_exit:
  if (__kmp_pu_os_idx != NULL) {
    __kmp_free(__kmp_pu_os_idx);
    __kmp_pu_os_idx = NULL;
  }
}

// This function figures out the deepest level at which there is at least one
// cluster/core with more than one processing unit bound to it.
static int __kmp_affinity_find_core_level(const AddrUnsPair *address2os,
                                          int nprocs, int bottom_level) {
  int core_level = 0;

  for (int i = 0; i < nprocs; i++) {
    for (int j = bottom_level; j > 0; j--) {
      if (address2os[i].first.labels[j] > 0) {
        if (core_level < (j - 1)) {
          core_level = j - 1;
        }
      }
    }
  }
  return core_level;
}
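
// For example (illustrative): with labels (package, core, thread) and
// bottom_level == 2, a topology where some core carries a nonzero thread
// label (more than one PU per core) yields core_level == 1; if every thread
// label is 0 but some core label is nonzero, the result is core_level == 0.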

// This function counts the number of clusters/cores at the given level.
static int __kmp_affinity_compute_ncores(const AddrUnsPair *address2os,
                                         int nprocs, int bottom_level,
                                         int core_level) {
  int ncores = 0;
  int i, j;

  j = bottom_level;
  for (i = 0; i < nprocs; i++) {
    for (j = bottom_level; j > core_level; j--) {
      if ((i + 1) < nprocs) {
        if (address2os[i + 1].first.labels[j] > 0) {
          break;
        }
      }
    }
    if (j == core_level) {
      ncores++;
    }
  }
  if (j > core_level) {
    // In case of ( nprocs < __kmp_avail_proc ) we may end too deep and miss
    // one core. May occur when called from __kmp_affinity_find_core().
    ncores++;
  }
  return ncores;
}

// This function finds to which cluster/core the given processing unit is
// bound.
static int __kmp_affinity_find_core(const AddrUnsPair *address2os, int proc,
                                    int bottom_level, int core_level) {
  return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level,
                                       core_level) -
         1;
}

// This function finds the maximal number of processing units bound to a
// cluster/core at the given level.
static int __kmp_affinity_max_proc_per_core(const AddrUnsPair *address2os,
                                            int nprocs, int bottom_level,
                                            int core_level) {
  int maxprocpercore = 0;

  if (core_level < bottom_level) {
    for (int i = 0; i < nprocs; i++) {
      int percore = address2os[i].first.labels[core_level + 1] + 1;

      if (percore > maxprocpercore) {
        maxprocpercore = percore;
      }
    }
  } else {
    maxprocpercore = 1;
  }
  return maxprocpercore;
}
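
// Taken together (an illustrative reading, not a spec): with bottom_level
// pointing at the PU level and core_level from
// __kmp_affinity_find_core_level(), __kmp_affinity_compute_ncores() counts
// the distinct cores, __kmp_affinity_find_core() maps a PU's index in
// address2os to the ordinal of its core, and
// __kmp_affinity_max_proc_per_core() reports the widest SMT degree seen.
// The balanced affinity code below uses all three.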

static AddrUnsPair *address2os = NULL;
static int *procarr = NULL;
static int __kmp_aff_depth = 0;

#define KMP_EXIT_AFF_NONE                                                      \
  KMP_ASSERT(__kmp_affinity_type == affinity_none);                           \
  KMP_ASSERT(address2os == NULL);                                             \
  __kmp_apply_thread_places(NULL, 0);                                         \
  return;

static int __kmp_affinity_cmp_Address_child_num(const void *a, const void *b) {
  const Address *aa =
      (const Address *)&(((AddrUnsPair *)CCAST(void *, a))->first);
  const Address *bb =
      (const Address *)&(((AddrUnsPair *)CCAST(void *, b))->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
  KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
  for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
    int j = depth - i - 1;
    if (aa->childNums[j] < bb->childNums[j])
      return -1;
    if (aa->childNums[j] > bb->childNums[j])
      return 1;
  }
  for (; i < depth; i++) {
    int j = i - __kmp_affinity_compact;
    if (aa->childNums[j] < bb->childNums[j])
      return -1;
    if (aa->childNums[j] > bb->childNums[j])
      return 1;
  }
  return 0;
}
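
// For example (derived from the loops above): with depth == 3 (levels
// package=0, core=1, thread=2) and __kmp_affinity_compact == 1, the
// comparison order is childNums[2] (thread), then childNums[0] (package),
// then childNums[1] (core); i.e., the innermost __kmp_affinity_compact
// levels become the most significant sort keys.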

static void __kmp_aux_affinity_initialize(void) {
  if (__kmp_affinity_masks != NULL) {
    KMP_ASSERT(__kmp_affin_fullMask != NULL);
    return;
  }

  // Create the "full" mask - this defines all of the processors that we
  // consider to be in the machine model. If respect is set, then it is the
  // initialization thread's affinity mask. Otherwise, it is all processors
  // that we know about on the machine.
  if (__kmp_affin_fullMask == NULL) {
    KMP_CPU_ALLOC(__kmp_affin_fullMask);
  }
  if (KMP_AFFINITY_CAPABLE()) {
    if (__kmp_affinity_respect_mask) {
      __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);

      // Count the number of available processors.
      unsigned i;
      __kmp_avail_proc = 0;
      KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
        if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
          continue;
        }
        __kmp_avail_proc++;
      }
      if (__kmp_avail_proc > __kmp_xproc) {
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none))) {
          KMP_WARNING(ErrorInitializeAffinity);
        }
        __kmp_affinity_type = affinity_none;
        KMP_AFFINITY_DISABLE();
        return;
      }
    } else {
      __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
      __kmp_avail_proc = __kmp_xproc;
    }
  }

  int depth = -1;
  kmp_i18n_id_t msg_id = kmp_i18n_null;

  // For backward compatibility, setting KMP_CPUINFO_FILE =>
  // KMP_TOPOLOGY_METHOD=cpuinfo
  if ((__kmp_cpuinfo_file != NULL) &&
      (__kmp_affinity_top_method == affinity_top_method_all)) {
    __kmp_affinity_top_method = affinity_top_method_cpuinfo;
  }

  if (__kmp_affinity_top_method == affinity_top_method_all) {
    // In the default code path, errors are not fatal - we just try using
    // another method. We only emit a warning message if affinity is on, or
    // the verbose flag is set, and the nowarnings flag was not set.
    const char *file_name = NULL;
    int line = 0;
#if KMP_USE_HWLOC
    if (depth < 0 &&
        __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
      if (__kmp_affinity_verbose) {
        KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
      }
      if (!__kmp_hwloc_error) {
        depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
        if (depth == 0) {
          KMP_EXIT_AFF_NONE;
        } else if (depth < 0 && __kmp_affinity_verbose) {
          KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
        }
      } else if (__kmp_affinity_verbose) {
        KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
      }
    }
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

    if (depth < 0) {
      if (__kmp_affinity_verbose) {
        KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
      }

      file_name = NULL;
      depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
      if (depth == 0) {
        KMP_EXIT_AFF_NONE;
      }

      if (depth < 0) {
        if (__kmp_affinity_verbose) {
          if (msg_id != kmp_i18n_null) {
            KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY",
                       __kmp_i18n_catgets(msg_id),
                       KMP_I18N_STR(DecodingLegacyAPIC));
          } else {
            KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
                       KMP_I18N_STR(DecodingLegacyAPIC));
          }
        }

        file_name = NULL;
        depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
        if (depth == 0) {
          KMP_EXIT_AFF_NONE;
        }
      }
    }

#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#if KMP_OS_LINUX

    if (depth < 0) {
      if (__kmp_affinity_verbose) {
        if (msg_id != kmp_i18n_null) {
          KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY",
                     __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
        } else {
          KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
        }
      }

      FILE *f = fopen("/proc/cpuinfo", "r");
      if (f == NULL) {
        msg_id = kmp_i18n_str_CantOpenCpuinfo;
      } else {
        file_name = "/proc/cpuinfo";
        depth =
            __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
        fclose(f);
        if (depth == 0) {
          KMP_EXIT_AFF_NONE;
        }
      }
    }

#endif /* KMP_OS_LINUX */

#if KMP_GROUP_AFFINITY

    if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
      if (__kmp_affinity_verbose) {
        KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
      }

      depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
      KMP_ASSERT(depth != 0);
    }

#endif /* KMP_GROUP_AFFINITY */

    if (depth < 0) {
      if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
        if (file_name == NULL) {
          KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
        } else if (line == 0) {
          KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
        } else {
          KMP_INFORM(UsingFlatOSFileLine, file_name, line,
                     __kmp_i18n_catgets(msg_id));
        }
      }
      // FIXME - print msg if msg_id = kmp_i18n_null ???

      file_name = "";
      depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
      if (depth == 0) {
        KMP_EXIT_AFF_NONE;
      }
      KMP_ASSERT(depth > 0);
      KMP_ASSERT(address2os != NULL);
    }
  }

// If the user has specified that a particular topology discovery method is to
// be used, then we abort if that method fails. The exception is group
// affinity, which might have been implicitly set.

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

  else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
    }

    depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
    if (depth == 0) {
      KMP_EXIT_AFF_NONE;
    }
    if (depth < 0) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
    }
  } else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
    }

    depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
    if (depth == 0) {
      KMP_EXIT_AFF_NONE;
    }
    if (depth < 0) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
    }
  }

#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
    const char *filename;
    if (__kmp_cpuinfo_file != NULL) {
      filename = __kmp_cpuinfo_file;
    } else {
      filename = "/proc/cpuinfo";
    }

    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
    }

    FILE *f = fopen(filename, "r");
    if (f == NULL) {
      int code = errno;
      if (__kmp_cpuinfo_file != NULL) {
        __kmp_msg(kmp_ms_fatal, KMP_MSG(CantOpenFileForReading, filename),
                  KMP_ERR(code), KMP_HNT(NameComesFrom_CPUINFO_FILE),
                  __kmp_msg_null);
      } else {
        __kmp_msg(kmp_ms_fatal, KMP_MSG(CantOpenFileForReading, filename),
                  KMP_ERR(code), __kmp_msg_null);
      }
    }
    int line = 0;
    depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
    fclose(f);
    if (depth < 0) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      if (line > 0) {
        KMP_FATAL(FileLineMsgExiting, filename, line,
                  __kmp_i18n_catgets(msg_id));
      } else {
        KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
      }
    }
    if (__kmp_affinity_type == affinity_none) {
      KMP_ASSERT(depth == 0);
      KMP_EXIT_AFF_NONE;
    }
  }

#if KMP_GROUP_AFFINITY

  else if (__kmp_affinity_top_method == affinity_top_method_group) {
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
    }

    depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
    KMP_ASSERT(depth != 0);
    if (depth < 0) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
    }
  }

#endif /* KMP_GROUP_AFFINITY */

  else if (__kmp_affinity_top_method == affinity_top_method_flat) {
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
    }

    depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
    if (depth == 0) {
      KMP_EXIT_AFF_NONE;
    }
    // should not fail
    KMP_ASSERT(depth > 0);
    KMP_ASSERT(address2os != NULL);
  }

#if KMP_USE_HWLOC
  else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
    KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
    }
    depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
    if (depth == 0) {
      KMP_EXIT_AFF_NONE;
    }
  }
#endif // KMP_USE_HWLOC

  if (address2os == NULL) {
    if (KMP_AFFINITY_CAPABLE() &&
        (__kmp_affinity_verbose ||
         (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none)))) {
      KMP_WARNING(ErrorInitializeAffinity);
    }
    __kmp_affinity_type = affinity_none;
    KMP_AFFINITY_DISABLE();
    return;
  }

  __kmp_apply_thread_places(&address2os, depth);

  // Create the table of masks, indexed by thread Id.
  unsigned maxIndex;
  unsigned numUnique;
  kmp_affin_mask_t *osId2Mask =
      __kmp_create_masks(&maxIndex, &numUnique, address2os, __kmp_avail_proc);
  if (__kmp_affinity_gran_levels == 0) {
    KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
  }

  // Set the childNums vector in all Address objects. This must be done before
  // we can sort using __kmp_affinity_cmp_Address_child_num(), which takes into
  // account the setting of __kmp_affinity_compact.
  __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);

4140 switch (__kmp_affinity_type) {
4141
4142 case affinity_explicit:
4143 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
4144#if OMP_40_ENABLED
4145 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
4146#endif
4147 {
4148 __kmp_affinity_process_proclist(
4149 &__kmp_affinity_masks, &__kmp_affinity_num_masks,
4150 __kmp_affinity_proclist, osId2Mask, maxIndex);
4151 }
4152#if OMP_40_ENABLED
4153 else {
4154 __kmp_affinity_process_placelist(
4155 &__kmp_affinity_masks, &__kmp_affinity_num_masks,
4156 __kmp_affinity_proclist, osId2Mask, maxIndex);
4157 }
4158#endif
4159 if (__kmp_affinity_num_masks == 0) {
4160 if (__kmp_affinity_verbose ||
4161 (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
4162 KMP_WARNING(AffNoValidProcID);
4163 }
4164 __kmp_affinity_type = affinity_none;
4165 return;
4166 }
4167 break;
4168
4169 // The other affinity types rely on sorting the Addresses according to some
4170 // permutation of the machine topology tree. Set __kmp_affinity_compact and
4171 // __kmp_affinity_offset appropriately, then jump to a common code fragment
4172 // to do the sort and create the array of affinity masks.
4173
4174 case affinity_logical:
4175 __kmp_affinity_compact = 0;
4176 if (__kmp_affinity_offset) {
4177 __kmp_affinity_offset =
4178 __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
4179 }
4180 goto sortAddresses;
4181
4182 case affinity_physical:
4183 if (__kmp_nThreadsPerCore > 1) {
4184 __kmp_affinity_compact = 1;
4185 if (__kmp_affinity_compact >= depth) {
4186 __kmp_affinity_compact = 0;
4187 }
4188 } else {
4189 __kmp_affinity_compact = 0;
4190 }
4191 if (__kmp_affinity_offset) {
4192 __kmp_affinity_offset =
4193 __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
4194 }
4195 goto sortAddresses;
4196
4197 case affinity_scatter:
4198 if (__kmp_affinity_compact >= depth) {
4199 __kmp_affinity_compact = 0;
4200 } else {
4201 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
4202 }
4203 goto sortAddresses;

  case affinity_compact:
    if (__kmp_affinity_compact >= depth) {
      __kmp_affinity_compact = depth - 1;
    }
    goto sortAddresses;

  case affinity_balanced:
    if (depth <= 1) {
      if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
        KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
      }
      __kmp_affinity_type = affinity_none;
      return;
    } else if (__kmp_affinity_uniform_topology()) {
      break;
    } else { // Non-uniform topology

      // Save the depth for further usage
      __kmp_aff_depth = depth;

      int core_level = __kmp_affinity_find_core_level(
          address2os, __kmp_avail_proc, depth - 1);
      int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
                                                 depth - 1, core_level);
      int maxprocpercore = __kmp_affinity_max_proc_per_core(
          address2os, __kmp_avail_proc, depth - 1, core_level);

      int nproc = ncores * maxprocpercore;
      if ((nproc < 2) || (nproc < __kmp_avail_proc)) {
        if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
          KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
        }
        __kmp_affinity_type = affinity_none;
        return;
      }

      procarr = (int *)__kmp_allocate(sizeof(int) * nproc);
      for (int i = 0; i < nproc; i++) {
        procarr[i] = -1;
      }

      int lastcore = -1;
      int inlastcore = 0;
      for (int i = 0; i < __kmp_avail_proc; i++) {
        int proc = address2os[i].second;
        int core =
            __kmp_affinity_find_core(address2os, i, depth - 1, core_level);

        if (core == lastcore) {
          inlastcore++;
        } else {
          inlastcore = 0;
        }
        lastcore = core;

        procarr[core * maxprocpercore + inlastcore] = proc;
      }
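
      // Layout of procarr after the loop above (illustrative numbers): with
      // ncores == 3 and maxprocpercore == 2, a machine whose middle core has
      // only one usable proc yields
      //   procarr = { p0, p1,  p2, -1,  p3, p4 }
      // i.e. a dense ncores x maxprocpercore grid indexed as
      // [core * maxprocpercore + slot], with -1 marking empty hardware-thread
      // slots on the smaller cores.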

      break;
    }

  sortAddresses:
    // Allocate the gtid->affinity mask table.
    if (__kmp_affinity_dups) {
      __kmp_affinity_num_masks = __kmp_avail_proc;
    } else {
      __kmp_affinity_num_masks = numUnique;
    }

#if OMP_40_ENABLED
    if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
        (__kmp_affinity_num_places > 0) &&
        ((unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks)) {
      __kmp_affinity_num_masks = __kmp_affinity_num_places;
    }
#endif

    KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);

    // Sort the address2os table according to the current setting of
    // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
    qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
          __kmp_affinity_cmp_Address_child_num);
    {
      int i;
      unsigned j;
      for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
        if ((!__kmp_affinity_dups) && (!address2os[i].first.leader)) {
          continue;
        }
        unsigned osId = address2os[i].second;
        kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
        kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j);
        KMP_ASSERT(KMP_CPU_ISSET(osId, src));
        KMP_CPU_COPY(dest, src);
        if (++j >= __kmp_affinity_num_masks) {
          break;
        }
      }
      KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
    }
    break;

  default:
    KMP_ASSERT2(0, "Unexpected affinity setting");
  }

  KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex + 1);
  machine_hierarchy.init(address2os, __kmp_avail_proc);
}
#undef KMP_EXIT_AFF_NONE

void __kmp_affinity_initialize(void) {
  // Much of the code above was written assuming that if a machine was not
  // affinity capable, then __kmp_affinity_type == affinity_none. We now
  // explicitly represent this as __kmp_affinity_type == affinity_disabled.
  // There are too many checks for __kmp_affinity_type == affinity_none
  // in this code. Instead of trying to change them all, check if
  // __kmp_affinity_type == affinity_disabled, and if so, slam it with
  // affinity_none, call the real initialization routine, then restore
  // __kmp_affinity_type to affinity_disabled.
  int disabled = (__kmp_affinity_type == affinity_disabled);
  if (!KMP_AFFINITY_CAPABLE()) {
    KMP_ASSERT(disabled);
  }
  if (disabled) {
    __kmp_affinity_type = affinity_none;
  }
  __kmp_aux_affinity_initialize();
  if (disabled) {
    __kmp_affinity_type = affinity_disabled;
  }
}

void __kmp_affinity_uninitialize(void) {
  if (__kmp_affinity_masks != NULL) {
    KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
    __kmp_affinity_masks = NULL;
  }
  if (__kmp_affin_fullMask != NULL) {
    KMP_CPU_FREE(__kmp_affin_fullMask);
    __kmp_affin_fullMask = NULL;
  }
  __kmp_affinity_num_masks = 0;
  __kmp_affinity_type = affinity_default;
#if OMP_40_ENABLED
  __kmp_affinity_num_places = 0;
#endif
  if (__kmp_affinity_proclist != NULL) {
    __kmp_free(__kmp_affinity_proclist);
    __kmp_affinity_proclist = NULL;
  }
  if (address2os != NULL) {
    __kmp_free(address2os);
    address2os = NULL;
  }
  if (procarr != NULL) {
    __kmp_free(procarr);
    procarr = NULL;
  }
#if KMP_USE_HWLOC
  if (__kmp_hwloc_topology != NULL) {
    hwloc_topology_destroy(__kmp_hwloc_topology);
    __kmp_hwloc_topology = NULL;
  }
#endif
  KMPAffinity::destroy_api();
}

void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return;
  }

  kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
  if (th->th.th_affin_mask == NULL) {
    KMP_CPU_ALLOC(th->th.th_affin_mask);
  } else {
    KMP_CPU_ZERO(th->th.th_affin_mask);
  }

  // Copy the thread mask to the kmp_info_t structure. If
  // __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one that
  // has all of the OS proc ids set, or if __kmp_affinity_respect_mask is set,
  // then the full mask is the same as the mask of the initialization thread.
  kmp_affin_mask_t *mask;
  int i;

#if OMP_40_ENABLED
  if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
#endif
  {
    if ((__kmp_affinity_type == affinity_none) ||
        (__kmp_affinity_type == affinity_balanced)) {
#if KMP_GROUP_AFFINITY
      if (__kmp_num_proc_groups > 1) {
        return;
      }
#endif
      KMP_ASSERT(__kmp_affin_fullMask != NULL);
      i = KMP_PLACE_ALL;
      mask = __kmp_affin_fullMask;
    } else {
      KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
      i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
      mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
    }
  }
#if OMP_40_ENABLED
  else {
    if ((!isa_root) ||
        (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
#if KMP_GROUP_AFFINITY
      if (__kmp_num_proc_groups > 1) {
        return;
      }
#endif
      KMP_ASSERT(__kmp_affin_fullMask != NULL);
      i = KMP_PLACE_ALL;
      mask = __kmp_affin_fullMask;
    } else {
      // int i = some hash function or just a counter that doesn't
      // always start at 0. Use gtid for now.
      KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
      i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
      mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
    }
  }
#endif

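  // In both branches above, when a specific place is used the index is
  // i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks, a simple
  // round-robin over the mask table. Illustrative numbers: with 4 masks and
  // offset 1, gtids 0,1,2,3,4 are bound to places 1,2,3,0,1.
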
#if OMP_40_ENABLED
  th->th.th_current_place = i;
  if (isa_root) {
    th->th.th_new_place = i;
    th->th.th_first_place = 0;
    th->th.th_last_place = __kmp_affinity_num_masks - 1;
  }

  if (i == KMP_PLACE_ALL) {
    KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
                   gtid));
  } else {
    KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
                   gtid, i));
  }
#else
  if (i == -1) {
    KA_TRACE(
        100,
        ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n",
         gtid));
  } else {
    KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
                   gtid, i));
  }
#endif /* OMP_40_ENABLED */

  KMP_CPU_COPY(th->th.th_affin_mask, mask);

  if (__kmp_affinity_verbose) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              th->th.th_affin_mask);
    KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
               __kmp_gettid(), gtid, buf);
  }

#if KMP_OS_WINDOWS
  // On Windows* OS, the process affinity mask might have changed. If the user
  // didn't request affinity and this call fails, just continue silently.
  // See CQ171393.
  if (__kmp_affinity_type == affinity_none) {
    __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
  } else
#endif
    __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}

#if OMP_40_ENABLED

void __kmp_affinity_set_place(int gtid) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return;
  }

  kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);

  KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current "
                 "place = %d)\n",
                 gtid, th->th.th_new_place, th->th.th_current_place));

  // Check that the new place is within this thread's partition.
  KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
  KMP_ASSERT(th->th.th_new_place >= 0);
  KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
  if (th->th.th_first_place <= th->th.th_last_place) {
    KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&
               (th->th.th_new_place <= th->th.th_last_place));
  } else {
    KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) ||
               (th->th.th_new_place >= th->th.th_last_place));
  }
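
  // The else branch handles a place partition that wraps around the end of
  // the place list (th_first_place > th_last_place). E.g., assuming 8 places,
  // first_place == 6 and last_place == 1 describes the partition {6, 7, 0, 1},
  // which is why the containment test differs between the two branches.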

  // Copy the thread mask to the kmp_info_t structure,
  // and set this thread's affinity.
  kmp_affin_mask_t *mask =
      KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place);
  KMP_CPU_COPY(th->th.th_affin_mask, mask);
  th->th.th_current_place = th->th.th_new_place;

  if (__kmp_affinity_verbose) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              th->th.th_affin_mask);
    KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
               __kmp_gettid(), gtid, buf);
  }
  __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}

#endif /* OMP_40_ENABLED */

int __kmp_aux_set_affinity(void **mask) {
  int gtid;
  kmp_info_t *th;
  int retval;

  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  gtid = __kmp_entry_gtid();
  KA_TRACE(1000, ; {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_debug_printf(
        "kmp_set_affinity: setting affinity mask for thread %d = %s\n", gtid,
        buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
    } else {
      unsigned proc;
      int num_procs = 0;

      KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t *)(*mask))) {
        if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
          KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
        }
        if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
          continue;
        }
        num_procs++;
      }
      if (num_procs == 0) {
        KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
      }

#if KMP_GROUP_AFFINITY
      if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
        KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
      }
#endif /* KMP_GROUP_AFFINITY */
    }
  }

  th = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
  retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
  if (retval == 0) {
    KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
  }

#if OMP_40_ENABLED
  th->th.th_current_place = KMP_PLACE_UNDEFINED;
  th->th.th_new_place = KMP_PLACE_UNDEFINED;
  th->th.th_first_place = 0;
  th->th.th_last_place = __kmp_affinity_num_masks - 1;

  // Turn off 4.0 affinity for the current thread at this parallel level.
  th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
#endif

  return retval;
}

int __kmp_aux_get_affinity(void **mask) {
  int gtid;
  int retval;
  kmp_info_t *th;

  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  gtid = __kmp_entry_gtid();
  th = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);

  KA_TRACE(1000, ; {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              th->th.th_affin_mask);
    __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n",
                 gtid, buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
    }
  }

#if !KMP_OS_WINDOWS

  retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
  KA_TRACE(1000, ; {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n",
                 gtid, buf);
  });
  return retval;

#else

  KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
  return 0;

#endif /* KMP_OS_WINDOWS */
}

int __kmp_aux_get_affinity_max_proc() {
  if (!KMP_AFFINITY_CAPABLE()) {
    return 0;
  }
#if KMP_GROUP_AFFINITY
  if (__kmp_num_proc_groups > 1) {
    return (int)(__kmp_num_proc_groups * sizeof(DWORD_PTR) * CHAR_BIT);
  }
#endif
  return __kmp_xproc;
}

int __kmp_aux_set_affinity_mask_proc(int proc, void **mask) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  KA_TRACE(1000, ; {
    int gtid = __kmp_entry_gtid();
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in "
                       "affinity mask for thread %d = %s\n",
                       proc, gtid, buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
    }
  }

  if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
    return -1;
  }
  if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
    return -2;
  }

  KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
  return 0;
}

int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  KA_TRACE(1000, ; {
    int gtid = __kmp_entry_gtid();
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in "
                       "affinity mask for thread %d = %s\n",
                       proc, gtid, buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
    }
  }

  if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
    return -1;
  }
  if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
    return -2;
  }

  KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
  return 0;
}

int __kmp_aux_get_affinity_mask_proc(int proc, void **mask) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  KA_TRACE(1000, ; {
    int gtid = __kmp_entry_gtid();
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in "
                       "affinity mask for thread %d = %s\n",
                       proc, gtid, buf);
  });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
    }
  }

  if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
    return -1;
  }
  if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
    return 0;
  }

  return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
}
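
// Hedged usage sketch of the user-facing wrappers built on the
// __kmp_aux_*_affinity* routines above. kmp_create_affinity_mask,
// kmp_set_affinity_mask_proc, kmp_set_affinity and kmp_destroy_affinity_mask
// are the documented KMP API spellings; the surrounding error handling is
// illustrative only:
//
//   kmp_affinity_mask_t mask;
//   kmp_create_affinity_mask(&mask);        // start from an empty mask
//   kmp_set_affinity_mask_proc(2, &mask);   // nonzero return => bad proc id
//   kmp_set_affinity_mask_proc(3, &mask);
//   if (kmp_set_affinity(&mask) != 0) {
//     /* mask invalid, or affinity not capable on this system */
//   }
//   kmp_destroy_affinity_mask(&mask);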

// Dynamic affinity settings - Affinity balanced
void __kmp_balanced_affinity(int tid, int nthreads) {
  bool fine_gran = true;

  switch (__kmp_affinity_gran) {
  case affinity_gran_fine:
  case affinity_gran_thread:
    break;
  case affinity_gran_core:
    if (__kmp_nThreadsPerCore > 1) {
      fine_gran = false;
    }
    break;
  case affinity_gran_package:
    if (nCoresPerPkg > 1) {
      fine_gran = false;
    }
    break;
  default:
    fine_gran = false;
  }

  if (__kmp_affinity_uniform_topology()) {
    int coreID;
    int threadID;
    // Number of hyper threads per core in HT machine
    int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
    // Number of cores
    int ncores = __kmp_ncores;
    if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) {
      __kmp_nth_per_core = __kmp_avail_proc / nPackages;
      ncores = nPackages;
    }
    // How many threads will be bound to each core
    int chunk = nthreads / ncores;
    // How many cores will have an additional thread bound to them - the
    // "big cores"
    int big_cores = nthreads % ncores;
    // Number of threads on the big cores
    int big_nth = (chunk + 1) * big_cores;
    if (tid < big_nth) {
      coreID = tid / (chunk + 1);
      threadID = (tid % (chunk + 1)) % __kmp_nth_per_core;
    } else { // tid >= big_nth
      coreID = (tid - big_cores) / chunk;
      threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core;
    }
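
    // Worked example (assumed numbers): nthreads == 10 on ncores == 4 gives
    // chunk == 2, big_cores == 2, big_nth == 6. Tids 0-5 land three per core
    // on cores 0-1 (the "big" cores) and tids 6-9 land two per core on cores
    // 2-3, which is as balanced as 10 threads over 4 cores can be.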

    KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
                      "Illegal set affinity operation when not capable");

    kmp_affin_mask_t *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);

    if (fine_gran) {
      int osID = address2os[coreID * __kmp_nth_per_core + threadID].second;
      KMP_CPU_SET(osID, mask);
    } else {
      for (int i = 0; i < __kmp_nth_per_core; i++) {
        int osID;
        osID = address2os[coreID * __kmp_nth_per_core + i].second;
        KMP_CPU_SET(osID, mask);
      }
    }
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
      KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
                 __kmp_gettid(), tid, buf);
    }
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  } else { // Non-uniform topology

    kmp_affin_mask_t *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);

    int core_level = __kmp_affinity_find_core_level(
        address2os, __kmp_avail_proc, __kmp_aff_depth - 1);
    int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
                                               __kmp_aff_depth - 1, core_level);
    int nth_per_core = __kmp_affinity_max_proc_per_core(
        address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level);

    // For performance gain consider the special case nthreads ==
    // __kmp_avail_proc
    if (nthreads == __kmp_avail_proc) {
      if (fine_gran) {
        int osID = address2os[tid].second;
        KMP_CPU_SET(osID, mask);
      } else {
        int core = __kmp_affinity_find_core(address2os, tid,
                                            __kmp_aff_depth - 1, core_level);
        for (int i = 0; i < __kmp_avail_proc; i++) {
          int osID = address2os[i].second;
          if (__kmp_affinity_find_core(address2os, i, __kmp_aff_depth - 1,
                                       core_level) == core) {
            KMP_CPU_SET(osID, mask);
          }
        }
      }
    } else if (nthreads <= ncores) {

      int core = 0;
      for (int i = 0; i < ncores; i++) {
        // Check if this core from procarr[] is in the mask
        int in_mask = 0;
        for (int j = 0; j < nth_per_core; j++) {
          if (procarr[i * nth_per_core + j] != -1) {
            in_mask = 1;
            break;
          }
        }
        if (in_mask) {
          if (tid == core) {
            for (int j = 0; j < nth_per_core; j++) {
              int osID = procarr[i * nth_per_core + j];
              if (osID != -1) {
                KMP_CPU_SET(osID, mask);
                // For fine granularity it is enough to set the first available
                // osID for this core
                if (fine_gran) {
                  break;
                }
              }
            }
            break;
          } else {
            core++;
          }
        }
      }
    } else { // nthreads > ncores
      // Array to save the number of processors at each core
      int *nproc_at_core = (int *)KMP_ALLOCA(sizeof(int) * ncores);
      // Array to save the number of cores with "x" available processors
      int *ncores_with_x_procs =
          (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));
      // Array to save the number of cores with # procs from x to nth_per_core
      int *ncores_with_x_to_max_procs =
          (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));

      for (int i = 0; i <= nth_per_core; i++) {
        ncores_with_x_procs[i] = 0;
        ncores_with_x_to_max_procs[i] = 0;
      }

      for (int i = 0; i < ncores; i++) {
        int cnt = 0;
        for (int j = 0; j < nth_per_core; j++) {
          if (procarr[i * nth_per_core + j] != -1) {
            cnt++;
          }
        }
        nproc_at_core[i] = cnt;
        ncores_with_x_procs[cnt]++;
      }

      for (int i = 0; i <= nth_per_core; i++) {
        for (int j = i; j <= nth_per_core; j++) {
          ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j];
        }
      }

      // Max number of processors
      int nproc = nth_per_core * ncores;
      // An array to keep the number of threads for each context
      int *newarr = (int *)__kmp_allocate(sizeof(int) * nproc);
      for (int i = 0; i < nproc; i++) {
        newarr[i] = 0;
      }

      int nth = nthreads;
      int flag = 0;
      while (nth > 0) {
        for (int j = 1; j <= nth_per_core; j++) {
          int cnt = ncores_with_x_to_max_procs[j];
          for (int i = 0; i < ncores; i++) {
            // Skip the core with 0 processors
            if (nproc_at_core[i] == 0) {
              continue;
            }
            for (int k = 0; k < nth_per_core; k++) {
              if (procarr[i * nth_per_core + k] != -1) {
                if (newarr[i * nth_per_core + k] == 0) {
                  newarr[i * nth_per_core + k] = 1;
                  cnt--;
                  nth--;
                  break;
                } else {
                  if (flag != 0) {
                    newarr[i * nth_per_core + k]++;
                    cnt--;
                    nth--;
                    break;
                  }
                }
              }
            }
            if (cnt == 0 || nth == 0) {
              break;
            }
          }
          if (nth == 0) {
            break;
          }
        }
        flag = 1;
      }
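
      // First pass of the while loop above (flag == 0) gives every occupied
      // hardware context at most one thread; later passes (flag != 0) top
      // contexts up one thread at a time, so newarr[] ends up holding an even
      // distribution of nthreads over the available contexts.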
      int sum = 0;
      for (int i = 0; i < nproc; i++) {
        sum += newarr[i];
        if (sum > tid) {
          if (fine_gran) {
            int osID = procarr[i];
            KMP_CPU_SET(osID, mask);
          } else {
            int coreID = i / nth_per_core;
            for (int ii = 0; ii < nth_per_core; ii++) {
              int osID = procarr[coreID * nth_per_core + ii];
              if (osID != -1) {
                KMP_CPU_SET(osID, mask);
              }
            }
          }
          break;
        }
      }
      __kmp_free(newarr);
    }

    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
      KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
                 __kmp_gettid(), tid, buf);
    }
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
}

#if KMP_OS_LINUX
// We don't need this entry for Windows because
// there is the GetProcessAffinityMask() api
//
// The intended usage is indicated by these steps:
// 1) The user gets the current affinity mask
// 2) Then sets the affinity by calling this function
// 3) Error check the return value
// 4) Use non-OpenMP parallelization
// 5) Reset the affinity to what was stored in step 1)
// (a caller-side sketch of these steps follows this function)
#ifdef __cplusplus
extern "C"
#endif
    int
    kmp_set_thread_affinity_mask_initial()
// the function returns 0 on success,
// -1 if we cannot bind thread
// >0 (errno) if an error happened during binding
{
  int gtid = __kmp_get_gtid();
  if (gtid < 0) {
    // Do not touch non-omp threads
    KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                  "non-omp thread, returning\n"));
    return -1;
  }
  if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) {
    KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                  "affinity not initialized, returning\n"));
    return -1;
  }
  KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                "set full mask for thread %d\n",
                gtid));
  KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
  return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
}
#endif
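
// Illustrative caller-side sketch of the steps listed above (Linux only;
// sched_getaffinity/sched_setaffinity and the cpu_set_t handling are standard
// glibc, while the surrounding code and run_non_openmp_parallel_code() are
// hypothetical):
//
//   cpu_set_t saved;
//   CPU_ZERO(&saved);
//   sched_getaffinity(0, sizeof(saved), &saved);       // step 1
//   int rc = kmp_set_thread_affinity_mask_initial();   // step 2
//   if (rc == 0) {                                     // step 3
//     run_non_openmp_parallel_code();                  // step 4
//   }
//   sched_setaffinity(0, sizeof(saved), &saved);       // step 5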

#endif // KMP_AFFINITY_SUPPORTED