Blame - openmp/runtime/src/kmp_affinity.cpp - toolchain/llvm-project

blob: f14cdf68c08649d819d3dba6fb34bc4aa25300d3 [file] [log] [blame]

Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1	/*
				2	* kmp_affinity.cpp -- affinity management
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3	*/
				4
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5	//===----------------------------------------------------------------------===//
				6	//
				7	// The LLVM Compiler Infrastructure
				8	//
				9	// This file is dual licensed under the MIT and the University of Illinois Open
				10	// Source Licenses. See LICENSE.txt for details.
				11	//
				12	//===----------------------------------------------------------------------===//
				13
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	14	#include "kmp.h"
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	15	#include "kmp_affinity.h"
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	16	#include "kmp_i18n.h"
				17	#include "kmp_io.h"
				18	#include "kmp_str.h"
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	19	#include "kmp_wrapper_getpid.h"
Jonathan Peyton	f639936	2018-07-09 17:51:13 +0000	[diff] [blame]	20	#if KMP_USE_HIER_SCHED
				21	#include "kmp_dispatch_hier.h"
				22	#endif
Jonathan Peyton	1707836	2015-09-10 19:22:07 +0000	[diff] [blame]	23
				24	// Store the real or imagined machine hierarchy here
				25	static hierarchy_info machine_hierarchy;
				26
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	27	void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }
				28
Jonathan Peyton	1707836	2015-09-10 19:22:07 +0000	[diff] [blame]	29	void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	30	kmp_uint32 depth;
				31	// The test below is true if affinity is available, but set to "none". Need to
				32	// init on first use of hierarchical barrier.
				33	if (TCR_1(machine_hierarchy.uninitialized))
				34	machine_hierarchy.init(NULL, nproc);
Jonathan Peyton	1707836	2015-09-10 19:22:07 +0000	[diff] [blame]	35
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	36	// Adjust the hierarchy in case num threads exceeds original
				37	if (nproc > machine_hierarchy.base_num_threads)
				38	machine_hierarchy.resize(nproc);
Jonathan Peyton	7dee82e	2015-11-09 16:24:53 +0000	[diff] [blame]	39
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	40	depth = machine_hierarchy.depth;
				41	KMP_DEBUG_ASSERT(depth > 0);
Jonathan Peyton	1707836	2015-09-10 19:22:07 +0000	[diff] [blame]	42
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	43	thr_bar->depth = depth;
				44	thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1;
				45	thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
Jonathan Peyton	1707836	2015-09-10 19:22:07 +0000	[diff] [blame]	46	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	47
Alp Toker	763b939	2014-02-28 09:42:41 +0000	[diff] [blame]	48	#if KMP_AFFINITY_SUPPORTED
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	49
Jonathan Peyton	1cdd87a	2016-11-14 21:08:35 +0000	[diff] [blame]	50	bool KMPAffinity::picked_api = false;
				51
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	52	void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
				53	void *KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); }
				54	void KMPAffinity::Mask::operator delete(void *p) { __kmp_free(p); }
				55	void KMPAffinity::Mask::operator delete[](void *p) { __kmp_free(p); }
				56	void *KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); }
				57	void KMPAffinity::operator delete(void *p) { __kmp_free(p); }
Jonathan Peyton	1cdd87a	2016-11-14 21:08:35 +0000	[diff] [blame]	58
				59	void KMPAffinity::pick_api() {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	60	KMPAffinity *affinity_dispatch;
				61	if (picked_api)
				62	return;
Jonathan Peyton	1cdd87a	2016-11-14 21:08:35 +0000	[diff] [blame]	63	#if KMP_USE_HWLOC
Jonathan Peyton	e3e2aaf	2017-05-31 20:35:22 +0000	[diff] [blame]	64	// Only use Hwloc if affinity isn't explicitly disabled and
				65	// user requests Hwloc topology method
				66	if (__kmp_affinity_top_method == affinity_top_method_hwloc &&
				67	__kmp_affinity_type != affinity_disabled) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	68	affinity_dispatch = new KMPHwlocAffinity();
				69	} else
Jonathan Peyton	1cdd87a	2016-11-14 21:08:35 +0000	[diff] [blame]	70	#endif
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	71	{
				72	affinity_dispatch = new KMPNativeAffinity();
				73	}
				74	__kmp_affinity_dispatch = affinity_dispatch;
				75	picked_api = true;
Jonathan Peyton	1cdd87a	2016-11-14 21:08:35 +0000	[diff] [blame]	76	}
				77
				78	void KMPAffinity::destroy_api() {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	79	if (__kmp_affinity_dispatch != NULL) {
				80	delete __kmp_affinity_dispatch;
				81	__kmp_affinity_dispatch = NULL;
				82	picked_api = false;
				83	}
Jonathan Peyton	1cdd87a	2016-11-14 21:08:35 +0000	[diff] [blame]	84	}
				85
Jonathan Peyton	6d88e04	2018-12-13 23:14:24 +0000	[diff] [blame]	86	#define KMP_ADVANCE_SCAN(scan) \
				87	while (*scan != '\0') { \
				88	scan++; \
				89	}
				90
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	91	// Print the affinity mask to the character array in a pretty format.
Jonathan Peyton	6d88e04	2018-12-13 23:14:24 +0000	[diff] [blame]	92	// The format is a comma separated list of non-negative integers or integer
				93	// ranges: e.g., 1,2,3-5,7,9-15
				94	// The format can also be the string "{<empty>}" if no bits are set in mask
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	95	char __kmp_affinity_print_mask(char buf, int buf_len,
				96	kmp_affin_mask_t *mask) {
Jonathan Peyton	6d88e04	2018-12-13 23:14:24 +0000	[diff] [blame]	97	int start = 0, finish = 0, previous = 0;
				98	bool first_range;
				99	KMP_ASSERT(buf);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	100	KMP_ASSERT(buf_len >= 40);
Jonathan Peyton	6d88e04	2018-12-13 23:14:24 +0000	[diff] [blame]	101	KMP_ASSERT(mask);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	102	char *scan = buf;
				103	char *end = buf + buf_len - 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	104
Jonathan Peyton	6d88e04	2018-12-13 23:14:24 +0000	[diff] [blame]	105	// Check for empty set.
				106	if (mask->begin() == mask->end()) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	107	KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}");
Jonathan Peyton	6d88e04	2018-12-13 23:14:24 +0000	[diff] [blame]	108	KMP_ADVANCE_SCAN(scan);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	109	KMP_ASSERT(scan <= end);
				110	return buf;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	111	}
				112
Jonathan Peyton	6d88e04	2018-12-13 23:14:24 +0000	[diff] [blame]	113	first_range = true;
				114	start = mask->begin();
				115	while (1) {
				116	// Find next range
				117	// [start, previous] is inclusive range of contiguous bits in mask
				118	for (finish = mask->next(start), previous = start;
				119	finish == previous + 1 && finish != mask->end();
				120	finish = mask->next(finish)) {
				121	previous = finish;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	122	}
				123
Jonathan Peyton	6d88e04	2018-12-13 23:14:24 +0000	[diff] [blame]	124	// The first range does not need a comma printed before it, but the rest
				125	// of the ranges do need a comma beforehand
				126	if (!first_range) {
				127	KMP_SNPRINTF(scan, end - scan + 1, "%s", ",");
				128	KMP_ADVANCE_SCAN(scan);
				129	} else {
				130	first_range = false;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	131	}
Jonathan Peyton	6d88e04	2018-12-13 23:14:24 +0000	[diff] [blame]	132	// Range with three or more contiguous bits in the affinity mask
				133	if (previous - start > 1) {
				134	KMP_SNPRINTF(scan, end - scan + 1, "%d-%d", static_cast<int>(start),
				135	static_cast<int>(previous));
				136	} else {
				137	// Range with one or two contiguous bits in the affinity mask
				138	KMP_SNPRINTF(scan, end - scan + 1, "%d", static_cast<int>(start));
				139	KMP_ADVANCE_SCAN(scan);
				140	if (previous - start > 0) {
				141	KMP_SNPRINTF(scan, end - scan + 1, ",%d", static_cast<int>(previous));
				142	}
				143	}
				144	KMP_ADVANCE_SCAN(scan);
				145	// Start over with new start point
				146	start = finish;
				147	if (start == mask->end())
				148	break;
				149	// Check for overflow
				150	if (end - scan < 2)
				151	break;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	152	}
Jonathan Peyton	6d88e04	2018-12-13 23:14:24 +0000	[diff] [blame]	153
				154	// Check for overflow
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	155	KMP_ASSERT(scan <= end);
				156	return buf;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	157	}
Jonathan Peyton	6d88e04	2018-12-13 23:14:24 +0000	[diff] [blame]	158	#undef KMP_ADVANCE_SCAN
				159
				160	// Print the affinity mask to the string buffer object in a pretty format
				161	// The format is a comma separated list of non-negative integers or integer
				162	// ranges: e.g., 1,2,3-5,7,9-15
				163	// The format can also be the string "{<empty>}" if no bits are set in mask
				164	kmp_str_buf_t __kmp_affinity_str_buf_mask(kmp_str_buf_t buf,
				165	kmp_affin_mask_t *mask) {
				166	int start = 0, finish = 0, previous = 0;
				167	bool first_range;
				168	KMP_ASSERT(buf);
				169	KMP_ASSERT(mask);
				170
				171	__kmp_str_buf_clear(buf);
				172
				173	// Check for empty set.
				174	if (mask->begin() == mask->end()) {
				175	__kmp_str_buf_print(buf, "%s", "{<empty>}");
				176	return buf;
				177	}
				178
				179	first_range = true;
				180	start = mask->begin();
				181	while (1) {
				182	// Find next range
				183	// [start, previous] is inclusive range of contiguous bits in mask
				184	for (finish = mask->next(start), previous = start;
				185	finish == previous + 1 && finish != mask->end();
				186	finish = mask->next(finish)) {
				187	previous = finish;
				188	}
				189
				190	// The first range does not need a comma printed before it, but the rest
				191	// of the ranges do need a comma beforehand
				192	if (!first_range) {
				193	__kmp_str_buf_print(buf, "%s", ",");
				194	} else {
				195	first_range = false;
				196	}
				197	// Range with three or more contiguous bits in the affinity mask
				198	if (previous - start > 1) {
				199	__kmp_str_buf_print(buf, "%d-%d", static_cast<int>(start),
				200	static_cast<int>(previous));
				201	} else {
				202	// Range with one or two contiguous bits in the affinity mask
				203	__kmp_str_buf_print(buf, "%d", static_cast<int>(start));
				204	if (previous - start > 0) {
				205	__kmp_str_buf_print(buf, ",%d", static_cast<int>(previous));
				206	}
				207	}
				208	// Start over with new start point
				209	start = finish;
				210	if (start == mask->end())
				211	break;
				212	}
				213	return buf;
				214	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	215
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	216	void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
				217	KMP_CPU_ZERO(mask);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	218
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	219	#if KMP_GROUP_AFFINITY
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	220
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	221	if (__kmp_num_proc_groups > 1) {
				222	int group;
				223	KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
				224	for (group = 0; group < __kmp_num_proc_groups; group++) {
				225	int i;
				226	int num = __kmp_GetActiveProcessorCount(group);
				227	for (i = 0; i < num; i++) {
				228	KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
				229	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	230	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	231	} else
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	232
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	233	#endif /* KMP_GROUP_AFFINITY */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	234
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	235	{
				236	int proc;
				237	for (proc = 0; proc < __kmp_xproc; proc++) {
				238	KMP_CPU_SET(proc, mask);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	239	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	240	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	241	}
				242
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	243	// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
				244	// called to renumber the labels from [0..n] and place them into the child_num
				245	// vector of the address object. This is done in case the labels used for
Alp Toker	8f2d3f0	2014-02-24 10:40:15 +0000	[diff] [blame]	246	// the children at one node of the hierarchy differ from those used for
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	247	// another node at the same level. Example: suppose the machine has 2 nodes
				248	// with 2 packages each. The first node contains packages 601 and 602, and
				249	// second node contains packages 603 and 604. If we try to sort the table
				250	// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
				251	// because we are paying attention to the labels themselves, not the ordinal
				252	// child numbers. By using the child numbers in the sort, the result is
				253	// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	254	static void __kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
				255	int numAddrs) {
				256	KMP_DEBUG_ASSERT(numAddrs > 0);
				257	int depth = address2os->first.depth;
				258	unsigned counts = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				259	unsigned lastLabel = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				260	int labCt;
				261	for (labCt = 0; labCt < depth; labCt++) {
				262	address2os[0].first.childNums[labCt] = counts[labCt] = 0;
				263	lastLabel[labCt] = address2os[0].first.labels[labCt];
				264	}
				265	int i;
				266	for (i = 1; i < numAddrs; i++) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	267	for (labCt = 0; labCt < depth; labCt++) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	268	if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
				269	int labCt2;
				270	for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
				271	counts[labCt2] = 0;
				272	lastLabel[labCt2] = address2os[i].first.labels[labCt2];
				273	}
				274	counts[labCt]++;
				275	lastLabel[labCt] = address2os[i].first.labels[labCt];
				276	break;
				277	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	278	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	279	for (labCt = 0; labCt < depth; labCt++) {
				280	address2os[i].first.childNums[labCt] = counts[labCt];
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	281	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	282	for (; labCt < (int)Address::maxDepth; labCt++) {
				283	address2os[i].first.childNums[labCt] = 0;
				284	}
				285	}
				286	__kmp_free(lastLabel);
				287	__kmp_free(counts);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	288	}
				289
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	290	// All of the __kmp_affinity_create_*_map() routines should set
				291	// __kmp_affinity_masks to a vector of affinity mask objects of length
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	292	// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and return
				293	// the number of levels in the machine topology tree (zero if
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	294	// __kmp_affinity_type == affinity_none).
				295	//
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	296	// All of the __kmp_affinity_create_*_map() routines should set
				297	// *__kmp_affin_fullMask to the affinity mask for the initialization thread.
				298	// They need to save and restore the mask, and it could be needed later, so
				299	// saving it is just an optimization to avoid calling kmp_get_system_affinity()
				300	// again.
Jonathan Peyton	c5304aa	2016-06-13 21:28:03 +0000	[diff] [blame]	301	kmp_affin_mask_t *__kmp_affin_fullMask = NULL;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	302
				303	static int nCoresPerPkg, nPackages;
Andrey Churbanov	f696c82	2015-01-27 16:55:43 +0000	[diff] [blame]	304	static int __kmp_nThreadsPerCore;
				305	#ifndef KMP_DFLT_NTH_CORES
				306	static int __kmp_ncores;
				307	#endif
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	308	static int *__kmp_pu_os_idx = NULL;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	309
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	310	// __kmp_affinity_uniform_topology() doesn't work when called from
				311	// places which support arbitrarily many levels in the machine topology
				312	// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
				313	// __kmp_affinity_create_x2apicid_map().
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	314	inline static bool __kmp_affinity_uniform_topology() {
				315	return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	316	}
				317
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	318	// Print out the detailed machine topology map, i.e. the physical locations
				319	// of each OS proc.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	320	static void __kmp_affinity_print_topology(AddrUnsPair *address2os, int len,
				321	int depth, int pkgLevel,
				322	int coreLevel, int threadLevel) {
				323	int proc;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	324
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	325	KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
				326	for (proc = 0; proc < len; proc++) {
				327	int level;
				328	kmp_str_buf_t buf;
				329	__kmp_str_buf_init(&buf);
				330	for (level = 0; level < depth; level++) {
				331	if (level == threadLevel) {
				332	__kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
				333	} else if (level == coreLevel) {
				334	__kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
				335	} else if (level == pkgLevel) {
				336	__kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
				337	} else if (level > pkgLevel) {
				338	__kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
				339	level - pkgLevel - 1);
				340	} else {
				341	__kmp_str_buf_print(&buf, "L%d ", level);
				342	}
				343	__kmp_str_buf_print(&buf, "%d ", address2os[proc].first.labels[level]);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	344	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	345	KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
				346	buf.str);
				347	__kmp_str_buf_free(&buf);
				348	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	349	}
				350
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	351	#if KMP_USE_HWLOC
Jonathan Peyton	202a24d	2016-06-13 17:30:08 +0000	[diff] [blame]	352
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	353	static void __kmp_affinity_print_hwloc_tp(AddrUnsPair *addrP, int len,
				354	int depth, int *levels) {
				355	int proc;
				356	kmp_str_buf_t buf;
				357	__kmp_str_buf_init(&buf);
				358	KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
				359	for (proc = 0; proc < len; proc++) {
				360	__kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Package),
				361	addrP[proc].first.labels[0]);
				362	if (depth > 1) {
				363	int level = 1; // iterate over levels
				364	int label = 1; // iterate over labels
				365	if (__kmp_numa_detected)
				366	// node level follows package
				367	if (levels[level++] > 0)
				368	__kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Node),
				369	addrP[proc].first.labels[label++]);
				370	if (__kmp_tile_depth > 0)
				371	// tile level follows node if any, or package
				372	if (levels[level++] > 0)
				373	__kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Tile),
				374	addrP[proc].first.labels[label++]);
				375	if (levels[level++] > 0)
				376	// core level follows
				377	__kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Core),
				378	addrP[proc].first.labels[label++]);
				379	if (levels[level++] > 0)
				380	// thread level is the latest
				381	__kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Thread),
				382	addrP[proc].first.labels[label++]);
				383	KMP_DEBUG_ASSERT(label == depth);
				384	}
				385	KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", addrP[proc].second, buf.str);
				386	__kmp_str_buf_clear(&buf);
				387	}
				388	__kmp_str_buf_free(&buf);
				389	}
				390
				391	static int nNodePerPkg, nTilePerPkg, nTilePerNode, nCorePerNode, nCorePerTile;
				392
Jonathan Peyton	202a24d	2016-06-13 17:30:08 +0000	[diff] [blame]	393	// This function removes the topology levels that are radix 1 and don't offer
				394	// further information about the topology. The most common example is when you
				395	// have one thread context per core, we don't want the extra thread context
				396	// level if it offers no unique labels. So they are removed.
				397	// return value: the new depth of address2os
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	398	static int __kmp_affinity_remove_radix_one_levels(AddrUnsPair *addrP, int nTh,
				399	int depth, int *levels) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	400	int level;
				401	int i;
				402	int radix1_detected;
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	403	int new_depth = depth;
				404	for (level = depth - 1; level > 0; --level) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	405	// Detect if this level is radix 1
				406	radix1_detected = 1;
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	407	for (i = 1; i < nTh; ++i) {
				408	if (addrP[0].first.labels[level] != addrP[i].first.labels[level]) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	409	// There are differing label values for this level so it stays
				410	radix1_detected = 0;
				411	break;
				412	}
Jonathan Peyton	202a24d	2016-06-13 17:30:08 +0000	[diff] [blame]	413	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	414	if (!radix1_detected)
				415	continue;
				416	// Radix 1 was detected
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	417	--new_depth;
				418	levels[level] = -1; // mark level as not present in address2os array
				419	if (level == new_depth) {
				420	// "turn off" deepest level, just decrement the depth that removes
				421	// the level from address2os array
				422	for (i = 0; i < nTh; ++i) {
				423	addrP[i].first.depth--;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	424	}
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	425	} else {
				426	// For other levels, we move labels over and also reduce the depth
				427	int j;
				428	for (j = level; j < new_depth; ++j) {
				429	for (i = 0; i < nTh; ++i) {
				430	addrP[i].first.labels[j] = addrP[i].first.labels[j + 1];
				431	addrP[i].first.depth--;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	432	}
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	433	levels[j + 1] -= 1;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	434	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	435	}
				436	}
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	437	return new_depth;
Jonathan Peyton	202a24d	2016-06-13 17:30:08 +0000	[diff] [blame]	438	}
				439
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	440	// Returns the number of objects of type 'type' below 'obj' within the topology
				441	// tree structure. e.g., if obj is a HWLOC_OBJ_PACKAGE object, and type is
				442	// HWLOC_OBJ_PU, then this will return the number of PU's under the SOCKET
				443	// object.
				444	static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj,
				445	hwloc_obj_type_t type) {
				446	int retval = 0;
				447	hwloc_obj_t first;
				448	for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type,
				449	obj->logical_index, type, 0);
				450	first != NULL &&
				451	hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) ==
				452	obj;
				453	first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type,
				454	first)) {
				455	++retval;
				456	}
				457	return retval;
Jonathan Peyton	202a24d	2016-06-13 17:30:08 +0000	[diff] [blame]	458	}
				459
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	460	static int __kmp_hwloc_count_children_by_depth(hwloc_topology_t t,
				461	hwloc_obj_t o, unsigned depth,
				462	hwloc_obj_t *f) {
				463	if (o->depth == depth) {
				464	if (*f == NULL)
				465	*f = o; // output first descendant found
				466	return 1;
				467	}
				468	int sum = 0;
				469	for (unsigned i = 0; i < o->arity; i++)
				470	sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f);
				471	return sum; // will be 0 if no one found (as PU arity is 0)
				472	}
				473
				474	static int __kmp_hwloc_count_children_by_type(hwloc_topology_t t, hwloc_obj_t o,
				475	hwloc_obj_type_t type,
				476	hwloc_obj_t *f) {
				477	if (!hwloc_compare_types(o->type, type)) {
				478	if (*f == NULL)
				479	*f = o; // output first descendant found
				480	return 1;
				481	}
				482	int sum = 0;
				483	for (unsigned i = 0; i < o->arity; i++)
				484	sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f);
				485	return sum; // will be 0 if no one found (as PU arity is 0)
				486	}
				487
				488	static int __kmp_hwloc_process_obj_core_pu(AddrUnsPair *addrPair,
				489	int &nActiveThreads,
				490	int &num_active_cores,
				491	hwloc_obj_t obj, int depth,
				492	int *labels) {
				493	hwloc_obj_t core = NULL;
				494	hwloc_topology_t &tp = __kmp_hwloc_topology;
				495	int NC = __kmp_hwloc_count_children_by_type(tp, obj, HWLOC_OBJ_CORE, &core);
				496	for (int core_id = 0; core_id < NC; ++core_id, core = core->next_cousin) {
				497	hwloc_obj_t pu = NULL;
				498	KMP_DEBUG_ASSERT(core != NULL);
				499	int num_active_threads = 0;
				500	int NT = __kmp_hwloc_count_children_by_type(tp, core, HWLOC_OBJ_PU, &pu);
				501	// int NT = core->arity; pu = core->first_child; // faster?
				502	for (int pu_id = 0; pu_id < NT; ++pu_id, pu = pu->next_cousin) {
				503	KMP_DEBUG_ASSERT(pu != NULL);
				504	if (!KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
				505	continue; // skip inactive (inaccessible) unit
				506	Address addr(depth + 2);
				507	KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
				508	obj->os_index, obj->logical_index, core->os_index,
				509	core->logical_index, pu->os_index, pu->logical_index));
				510	for (int i = 0; i < depth; ++i)
				511	addr.labels[i] = labels[i]; // package, etc.
				512	addr.labels[depth] = core_id; // core
				513	addr.labels[depth + 1] = pu_id; // pu
				514	addrPair[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
				515	__kmp_pu_os_idx[nActiveThreads] = pu->os_index;
				516	nActiveThreads++;
				517	++num_active_threads; // count active threads per core
				518	}
				519	if (num_active_threads) { // were there any active threads on the core?
				520	++__kmp_ncores; // count total active cores
				521	++num_active_cores; // count active cores per socket
				522	if (num_active_threads > __kmp_nThreadsPerCore)
				523	__kmp_nThreadsPerCore = num_active_threads; // calc maximum
				524	}
				525	}
				526	return 0;
				527	}
				528
				529	// Check if NUMA node detected below the package,
				530	// and if tile object is detected and return its depth
				531	static int __kmp_hwloc_check_numa() {
				532	hwloc_topology_t &tp = __kmp_hwloc_topology;
				533	hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
				534	int depth;
				535
				536	// Get some PU
				537	hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, 0);
				538	if (hT == NULL) // something has gone wrong
				539	return 1;
				540
				541	// check NUMA node below PACKAGE
				542	hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
				543	hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
				544	KMP_DEBUG_ASSERT(hS != NULL);
				545	if (hN != NULL && hN->depth > hS->depth) {
				546	__kmp_numa_detected = TRUE; // socket includes node(s)
				547	if (__kmp_affinity_gran == affinity_gran_node) {
				548	__kmp_affinity_gran == affinity_gran_numa;
				549	}
				550	}
				551
				552	// check tile, get object by depth because of multiple caches possible
				553	depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
				554	hL = hwloc_get_ancestor_obj_by_depth(tp, depth, hT);
				555	hC = NULL; // not used, but reset it here just in case
				556	if (hL != NULL &&
				557	__kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1)
				558	__kmp_tile_depth = depth; // tile consists of multiple cores
				559	return 0;
				560	}
				561
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	562	static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
				563	kmp_i18n_id_t *const msg_id) {
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	564	hwloc_topology_t &tp = __kmp_hwloc_topology; // shortcut of a long name
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	565	*address2os = NULL;
				566	*msg_id = kmp_i18n_null;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	567
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	568	// Save the affinity mask for the current thread.
				569	kmp_affin_mask_t *oldMask;
				570	KMP_CPU_ALLOC(oldMask);
				571	__kmp_get_system_affinity(oldMask, TRUE);
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	572	__kmp_hwloc_check_numa();
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	573
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	574	if (!KMP_AFFINITY_CAPABLE()) {
				575	// Hack to try and infer the machine topology using only the data
				576	// available from cpuid on the current thread, and __kmp_xproc.
				577	KMP_ASSERT(__kmp_affinity_type == affinity_none);
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	578
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	579	nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	580	hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0), HWLOC_OBJ_CORE);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	581	__kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	582	hwloc_get_obj_by_type(tp, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	583	__kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
				584	nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	585	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	586	KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
				587	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				588	if (__kmp_affinity_uniform_topology()) {
				589	KMP_INFORM(Uniform, "KMP_AFFINITY");
				590	} else {
				591	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				592	}
				593	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				594	__kmp_nThreadsPerCore, __kmp_ncores);
				595	}
				596	KMP_CPU_FREE(oldMask);
				597	return 0;
				598	}
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	599
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	600	int depth = 3;
				601	int levels[5] = {0, 1, 2, 3, 4}; // package, [node,] [tile,] core, thread
				602	int labels[3] = {0}; // package [,node] [,tile] - head of lables array
				603	if (__kmp_numa_detected)
				604	++depth;
				605	if (__kmp_tile_depth)
				606	++depth;
				607
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	608	// Allocate the data structure to be returned.
				609	AddrUnsPair *retval =
				610	(AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair) __kmp_avail_proc);
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	611	KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	612	__kmp_pu_os_idx = (int )__kmp_allocate(sizeof(int) __kmp_avail_proc);
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	613
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	614	// When affinity is off, this routine will still be called to set
				615	// __kmp_ncores, as well as __kmp_nThreadsPerCore,
				616	// nCoresPerPkg, & nPackages. Make sure all these vars are set
				617	// correctly, and return if affinity is not enabled.
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	618
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	619	hwloc_obj_t socket, node, tile;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	620	int nActiveThreads = 0;
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	621	int socket_id = 0;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	622	// re-calculate globals to count only accessible resources
				623	__kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	624	nNodePerPkg = nTilePerPkg = nTilePerNode = nCorePerNode = nCorePerTile = 0;
				625	for (socket = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0); socket != NULL;
				626	socket = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, socket),
				627	socket_id++) {
				628	labels[0] = socket_id;
				629	if (__kmp_numa_detected) {
				630	int NN;
				631	int n_active_nodes = 0;
				632	node = NULL;
				633	NN = __kmp_hwloc_count_children_by_type(tp, socket, HWLOC_OBJ_NUMANODE,
				634	&node);
				635	for (int node_id = 0; node_id < NN; ++node_id, node = node->next_cousin) {
				636	labels[1] = node_id;
				637	if (__kmp_tile_depth) {
				638	// NUMA + tiles
				639	int NT;
				640	int n_active_tiles = 0;
				641	tile = NULL;
				642	NT = __kmp_hwloc_count_children_by_depth(tp, node, __kmp_tile_depth,
				643	&tile);
				644	for (int tl_id = 0; tl_id < NT; ++tl_id, tile = tile->next_cousin) {
				645	labels[2] = tl_id;
				646	int n_active_cores = 0;
				647	__kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
				648	n_active_cores, tile, 3, labels);
				649	if (n_active_cores) { // were there any active cores on the socket?
				650	++n_active_tiles; // count active tiles per node
				651	if (n_active_cores > nCorePerTile)
				652	nCorePerTile = n_active_cores; // calc maximum
				653	}
				654	}
				655	if (n_active_tiles) { // were there any active tiles on the socket?
				656	++n_active_nodes; // count active nodes per package
				657	if (n_active_tiles > nTilePerNode)
				658	nTilePerNode = n_active_tiles; // calc maximum
				659	}
				660	} else {
				661	// NUMA, no tiles
				662	int n_active_cores = 0;
				663	__kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
				664	n_active_cores, node, 2, labels);
				665	if (n_active_cores) { // were there any active cores on the socket?
				666	++n_active_nodes; // count active nodes per package
				667	if (n_active_cores > nCorePerNode)
				668	nCorePerNode = n_active_cores; // calc maximum
				669	}
				670	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	671	}
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	672	if (n_active_nodes) { // were there any active nodes on the socket?
				673	++nPackages; // count total active packages
				674	if (n_active_nodes > nNodePerPkg)
				675	nNodePerPkg = n_active_nodes; // calc maximum
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	676	}
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	677	} else {
				678	if (__kmp_tile_depth) {
				679	// no NUMA, tiles
				680	int NT;
				681	int n_active_tiles = 0;
				682	tile = NULL;
				683	NT = __kmp_hwloc_count_children_by_depth(tp, socket, __kmp_tile_depth,
				684	&tile);
				685	for (int tl_id = 0; tl_id < NT; ++tl_id, tile = tile->next_cousin) {
				686	labels[1] = tl_id;
				687	int n_active_cores = 0;
				688	__kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
				689	n_active_cores, tile, 2, labels);
				690	if (n_active_cores) { // were there any active cores on the socket?
				691	++n_active_tiles; // count active tiles per package
				692	if (n_active_cores > nCorePerTile)
				693	nCorePerTile = n_active_cores; // calc maximum
				694	}
				695	}
				696	if (n_active_tiles) { // were there any active tiles on the socket?
				697	++nPackages; // count total active packages
				698	if (n_active_tiles > nTilePerPkg)
				699	nTilePerPkg = n_active_tiles; // calc maximum
				700	}
				701	} else {
				702	// no NUMA, no tiles
				703	int n_active_cores = 0;
				704	__kmp_hwloc_process_obj_core_pu(retval, nActiveThreads, n_active_cores,
				705	socket, 1, labels);
				706	if (n_active_cores) { // were there any active cores on the socket?
				707	++nPackages; // count total active packages
				708	if (n_active_cores > nCoresPerPkg)
				709	nCoresPerPkg = n_active_cores; // calc maximum
				710	}
				711	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	712	}
				713	}
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	714
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	715	// If there's only one thread context to bind to, return now.
				716	KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
				717	KMP_ASSERT(nActiveThreads > 0);
				718	if (nActiveThreads == 1) {
				719	__kmp_ncores = nPackages = 1;
				720	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
				721	if (__kmp_affinity_verbose) {
				722	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				723	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				724
				725	KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
				726	if (__kmp_affinity_respect_mask) {
				727	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				728	} else {
				729	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				730	}
				731	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				732	KMP_INFORM(Uniform, "KMP_AFFINITY");
				733	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				734	__kmp_nThreadsPerCore, __kmp_ncores);
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	735	}
				736
				737	if (__kmp_affinity_type == affinity_none) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	738	__kmp_free(retval);
				739	KMP_CPU_FREE(oldMask);
				740	return 0;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	741	}
				742
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	743	// Form an Address object which only includes the package level.
				744	Address addr(1);
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	745	addr.labels[0] = retval[0].first.labels[0];
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	746	retval[0].first = addr;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	747
				748	if (__kmp_affinity_gran_levels < 0) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	749	__kmp_affinity_gran_levels = 0;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	750	}
				751
				752	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	753	__kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	754	}
				755
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	756	*address2os = retval;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	757	KMP_CPU_FREE(oldMask);
				758	return 1;
				759	}
				760
				761	// Sort the table by physical Id.
				762	qsort(retval, nActiveThreads, sizeof(*retval),
				763	__kmp_affinity_cmp_Address_labels);
				764
				765	// Check to see if the machine topology is uniform
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	766	int nPUs = nPackages * __kmp_nThreadsPerCore;
				767	if (__kmp_numa_detected) {
				768	if (__kmp_tile_depth) { // NUMA + tiles
				769	nPUs = (nNodePerPkg nTilePerNode * nCorePerTile);
				770	} else { // NUMA, no tiles
				771	nPUs = (nNodePerPkg nCorePerNode);
				772	}
				773	} else {
				774	if (__kmp_tile_depth) { // no NUMA, tiles
				775	nPUs = (nTilePerPkg nCorePerTile);
				776	} else { // no NUMA, no tiles
				777	nPUs *= nCoresPerPkg;
				778	}
				779	}
				780	unsigned uniform = (nPUs == nActiveThreads);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	781
				782	// Print the machine topology summary.
				783	if (__kmp_affinity_verbose) {
				784	char mask[KMP_AFFIN_MASK_PRINT_LEN];
				785	__kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	786	if (__kmp_affinity_respect_mask) {
				787	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
				788	} else {
				789	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
				790	}
				791	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				792	if (uniform) {
				793	KMP_INFORM(Uniform, "KMP_AFFINITY");
				794	} else {
				795	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				796	}
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	797	if (__kmp_numa_detected) {
				798	if (__kmp_tile_depth) { // NUMA + tiles
				799	KMP_INFORM(TopologyExtraNoTi, "KMP_AFFINITY", nPackages, nNodePerPkg,
				800	nTilePerNode, nCorePerTile, __kmp_nThreadsPerCore,
				801	__kmp_ncores);
				802	} else { // NUMA, no tiles
				803	KMP_INFORM(TopologyExtraNode, "KMP_AFFINITY", nPackages, nNodePerPkg,
				804	nCorePerNode, __kmp_nThreadsPerCore, __kmp_ncores);
				805	nPUs = (nNodePerPkg nCorePerNode);
				806	}
				807	} else {
				808	if (__kmp_tile_depth) { // no NUMA, tiles
				809	KMP_INFORM(TopologyExtraTile, "KMP_AFFINITY", nPackages, nTilePerPkg,
				810	nCorePerTile, __kmp_nThreadsPerCore, __kmp_ncores);
				811	} else { // no NUMA, no tiles
				812	kmp_str_buf_t buf;
				813	__kmp_str_buf_init(&buf);
				814	__kmp_str_buf_print(&buf, "%d", nPackages);
				815	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
				816	__kmp_nThreadsPerCore, __kmp_ncores);
				817	__kmp_str_buf_free(&buf);
				818	}
				819	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	820	}
				821
				822	if (__kmp_affinity_type == affinity_none) {
				823	__kmp_free(retval);
				824	KMP_CPU_FREE(oldMask);
				825	return 0;
				826	}
				827
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	828	int depth_full = depth; // number of levels before compressing
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	829	// Find any levels with radiix 1, and remove them from the map
				830	// (except for the package level).
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	831	depth = __kmp_affinity_remove_radix_one_levels(retval, nActiveThreads, depth,
				832	levels);
				833	KMP_DEBUG_ASSERT(__kmp_affinity_gran != affinity_gran_default);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	834	if (__kmp_affinity_gran_levels < 0) {
				835	// Set the granularity level based on what levels are modeled
				836	// in the machine topology map.
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	837	__kmp_affinity_gran_levels = 0; // lowest level (e.g. fine)
				838	if (__kmp_affinity_gran > affinity_gran_thread) {
				839	for (int i = 1; i <= depth_full; ++i) {
				840	if (__kmp_affinity_gran <= i) // only count deeper levels
				841	break;
				842	if (levels[depth_full - i] > 0)
				843	__kmp_affinity_gran_levels++;
				844	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	845	}
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	846	if (__kmp_affinity_gran > affinity_gran_package)
				847	__kmp_affinity_gran_levels++; // e.g. granularity = group
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	848	}
				849
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	850	if (__kmp_affinity_verbose)
				851	__kmp_affinity_print_hwloc_tp(retval, nActiveThreads, depth, levels);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	852
				853	KMP_CPU_FREE(oldMask);
				854	*address2os = retval;
				855	return depth;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	856	}
				857	#endif // KMP_USE_HWLOC
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	858
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	859	// If we don't know how to retrieve the machine's processor topology, or
				860	// encounter an error in doing so, this routine is called to form a "flat"
				861	// mapping of os thread id's <-> processor id's.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	862	static int __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
				863	kmp_i18n_id_t *const msg_id) {
				864	*address2os = NULL;
				865	*msg_id = kmp_i18n_null;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	866
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	867	// Even if __kmp_affinity_type == affinity_none, this routine might still
				868	// called to set __kmp_ncores, as well as
				869	// __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
				870	if (!KMP_AFFINITY_CAPABLE()) {
				871	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				872	__kmp_ncores = nPackages = __kmp_xproc;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	873	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	874	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	875	KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
				876	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				877	KMP_INFORM(Uniform, "KMP_AFFINITY");
				878	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				879	__kmp_nThreadsPerCore, __kmp_ncores);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	880	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	881	return 0;
				882	}
				883
				884	// When affinity is off, this routine will still be called to set
				885	// __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
				886	// Make sure all these vars are set correctly, and return now if affinity is
				887	// not enabled.
				888	__kmp_ncores = nPackages = __kmp_avail_proc;
				889	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
				890	if (__kmp_affinity_verbose) {
				891	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				892	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				893	__kmp_affin_fullMask);
				894
				895	KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
				896	if (__kmp_affinity_respect_mask) {
				897	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				898	} else {
				899	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	900	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	901	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				902	KMP_INFORM(Uniform, "KMP_AFFINITY");
				903	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				904	__kmp_nThreadsPerCore, __kmp_ncores);
				905	}
				906	KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
				907	__kmp_pu_os_idx = (int )__kmp_allocate(sizeof(int) __kmp_avail_proc);
				908	if (__kmp_affinity_type == affinity_none) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	909	int avail_ct = 0;
				910	int i;
Jonathan Peyton	c5304aa	2016-06-13 21:28:03 +0000	[diff] [blame]	911	KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	912	if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask))
				913	continue;
				914	__kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	915	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	916	return 0;
				917	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	918
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	919	// Contruct the data structure to be returned.
				920	*address2os =
				921	(AddrUnsPair )__kmp_allocate(sizeof(address2os) __kmp_avail_proc);
				922	int avail_ct = 0;
Jonathan Peyton	baad3f6	2018-08-09 22:04:30 +0000	[diff] [blame]	923	int i;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	924	KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
				925	// Skip this proc if it is not included in the machine model.
				926	if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
				927	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	928	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	929	__kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
				930	Address addr(1);
				931	addr.labels[0] = i;
				932	(*address2os)[avail_ct++] = AddrUnsPair(addr, i);
				933	}
				934	if (__kmp_affinity_verbose) {
				935	KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
				936	}
				937
				938	if (__kmp_affinity_gran_levels < 0) {
				939	// Only the package level is modeled in the machine topology map,
				940	// so the #levels of granularity is either 0 or 1.
				941	if (__kmp_affinity_gran > affinity_gran_package) {
				942	__kmp_affinity_gran_levels = 1;
				943	} else {
				944	__kmp_affinity_gran_levels = 0;
				945	}
				946	}
				947	return 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	948	}
				949
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	950	#if KMP_GROUP_AFFINITY
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	951
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	952	// If multiple Windows* OS processor groups exist, we can create a 2-level
				953	// topology map with the groups at level 0 and the individual procs at level 1.
				954	// This facilitates letting the threads float among all procs in a group,
				955	// if granularity=group (the default when there are multiple groups).
				956	static int __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
				957	kmp_i18n_id_t *const msg_id) {
				958	*address2os = NULL;
				959	*msg_id = kmp_i18n_null;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	960
Jonathan Peyton	5868499	2017-05-15 19:05:59 +0000	[diff] [blame]	961	// If we aren't affinity capable, then return now.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	962	// The flat mapping will be used.
Jonathan Peyton	5868499	2017-05-15 19:05:59 +0000	[diff] [blame]	963	if (!KMP_AFFINITY_CAPABLE()) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	964	// FIXME set *msg_id
				965	return -1;
				966	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	967
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	968	// Contruct the data structure to be returned.
				969	*address2os =
				970	(AddrUnsPair )__kmp_allocate(sizeof(address2os) __kmp_avail_proc);
				971	KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
				972	__kmp_pu_os_idx = (int )__kmp_allocate(sizeof(int) __kmp_avail_proc);
				973	int avail_ct = 0;
				974	int i;
				975	KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
				976	// Skip this proc if it is not included in the machine model.
				977	if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
				978	continue;
				979	}
				980	__kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
				981	Address addr(2);
				982	addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
				983	addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
				984	(*address2os)[avail_ct++] = AddrUnsPair(addr, i);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	985
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	986	if (__kmp_affinity_verbose) {
				987	KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
				988	addr.labels[1]);
				989	}
				990	}
				991
				992	if (__kmp_affinity_gran_levels < 0) {
				993	if (__kmp_affinity_gran == affinity_gran_group) {
				994	__kmp_affinity_gran_levels = 1;
				995	} else if ((__kmp_affinity_gran == affinity_gran_fine) \|\|
				996	(__kmp_affinity_gran == affinity_gran_thread)) {
				997	__kmp_affinity_gran_levels = 0;
				998	} else {
				999	const char *gran_str = NULL;
				1000	if (__kmp_affinity_gran == affinity_gran_core) {
				1001	gran_str = "core";
				1002	} else if (__kmp_affinity_gran == affinity_gran_package) {
				1003	gran_str = "package";
				1004	} else if (__kmp_affinity_gran == affinity_gran_node) {
				1005	gran_str = "node";
				1006	} else {
				1007	KMP_ASSERT(0);
				1008	}
				1009
				1010	// Warning: can't use affinity granularity \"gran\" with group topology
				1011	// method, using "thread"
				1012	__kmp_affinity_gran_levels = 0;
				1013	}
				1014	}
				1015	return 2;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1016	}
				1017
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1018	#endif /* KMP_GROUP_AFFINITY */
				1019
				1020	#if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
				1021
				1022	static int __kmp_cpuid_mask_width(int count) {
				1023	int r = 0;
				1024
				1025	while ((1 << r) < count)
				1026	++r;
				1027	return r;
				1028	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1029
				1030	class apicThreadInfo {
				1031	public:
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1032	unsigned osId; // param to __kmp_affinity_bind_thread
				1033	unsigned apicId; // from cpuid after binding
				1034	unsigned maxCoresPerPkg; // ""
				1035	unsigned maxThreadsPerPkg; // ""
				1036	unsigned pkgId; // inferred from above values
				1037	unsigned coreId; // ""
				1038	unsigned threadId; // ""
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1039	};
				1040
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1041	static int __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a,
				1042	const void *b) {
				1043	const apicThreadInfo aa = (const apicThreadInfo )a;
				1044	const apicThreadInfo bb = (const apicThreadInfo )b;
				1045	if (aa->pkgId < bb->pkgId)
				1046	return -1;
				1047	if (aa->pkgId > bb->pkgId)
				1048	return 1;
				1049	if (aa->coreId < bb->coreId)
				1050	return -1;
				1051	if (aa->coreId > bb->coreId)
				1052	return 1;
				1053	if (aa->threadId < bb->threadId)
				1054	return -1;
				1055	if (aa->threadId > bb->threadId)
				1056	return 1;
				1057	return 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1058	}
				1059
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1060	// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
				1061	// an algorithm which cycles through the available os threads, setting
				1062	// the current thread's affinity mask to that thread, and then retrieves
				1063	// the Apic Id for each thread context using the cpuid instruction.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1064	static int __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
				1065	kmp_i18n_id_t *const msg_id) {
				1066	kmp_cpuid buf;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1067	*address2os = NULL;
				1068	*msg_id = kmp_i18n_null;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1069
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1070	// Check if cpuid leaf 4 is supported.
				1071	__kmp_x86_cpuid(0, 0, &buf);
				1072	if (buf.eax < 4) {
				1073	*msg_id = kmp_i18n_str_NoLeaf4Support;
				1074	return -1;
				1075	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1076
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1077	// The algorithm used starts by setting the affinity to each available thread
				1078	// and retrieving info from the cpuid instruction, so if we are not capable of
				1079	// calling __kmp_get_system_affinity() and _kmp_get_system_affinity(), then we
				1080	// need to do something else - use the defaults that we calculated from
				1081	// issuing cpuid without binding to each proc.
				1082	if (!KMP_AFFINITY_CAPABLE()) {
				1083	// Hack to try and infer the machine topology using only the data
				1084	// available from cpuid on the current thread, and __kmp_xproc.
				1085	KMP_ASSERT(__kmp_affinity_type == affinity_none);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1086
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1087	// Get an upper bound on the number of threads per package using cpuid(1).
				1088	// On some OS/chps combinations where HT is supported by the chip but is
				1089	// disabled, this value will be 2 on a single core chip. Usually, it will be
				1090	// 2 if HT is enabled and 1 if HT is disabled.
				1091	__kmp_x86_cpuid(1, 0, &buf);
				1092	int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
				1093	if (maxThreadsPerPkg == 0) {
				1094	maxThreadsPerPkg = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1095	}
				1096
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1097	// The num cores per pkg comes from cpuid(4). 1 must be added to the encoded
				1098	// value.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1099	//
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1100	// The author of cpu_count.cpp treated this only an upper bound on the
				1101	// number of cores, but I haven't seen any cases where it was greater than
				1102	// the actual number of cores, so we will treat it as exact in this block of
				1103	// code.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1104	//
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1105	// First, we need to check if cpuid(4) is supported on this chip. To see if
				1106	// cpuid(n) is supported, issue cpuid(0) and check if eax has the value n or
				1107	// greater.
				1108	__kmp_x86_cpuid(0, 0, &buf);
				1109	if (buf.eax >= 4) {
				1110	__kmp_x86_cpuid(4, 0, &buf);
				1111	nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
				1112	} else {
				1113	nCoresPerPkg = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1114	}
				1115
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1116	// There is no way to reliably tell if HT is enabled without issuing the
				1117	// cpuid instruction from every thread, can correlating the cpuid info, so
				1118	// if the machine is not affinity capable, we assume that HT is off. We have
				1119	// seen quite a few machines where maxThreadsPerPkg is 2, yet the machine
				1120	// does not support HT.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1121	//
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1122	// - Older OSes are usually found on machines with older chips, which do not
				1123	// support HT.
				1124	// - The performance penalty for mistakenly identifying a machine as HT when
				1125	// it isn't (which results in blocktime being incorrecly set to 0) is
				1126	// greater than the penalty when for mistakenly identifying a machine as
				1127	// being 1 thread/core when it is really HT enabled (which results in
				1128	// blocktime being incorrectly set to a positive value).
				1129	__kmp_ncores = __kmp_xproc;
				1130	nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1131	__kmp_nThreadsPerCore = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1132	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1133	KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
				1134	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1135	if (__kmp_affinity_uniform_topology()) {
				1136	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1137	} else {
				1138	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				1139	}
				1140	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1141	__kmp_nThreadsPerCore, __kmp_ncores);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1142	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1143	return 0;
				1144	}
				1145
				1146	// From here on, we can assume that it is safe to call
				1147	// __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
				1148	// __kmp_affinity_type = affinity_none.
				1149
				1150	// Save the affinity mask for the current thread.
				1151	kmp_affin_mask_t *oldMask;
				1152	KMP_CPU_ALLOC(oldMask);
				1153	KMP_ASSERT(oldMask != NULL);
				1154	__kmp_get_system_affinity(oldMask, TRUE);
				1155
				1156	// Run through each of the available contexts, binding the current thread
				1157	// to it, and obtaining the pertinent information using the cpuid instr.
				1158	//
				1159	// The relevant information is:
				1160	// - Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
				1161	// has a uniqie Apic Id, which is of the form pkg# : core# : thread#.
				1162	// - Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The value
				1163	// of this field determines the width of the core# + thread# fields in the
				1164	// Apic Id. It is also an upper bound on the number of threads per
				1165	// package, but it has been verified that situations happen were it is not
				1166	// exact. In particular, on certain OS/chip combinations where Intel(R)
				1167	// Hyper-Threading Technology is supported by the chip but has been
				1168	// disabled, the value of this field will be 2 (for a single core chip).
				1169	// On other OS/chip combinations supporting Intel(R) Hyper-Threading
				1170	// Technology, the value of this field will be 1 when Intel(R)
				1171	// Hyper-Threading Technology is disabled and 2 when it is enabled.
				1172	// - Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The value
				1173	// of this field (+1) determines the width of the core# field in the Apic
				1174	// Id. The comments in "cpucount.cpp" say that this value is an upper
				1175	// bound, but the IA-32 architecture manual says that it is exactly the
				1176	// number of cores per package, and I haven't seen any case where it
				1177	// wasn't.
				1178	//
				1179	// From this information, deduce the package Id, core Id, and thread Id,
				1180	// and set the corresponding fields in the apicThreadInfo struct.
				1181	unsigned i;
				1182	apicThreadInfo threadInfo = (apicThreadInfo )__kmp_allocate(
				1183	__kmp_avail_proc * sizeof(apicThreadInfo));
				1184	unsigned nApics = 0;
				1185	KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
				1186	// Skip this proc if it is not included in the machine model.
				1187	if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
				1188	continue;
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	1189	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1190	KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
				1191
				1192	__kmp_affinity_dispatch->bind_thread(i);
				1193	threadInfo[nApics].osId = i;
				1194
				1195	// The apic id and max threads per pkg come from cpuid(1).
				1196	__kmp_x86_cpuid(1, 0, &buf);
				1197	if (((buf.edx >> 9) & 1) == 0) {
				1198	__kmp_set_system_affinity(oldMask, TRUE);
				1199	__kmp_free(threadInfo);
				1200	KMP_CPU_FREE(oldMask);
				1201	*msg_id = kmp_i18n_str_ApicNotPresent;
				1202	return -1;
				1203	}
				1204	threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
				1205	threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
				1206	if (threadInfo[nApics].maxThreadsPerPkg == 0) {
				1207	threadInfo[nApics].maxThreadsPerPkg = 1;
				1208	}
				1209
				1210	// Max cores per pkg comes from cpuid(4). 1 must be added to the encoded
				1211	// value.
				1212	//
				1213	// First, we need to check if cpuid(4) is supported on this chip. To see if
				1214	// cpuid(n) is supported, issue cpuid(0) and check if eax has the value n
				1215	// or greater.
				1216	__kmp_x86_cpuid(0, 0, &buf);
				1217	if (buf.eax >= 4) {
				1218	__kmp_x86_cpuid(4, 0, &buf);
				1219	threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
				1220	} else {
				1221	threadInfo[nApics].maxCoresPerPkg = 1;
				1222	}
				1223
				1224	// Infer the pkgId / coreId / threadId using only the info obtained locally.
				1225	int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg);
				1226	threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
				1227
				1228	int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg);
				1229	int widthT = widthCT - widthC;
				1230	if (widthT < 0) {
				1231	// I've never seen this one happen, but I suppose it could, if the cpuid
				1232	// instruction on a chip was really screwed up. Make sure to restore the
				1233	// affinity mask before the tail call.
				1234	__kmp_set_system_affinity(oldMask, TRUE);
				1235	__kmp_free(threadInfo);
				1236	KMP_CPU_FREE(oldMask);
				1237	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1238	return -1;
				1239	}
				1240
				1241	int maskC = (1 << widthC) - 1;
				1242	threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC;
				1243
				1244	int maskT = (1 << widthT) - 1;
				1245	threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;
				1246
				1247	nApics++;
				1248	}
				1249
				1250	// We've collected all the info we need.
				1251	// Restore the old affinity mask for this thread.
				1252	__kmp_set_system_affinity(oldMask, TRUE);
				1253
				1254	// If there's only one thread context to bind to, form an Address object
				1255	// with depth 1 and return immediately (or, if affinity is off, set
				1256	// address2os to NULL and return).
				1257	//
				1258	// If it is configured to omit the package level when there is only a single
				1259	// package, the logic at the end of this routine won't work if there is only
				1260	// a single thread - it would try to form an Address object with depth 0.
				1261	KMP_ASSERT(nApics > 0);
				1262	if (nApics == 1) {
				1263	__kmp_ncores = nPackages = 1;
				1264	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
				1265	if (__kmp_affinity_verbose) {
				1266	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				1267	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1268
				1269	KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
				1270	if (__kmp_affinity_respect_mask) {
				1271	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				1272	} else {
				1273	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				1274	}
				1275	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1276	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1277	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1278	__kmp_nThreadsPerCore, __kmp_ncores);
				1279	}
				1280
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1281	if (__kmp_affinity_type == affinity_none) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1282	__kmp_free(threadInfo);
				1283	KMP_CPU_FREE(oldMask);
				1284	return 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1285	}
				1286
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1287	address2os = (AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair));
				1288	Address addr(1);
				1289	addr.labels[0] = threadInfo[0].pkgId;
				1290	(*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1291
				1292	if (__kmp_affinity_gran_levels < 0) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1293	__kmp_affinity_gran_levels = 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1294	}
				1295
				1296	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1297	__kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1298	}
				1299
				1300	__kmp_free(threadInfo);
				1301	KMP_CPU_FREE(oldMask);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1302	return 1;
				1303	}
				1304
				1305	// Sort the threadInfo table by physical Id.
				1306	qsort(threadInfo, nApics, sizeof(*threadInfo),
				1307	__kmp_affinity_cmp_apicThreadInfo_phys_id);
				1308
				1309	// The table is now sorted by pkgId / coreId / threadId, but we really don't
				1310	// know the radix of any of the fields. pkgId's may be sparsely assigned among
				1311	// the chips on a system. Although coreId's are usually assigned
				1312	// [0 .. coresPerPkg-1] and threadId's are usually assigned
				1313	// [0..threadsPerCore-1], we don't want to make any such assumptions.
				1314	//
				1315	// For that matter, we don't know what coresPerPkg and threadsPerCore (or the
				1316	// total # packages) are at this point - we want to determine that now. We
				1317	// only have an upper bound on the first two figures.
				1318	//
				1319	// We also perform a consistency check at this point: the values returned by
				1320	// the cpuid instruction for any thread bound to a given package had better
				1321	// return the same info for maxThreadsPerPkg and maxCoresPerPkg.
				1322	nPackages = 1;
				1323	nCoresPerPkg = 1;
				1324	__kmp_nThreadsPerCore = 1;
				1325	unsigned nCores = 1;
				1326
				1327	unsigned pkgCt = 1; // to determine radii
				1328	unsigned lastPkgId = threadInfo[0].pkgId;
				1329	unsigned coreCt = 1;
				1330	unsigned lastCoreId = threadInfo[0].coreId;
				1331	unsigned threadCt = 1;
				1332	unsigned lastThreadId = threadInfo[0].threadId;
				1333
				1334	// intra-pkg consist checks
				1335	unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
				1336	unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
				1337
				1338	for (i = 1; i < nApics; i++) {
				1339	if (threadInfo[i].pkgId != lastPkgId) {
				1340	nCores++;
				1341	pkgCt++;
				1342	lastPkgId = threadInfo[i].pkgId;
				1343	if ((int)coreCt > nCoresPerPkg)
				1344	nCoresPerPkg = coreCt;
				1345	coreCt = 1;
				1346	lastCoreId = threadInfo[i].coreId;
				1347	if ((int)threadCt > __kmp_nThreadsPerCore)
				1348	__kmp_nThreadsPerCore = threadCt;
				1349	threadCt = 1;
				1350	lastThreadId = threadInfo[i].threadId;
				1351
				1352	// This is a different package, so go on to the next iteration without
				1353	// doing any consistency checks. Reset the consistency check vars, though.
				1354	prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
				1355	prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
				1356	continue;
				1357	}
				1358
				1359	if (threadInfo[i].coreId != lastCoreId) {
				1360	nCores++;
				1361	coreCt++;
				1362	lastCoreId = threadInfo[i].coreId;
				1363	if ((int)threadCt > __kmp_nThreadsPerCore)
				1364	__kmp_nThreadsPerCore = threadCt;
				1365	threadCt = 1;
				1366	lastThreadId = threadInfo[i].threadId;
				1367	} else if (threadInfo[i].threadId != lastThreadId) {
				1368	threadCt++;
				1369	lastThreadId = threadInfo[i].threadId;
				1370	} else {
				1371	__kmp_free(threadInfo);
				1372	KMP_CPU_FREE(oldMask);
				1373	*msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
				1374	return -1;
				1375	}
				1376
				1377	// Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
				1378	// fields agree between all the threads bounds to a given package.
				1379	if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) \|\|
				1380	(prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
				1381	__kmp_free(threadInfo);
				1382	KMP_CPU_FREE(oldMask);
				1383	*msg_id = kmp_i18n_str_InconsistentCpuidInfo;
				1384	return -1;
				1385	}
				1386	}
				1387	nPackages = pkgCt;
				1388	if ((int)coreCt > nCoresPerPkg)
				1389	nCoresPerPkg = coreCt;
				1390	if ((int)threadCt > __kmp_nThreadsPerCore)
				1391	__kmp_nThreadsPerCore = threadCt;
				1392
				1393	// When affinity is off, this routine will still be called to set
				1394	// __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
				1395	// Make sure all these vars are set correctly, and return now if affinity is
				1396	// not enabled.
				1397	__kmp_ncores = nCores;
				1398	if (__kmp_affinity_verbose) {
				1399	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				1400	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1401
				1402	KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
				1403	if (__kmp_affinity_respect_mask) {
				1404	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				1405	} else {
				1406	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				1407	}
				1408	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1409	if (__kmp_affinity_uniform_topology()) {
				1410	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1411	} else {
				1412	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				1413	}
				1414	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1415	__kmp_nThreadsPerCore, __kmp_ncores);
				1416	}
				1417	KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
Jonathan Peyton	baad3f6	2018-08-09 22:04:30 +0000	[diff] [blame]	1418	KMP_DEBUG_ASSERT(nApics == (unsigned)__kmp_avail_proc);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1419	__kmp_pu_os_idx = (int )__kmp_allocate(sizeof(int) __kmp_avail_proc);
				1420	for (i = 0; i < nApics; ++i) {
				1421	__kmp_pu_os_idx[i] = threadInfo[i].osId;
				1422	}
				1423	if (__kmp_affinity_type == affinity_none) {
				1424	__kmp_free(threadInfo);
				1425	KMP_CPU_FREE(oldMask);
				1426	return 0;
				1427	}
				1428
				1429	// Now that we've determined the number of packages, the number of cores per
				1430	// package, and the number of threads per core, we can construct the data
				1431	// structure that is to be returned.
				1432	int pkgLevel = 0;
				1433	int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
				1434	int threadLevel =
				1435	(__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
				1436	unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
				1437
				1438	KMP_ASSERT(depth > 0);
				1439	address2os = (AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair) * nApics);
				1440
				1441	for (i = 0; i < nApics; ++i) {
				1442	Address addr(depth);
				1443	unsigned os = threadInfo[i].osId;
				1444	int d = 0;
				1445
				1446	if (pkgLevel >= 0) {
				1447	addr.labels[d++] = threadInfo[i].pkgId;
				1448	}
				1449	if (coreLevel >= 0) {
				1450	addr.labels[d++] = threadInfo[i].coreId;
				1451	}
				1452	if (threadLevel >= 0) {
				1453	addr.labels[d++] = threadInfo[i].threadId;
				1454	}
				1455	(*address2os)[i] = AddrUnsPair(addr, os);
				1456	}
				1457
				1458	if (__kmp_affinity_gran_levels < 0) {
				1459	// Set the granularity level based on what levels are modeled in the machine
				1460	// topology map.
				1461	__kmp_affinity_gran_levels = 0;
				1462	if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
				1463	__kmp_affinity_gran_levels++;
				1464	}
				1465	if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
				1466	__kmp_affinity_gran_levels++;
				1467	}
				1468	if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
				1469	__kmp_affinity_gran_levels++;
				1470	}
				1471	}
				1472
				1473	if (__kmp_affinity_verbose) {
				1474	__kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
				1475	coreLevel, threadLevel);
				1476	}
				1477
				1478	__kmp_free(threadInfo);
				1479	KMP_CPU_FREE(oldMask);
				1480	return depth;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1481	}
				1482
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1483	// Intel(R) microarchitecture code name Nehalem, Dunnington and later
				1484	// architectures support a newer interface for specifying the x2APIC Ids,
				1485	// based on cpuid leaf 11.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1486	static int __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
				1487	kmp_i18n_id_t *const msg_id) {
				1488	kmp_cpuid buf;
				1489	*address2os = NULL;
				1490	*msg_id = kmp_i18n_null;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1491
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1492	// Check to see if cpuid leaf 11 is supported.
				1493	__kmp_x86_cpuid(0, 0, &buf);
				1494	if (buf.eax < 11) {
				1495	*msg_id = kmp_i18n_str_NoLeaf11Support;
				1496	return -1;
				1497	}
				1498	__kmp_x86_cpuid(11, 0, &buf);
				1499	if (buf.ebx == 0) {
				1500	*msg_id = kmp_i18n_str_NoLeaf11Support;
				1501	return -1;
				1502	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1503
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1504	// Find the number of levels in the machine topology. While we're at it, get
				1505	// the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will try to
				1506	// get more accurate values later by explicitly counting them, but get
				1507	// reasonable defaults now, in case we return early.
				1508	int level;
				1509	int threadLevel = -1;
				1510	int coreLevel = -1;
				1511	int pkgLevel = -1;
				1512	__kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
				1513
				1514	for (level = 0;; level++) {
				1515	if (level > 31) {
				1516	// FIXME: Hack for DPD200163180
				1517	//
				1518	// If level is big then something went wrong -> exiting
				1519	//
				1520	// There could actually be 32 valid levels in the machine topology, but so
				1521	// far, the only machine we have seen which does not exit this loop before
				1522	// iteration 32 has fubar x2APIC settings.
				1523	//
				1524	// For now, just reject this case based upon loop trip count.
				1525	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1526	return -1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1527	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1528	__kmp_x86_cpuid(11, level, &buf);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1529	if (buf.ebx == 0) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1530	if (pkgLevel < 0) {
				1531	// Will infer nPackages from __kmp_xproc
				1532	pkgLevel = level;
				1533	level++;
				1534	}
				1535	break;
				1536	}
				1537	int kind = (buf.ecx >> 8) & 0xff;
				1538	if (kind == 1) {
				1539	// SMT level
				1540	threadLevel = level;
				1541	coreLevel = -1;
				1542	pkgLevel = -1;
				1543	__kmp_nThreadsPerCore = buf.ebx & 0xffff;
				1544	if (__kmp_nThreadsPerCore == 0) {
				1545	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1546	return -1;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1547	}
				1548	} else if (kind == 2) {
				1549	// core level
				1550	coreLevel = level;
				1551	pkgLevel = -1;
				1552	nCoresPerPkg = buf.ebx & 0xffff;
				1553	if (nCoresPerPkg == 0) {
				1554	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1555	return -1;
				1556	}
				1557	} else {
				1558	if (level <= 0) {
				1559	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1560	return -1;
				1561	}
				1562	if (pkgLevel >= 0) {
				1563	continue;
				1564	}
				1565	pkgLevel = level;
				1566	nPackages = buf.ebx & 0xffff;
				1567	if (nPackages == 0) {
				1568	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1569	return -1;
				1570	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1571	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1572	}
				1573	int depth = level;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1574
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1575	// In the above loop, "level" was counted from the finest level (usually
				1576	// thread) to the coarsest. The caller expects that we will place the labels
				1577	// in (*address2os)[].first.labels[] in the inverse order, so we need to
				1578	// invert the vars saying which level means what.
				1579	if (threadLevel >= 0) {
				1580	threadLevel = depth - threadLevel - 1;
				1581	}
				1582	if (coreLevel >= 0) {
				1583	coreLevel = depth - coreLevel - 1;
				1584	}
				1585	KMP_DEBUG_ASSERT(pkgLevel >= 0);
				1586	pkgLevel = depth - pkgLevel - 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1587
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1588	// The algorithm used starts by setting the affinity to each available thread
				1589	// and retrieving info from the cpuid instruction, so if we are not capable of
				1590	// calling __kmp_get_system_affinity() and _kmp_get_system_affinity(), then we
				1591	// need to do something else - use the defaults that we calculated from
				1592	// issuing cpuid without binding to each proc.
				1593	if (!KMP_AFFINITY_CAPABLE()) {
				1594	// Hack to try and infer the machine topology using only the data
				1595	// available from cpuid on the current thread, and __kmp_xproc.
				1596	KMP_ASSERT(__kmp_affinity_type == affinity_none);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1597
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1598	__kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
				1599	nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1600	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1601	KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
				1602	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1603	if (__kmp_affinity_uniform_topology()) {
				1604	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1605	} else {
				1606	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				1607	}
				1608	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1609	__kmp_nThreadsPerCore, __kmp_ncores);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1610	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1611	return 0;
				1612	}
				1613
				1614	// From here on, we can assume that it is safe to call
				1615	// __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
				1616	// __kmp_affinity_type = affinity_none.
				1617
				1618	// Save the affinity mask for the current thread.
				1619	kmp_affin_mask_t *oldMask;
				1620	KMP_CPU_ALLOC(oldMask);
				1621	__kmp_get_system_affinity(oldMask, TRUE);
				1622
				1623	// Allocate the data structure to be returned.
				1624	AddrUnsPair *retval =
				1625	(AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair) __kmp_avail_proc);
				1626
				1627	// Run through each of the available contexts, binding the current thread
				1628	// to it, and obtaining the pertinent information using the cpuid instr.
				1629	unsigned int proc;
				1630	int nApics = 0;
				1631	KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
				1632	// Skip this proc if it is not included in the machine model.
				1633	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
				1634	continue;
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	1635	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1636	KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
				1637
				1638	__kmp_affinity_dispatch->bind_thread(proc);
				1639
				1640	// Extract labels for each level in the machine topology map from Apic ID.
				1641	Address addr(depth);
				1642	int prev_shift = 0;
				1643
				1644	for (level = 0; level < depth; level++) {
				1645	__kmp_x86_cpuid(11, level, &buf);
				1646	unsigned apicId = buf.edx;
				1647	if (buf.ebx == 0) {
				1648	if (level != depth - 1) {
				1649	KMP_CPU_FREE(oldMask);
				1650	*msg_id = kmp_i18n_str_InconsistentCpuidInfo;
				1651	return -1;
				1652	}
				1653	addr.labels[depth - level - 1] = apicId >> prev_shift;
				1654	level++;
				1655	break;
				1656	}
				1657	int shift = buf.eax & 0x1f;
				1658	int mask = (1 << shift) - 1;
				1659	addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
				1660	prev_shift = shift;
				1661	}
				1662	if (level != depth) {
				1663	KMP_CPU_FREE(oldMask);
				1664	*msg_id = kmp_i18n_str_InconsistentCpuidInfo;
				1665	return -1;
				1666	}
				1667
				1668	retval[nApics] = AddrUnsPair(addr, proc);
				1669	nApics++;
				1670	}
				1671
				1672	// We've collected all the info we need.
				1673	// Restore the old affinity mask for this thread.
				1674	__kmp_set_system_affinity(oldMask, TRUE);
				1675
				1676	// If there's only one thread context to bind to, return now.
				1677	KMP_ASSERT(nApics > 0);
				1678	if (nApics == 1) {
				1679	__kmp_ncores = nPackages = 1;
				1680	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
				1681	if (__kmp_affinity_verbose) {
				1682	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				1683	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1684
				1685	KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
				1686	if (__kmp_affinity_respect_mask) {
				1687	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				1688	} else {
				1689	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				1690	}
				1691	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1692	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1693	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1694	__kmp_nThreadsPerCore, __kmp_ncores);
				1695	}
				1696
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1697	if (__kmp_affinity_type == affinity_none) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1698	__kmp_free(retval);
				1699	KMP_CPU_FREE(oldMask);
				1700	return 0;
				1701	}
				1702
				1703	// Form an Address object which only includes the package level.
				1704	Address addr(1);
				1705	addr.labels[0] = retval[0].first.labels[pkgLevel];
				1706	retval[0].first = addr;
				1707
				1708	if (__kmp_affinity_gran_levels < 0) {
				1709	__kmp_affinity_gran_levels = 0;
				1710	}
				1711
				1712	if (__kmp_affinity_verbose) {
				1713	__kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
				1714	}
				1715
				1716	*address2os = retval;
				1717	KMP_CPU_FREE(oldMask);
				1718	return 1;
				1719	}
				1720
				1721	// Sort the table by physical Id.
				1722	qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
				1723
				1724	// Find the radix at each of the levels.
				1725	unsigned totals = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1726	unsigned counts = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1727	unsigned maxCt = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1728	unsigned last = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1729	for (level = 0; level < depth; level++) {
				1730	totals[level] = 1;
				1731	maxCt[level] = 1;
				1732	counts[level] = 1;
				1733	last[level] = retval[0].first.labels[level];
				1734	}
				1735
				1736	// From here on, the iteration variable "level" runs from the finest level to
				1737	// the coarsest, i.e. we iterate forward through
				1738	// (*address2os)[].first.labels[] - in the previous loops, we iterated
				1739	// backwards.
				1740	for (proc = 1; (int)proc < nApics; proc++) {
				1741	int level;
				1742	for (level = 0; level < depth; level++) {
				1743	if (retval[proc].first.labels[level] != last[level]) {
				1744	int j;
				1745	for (j = level + 1; j < depth; j++) {
				1746	totals[j]++;
				1747	counts[j] = 1;
				1748	// The line below causes printing incorrect topology information in
				1749	// case the max value for some level (maxCt[level]) is encountered
				1750	// earlier than some less value while going through the array. For
				1751	// example, let pkg0 has 4 cores and pkg1 has 2 cores. Then
				1752	// maxCt[1] == 2
				1753	// whereas it must be 4.
				1754	// TODO!!! Check if it can be commented safely
				1755	// maxCt[j] = 1;
				1756	last[j] = retval[proc].first.labels[j];
				1757	}
				1758	totals[level]++;
				1759	counts[level]++;
				1760	if (counts[level] > maxCt[level]) {
				1761	maxCt[level] = counts[level];
				1762	}
				1763	last[level] = retval[proc].first.labels[level];
				1764	break;
				1765	} else if (level == depth - 1) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1766	__kmp_free(last);
				1767	__kmp_free(maxCt);
				1768	__kmp_free(counts);
				1769	__kmp_free(totals);
				1770	__kmp_free(retval);
				1771	KMP_CPU_FREE(oldMask);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1772	*msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
				1773	return -1;
				1774	}
				1775	}
				1776	}
				1777
				1778	// When affinity is off, this routine will still be called to set
				1779	// __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
				1780	// Make sure all these vars are set correctly, and return if affinity is not
				1781	// enabled.
				1782	if (threadLevel >= 0) {
				1783	__kmp_nThreadsPerCore = maxCt[threadLevel];
				1784	} else {
				1785	__kmp_nThreadsPerCore = 1;
				1786	}
				1787	nPackages = totals[pkgLevel];
				1788
				1789	if (coreLevel >= 0) {
				1790	__kmp_ncores = totals[coreLevel];
				1791	nCoresPerPkg = maxCt[coreLevel];
				1792	} else {
				1793	__kmp_ncores = nPackages;
				1794	nCoresPerPkg = 1;
				1795	}
				1796
				1797	// Check to see if the machine topology is uniform
				1798	unsigned prod = maxCt[0];
				1799	for (level = 1; level < depth; level++) {
				1800	prod *= maxCt[level];
				1801	}
				1802	bool uniform = (prod == totals[level - 1]);
				1803
				1804	// Print the machine topology summary.
				1805	if (__kmp_affinity_verbose) {
				1806	char mask[KMP_AFFIN_MASK_PRINT_LEN];
				1807	__kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1808
				1809	KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
				1810	if (__kmp_affinity_respect_mask) {
				1811	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
				1812	} else {
				1813	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
				1814	}
				1815	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1816	if (uniform) {
				1817	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1818	} else {
				1819	KMP_INFORM(NonUniform, "KMP_AFFINITY");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1820	}
				1821
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1822	kmp_str_buf_t buf;
				1823	__kmp_str_buf_init(&buf);
				1824
				1825	__kmp_str_buf_print(&buf, "%d", totals[0]);
				1826	for (level = 1; level <= pkgLevel; level++) {
				1827	__kmp_str_buf_print(&buf, " x %d", maxCt[level]);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1828	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1829	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
				1830	__kmp_nThreadsPerCore, __kmp_ncores);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1831
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1832	__kmp_str_buf_free(&buf);
				1833	}
				1834	KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
				1835	KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
				1836	__kmp_pu_os_idx = (int )__kmp_allocate(sizeof(int) __kmp_avail_proc);
				1837	for (proc = 0; (int)proc < nApics; ++proc) {
				1838	__kmp_pu_os_idx[proc] = retval[proc].second;
				1839	}
				1840	if (__kmp_affinity_type == affinity_none) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1841	__kmp_free(last);
				1842	__kmp_free(maxCt);
				1843	__kmp_free(counts);
				1844	__kmp_free(totals);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1845	__kmp_free(retval);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1846	KMP_CPU_FREE(oldMask);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1847	return 0;
				1848	}
				1849
				1850	// Find any levels with radiix 1, and remove them from the map
				1851	// (except for the package level).
				1852	int new_depth = 0;
				1853	for (level = 0; level < depth; level++) {
				1854	if ((maxCt[level] == 1) && (level != pkgLevel)) {
				1855	continue;
				1856	}
				1857	new_depth++;
				1858	}
				1859
				1860	// If we are removing any levels, allocate a new vector to return,
				1861	// and copy the relevant information to it.
				1862	if (new_depth != depth) {
				1863	AddrUnsPair *new_retval =
				1864	(AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair) nApics);
				1865	for (proc = 0; (int)proc < nApics; proc++) {
				1866	Address addr(new_depth);
				1867	new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
				1868	}
				1869	int new_level = 0;
				1870	int newPkgLevel = -1;
				1871	int newCoreLevel = -1;
				1872	int newThreadLevel = -1;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1873	for (level = 0; level < depth; level++) {
				1874	if ((maxCt[level] == 1) && (level != pkgLevel)) {
				1875	// Remove this level. Never remove the package level
				1876	continue;
				1877	}
				1878	if (level == pkgLevel) {
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	1879	newPkgLevel = new_level;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1880	}
				1881	if (level == coreLevel) {
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	1882	newCoreLevel = new_level;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1883	}
				1884	if (level == threadLevel) {
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	1885	newThreadLevel = new_level;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1886	}
				1887	for (proc = 0; (int)proc < nApics; proc++) {
				1888	new_retval[proc].first.labels[new_level] =
				1889	retval[proc].first.labels[level];
				1890	}
				1891	new_level++;
				1892	}
				1893
				1894	__kmp_free(retval);
				1895	retval = new_retval;
				1896	depth = new_depth;
				1897	pkgLevel = newPkgLevel;
				1898	coreLevel = newCoreLevel;
				1899	threadLevel = newThreadLevel;
				1900	}
				1901
				1902	if (__kmp_affinity_gran_levels < 0) {
				1903	// Set the granularity level based on what levels are modeled
				1904	// in the machine topology map.
				1905	__kmp_affinity_gran_levels = 0;
				1906	if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
				1907	__kmp_affinity_gran_levels++;
				1908	}
				1909	if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
				1910	__kmp_affinity_gran_levels++;
				1911	}
				1912	if (__kmp_affinity_gran > affinity_gran_package) {
				1913	__kmp_affinity_gran_levels++;
				1914	}
				1915	}
				1916
				1917	if (__kmp_affinity_verbose) {
				1918	__kmp_affinity_print_topology(retval, nApics, depth, pkgLevel, coreLevel,
				1919	threadLevel);
				1920	}
				1921
				1922	__kmp_free(last);
				1923	__kmp_free(maxCt);
				1924	__kmp_free(counts);
				1925	__kmp_free(totals);
				1926	KMP_CPU_FREE(oldMask);
				1927	*address2os = retval;
				1928	return depth;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1929	}
				1930
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	1931	#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1932
// Column layout of one parsed cpuinfo record (a flat array of unsigned).
// Columns run from least significant (OS proc id) to most significant
// (package id, followed by any node_<n> levels starting at nodeIdIndex).
#define osIdIndex 0
#define threadIdIndex 1
#define coreIdIndex 2
#define pkgIdIndex 3
#define nodeIdIndex 4

typedef unsigned *ProcCpuInfo;
// Highest column index currently in use. Starts at the package column and is
// raised when node_<n> fields are seen while scanning the cpuinfo file.
static unsigned maxIndex = pkgIdIndex;

// qsort() comparator for an array of record pointers (unsigned *).
// Records are compared column by column, starting at maxIndex (most
// significant) and ending at osIdIndex (least significant).
//   a, b: pointers to array elements, i.e. pointers to "unsigned *".
// Returns -1, 0 or 1 when *a orders before, equal to, or after *b.
static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a,
                                                  const void *b) {
  // Each qsort element is itself a pointer to a record, so dereference once
  // to reach the record's column array.
  const unsigned *lhs = *static_cast<const unsigned *const *>(a);
  const unsigned *rhs = *static_cast<const unsigned *const *>(b);
  // The index is unsigned, so the loop exits via an explicit check on the
  // last column rather than an "i >= 0" condition that could never fail.
  for (unsigned i = maxIndex;; i--) {
    if (lhs[i] < rhs[i])
      return -1;
    if (lhs[i] > rhs[i])
      return 1;
    if (i == osIdIndex)
      break;
  }
  return 0;
}
				1957
Jonathan Peyton	f639936	2018-07-09 17:51:13 +0000	[diff] [blame]	1958	#if KMP_USE_HIER_SCHED
				1959	// Set the array sizes for the hierarchy layers
				1960	static void __kmp_dispatch_set_hierarchy_values() {
				1961	// Set the maximum number of L1's to number of cores
				1962	// Set the maximum number of L2's to to either number of cores / 2 for
				1963	// Intel(R) Xeon Phi(TM) coprocessor formally codenamed Knights Landing
				1964	// Or the number of cores for Intel(R) Xeon(R) processors
				1965	// Set the maximum number of NUMA nodes and L3's to number of packages
				1966	__kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1] =
				1967	nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
				1968	__kmp_hier_max_units[kmp_hier_layer_e::LAYER_L1 + 1] = __kmp_ncores;
				1969	#if KMP_ARCH_X86_64 && (KMP_OS_LINUX \|\| KMP_OS_WINDOWS)
				1970	if (__kmp_mic_type >= mic3)
				1971	__kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores / 2;
				1972	else
				1973	#endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX \|\| KMP_OS_WINDOWS)
				1974	__kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores;
				1975	__kmp_hier_max_units[kmp_hier_layer_e::LAYER_L3 + 1] = nPackages;
				1976	__kmp_hier_max_units[kmp_hier_layer_e::LAYER_NUMA + 1] = nPackages;
				1977	__kmp_hier_max_units[kmp_hier_layer_e::LAYER_LOOP + 1] = 1;
				1978	// Set the number of threads per unit
				1979	// Number of hardware threads per L1/L2/L3/NUMA/LOOP
				1980	__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_THREAD + 1] = 1;
				1981	__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L1 + 1] =
				1982	__kmp_nThreadsPerCore;
				1983	#if KMP_ARCH_X86_64 && (KMP_OS_LINUX \|\| KMP_OS_WINDOWS)
				1984	if (__kmp_mic_type >= mic3)
				1985	__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
				1986	2 * __kmp_nThreadsPerCore;
				1987	else
				1988	#endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX \|\| KMP_OS_WINDOWS)
				1989	__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
				1990	__kmp_nThreadsPerCore;
				1991	__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L3 + 1] =
				1992	nCoresPerPkg * __kmp_nThreadsPerCore;
				1993	__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_NUMA + 1] =
				1994	nCoresPerPkg * __kmp_nThreadsPerCore;
				1995	__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LOOP + 1] =
				1996	nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
				1997	}
				1998
				1999	// Return the index into the hierarchy for this tid and layer type (L1, L2, etc)
				2000	// i.e., this thread's L1 or this thread's L2, etc.
				2001	int __kmp_dispatch_get_index(int tid, kmp_hier_layer_e type) {
				2002	int index = type + 1;
				2003	int num_hw_threads = __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1];
				2004	KMP_DEBUG_ASSERT(type != kmp_hier_layer_e::LAYER_LAST);
				2005	if (type == kmp_hier_layer_e::LAYER_THREAD)
				2006	return tid;
				2007	else if (type == kmp_hier_layer_e::LAYER_LOOP)
				2008	return 0;
				2009	KMP_DEBUG_ASSERT(__kmp_hier_max_units[index] != 0);
				2010	if (tid >= num_hw_threads)
				2011	tid = tid % num_hw_threads;
				2012	return (tid / __kmp_hier_threads_per[index]) % __kmp_hier_max_units[index];
				2013	}
				2014
				2015	// Return the number of t1's per t2
				2016	int __kmp_dispatch_get_t1_per_t2(kmp_hier_layer_e t1, kmp_hier_layer_e t2) {
				2017	int i1 = t1 + 1;
				2018	int i2 = t2 + 1;
				2019	KMP_DEBUG_ASSERT(i1 <= i2);
				2020	KMP_DEBUG_ASSERT(t1 != kmp_hier_layer_e::LAYER_LAST);
				2021	KMP_DEBUG_ASSERT(t2 != kmp_hier_layer_e::LAYER_LAST);
				2022	KMP_DEBUG_ASSERT(__kmp_hier_threads_per[i1] != 0);
				2023	// (nthreads/t2) / (nthreads/t1) = t1 / t2
				2024	return __kmp_hier_threads_per[i2] / __kmp_hier_threads_per[i1];
				2025	}
				2026	#endif // KMP_USE_HIER_SCHED
				2027
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2028	// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
				2029	// affinity map.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2030	static int __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os,
				2031	int *line,
				2032	kmp_i18n_id_t *const msg_id,
				2033	FILE *f) {
				2034	*address2os = NULL;
				2035	*msg_id = kmp_i18n_null;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2036
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2037	// Scan of the file, and count the number of "processor" (osId) fields,
				2038	// and find the highest value of <n> for a node_<n> field.
				2039	char buf[256];
				2040	unsigned num_records = 0;
				2041	while (!feof(f)) {
				2042	buf[sizeof(buf) - 1] = 1;
				2043	if (!fgets(buf, sizeof(buf), f)) {
				2044	// Read errors presumably because of EOF
				2045	break;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2046	}
				2047
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2048	char s1[] = "processor";
				2049	if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
				2050	num_records++;
				2051	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2052	}
				2053
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2054	// FIXME - this will match "node_<n> <garbage>"
				2055	unsigned level;
Andrey Churbanov	5ba90c7	2017-07-17 09:03:14 +0000	[diff] [blame]	2056	if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2057	if (nodeIdIndex + level >= maxIndex) {
				2058	maxIndex = nodeIdIndex + level;
				2059	}
				2060	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2061	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2062	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2063
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2064	// Check for empty file / no valid processor records, or too many. The number
				2065	// of records can't exceed the number of valid bits in the affinity mask.
				2066	if (num_records == 0) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2067	*line = 0;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2068	*msg_id = kmp_i18n_str_NoProcRecords;
				2069	return -1;
				2070	}
				2071	if (num_records > (unsigned)__kmp_xproc) {
				2072	*line = 0;
				2073	*msg_id = kmp_i18n_str_TooManyProcRecords;
				2074	return -1;
				2075	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2076
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2077	// Set the file pointer back to the beginning, so that we can scan the file
				2078	// again, this time performing a full parse of the data. Allocate a vector of
				2079	// ProcCpuInfo object, where we will place the data. Adding an extra element
				2080	// at the end allows us to remove a lot of extra checks for termination
				2081	// conditions.
				2082	if (fseek(f, 0, SEEK_SET) != 0) {
				2083	*line = 0;
				2084	*msg_id = kmp_i18n_str_CantRewindCpuinfo;
				2085	return -1;
				2086	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2087
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2088	// Allocate the array of records to store the proc info in. The dummy
				2089	// element at the end makes the logic in filling them out easier to code.
				2090	unsigned **threadInfo =
				2091	(unsigned *)__kmp_allocate((num_records + 1) sizeof(unsigned *));
				2092	unsigned i;
				2093	for (i = 0; i <= num_records; i++) {
				2094	threadInfo[i] =
				2095	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned));
				2096	}
				2097
				2098	#define CLEANUP_THREAD_INFO \
				2099	for (i = 0; i <= num_records; i++) { \
				2100	__kmp_free(threadInfo[i]); \
				2101	} \
				2102	__kmp_free(threadInfo);
				2103
				2104	// A value of UINT_MAX means that we didn't find the field
				2105	unsigned __index;
				2106
				2107	#define INIT_PROC_INFO(p) \
				2108	for (__index = 0; __index <= maxIndex; __index++) { \
				2109	(p)[__index] = UINT_MAX; \
				2110	}
				2111
				2112	for (i = 0; i <= num_records; i++) {
				2113	INIT_PROC_INFO(threadInfo[i]);
				2114	}
				2115
				2116	unsigned num_avail = 0;
				2117	*line = 0;
				2118	while (!feof(f)) {
				2119	// Create an inner scoping level, so that all the goto targets at the end of
				2120	// the loop appear in an outer scoping level. This avoids warnings about
				2121	// jumping past an initialization to a target in the same block.
				2122	{
				2123	buf[sizeof(buf) - 1] = 1;
				2124	bool long_line = false;
				2125	if (!fgets(buf, sizeof(buf), f)) {
				2126	// Read errors presumably because of EOF
				2127	// If there is valid data in threadInfo[num_avail], then fake
				2128	// a blank line to ensure that the last address gets parsed.
				2129	bool valid = false;
				2130	for (i = 0; i <= maxIndex; i++) {
				2131	if (threadInfo[num_avail][i] != UINT_MAX) {
				2132	valid = true;
				2133	}
				2134	}
				2135	if (!valid) {
				2136	break;
				2137	}
				2138	buf[0] = 0;
				2139	} else if (!buf[sizeof(buf) - 1]) {
				2140	// The line is longer than the buffer. Set a flag and don't
				2141	// emit an error if we were going to ignore the line, anyway.
				2142	long_line = true;
				2143
				2144	#define CHECK_LINE \
				2145	if (long_line) { \
				2146	CLEANUP_THREAD_INFO; \
				2147	*msg_id = kmp_i18n_str_LongLineCpuinfo; \
				2148	return -1; \
				2149	}
				2150	}
				2151	(*line)++;
				2152
				2153	char s1[] = "processor";
				2154	if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
				2155	CHECK_LINE;
				2156	char *p = strchr(buf + sizeof(s1) - 1, ':');
				2157	unsigned val;
				2158	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
				2159	goto no_val;
				2160	if (threadInfo[num_avail][osIdIndex] != UINT_MAX)
Paul Osmialowski	7634f70	2017-12-13 16:12:24 +0000	[diff] [blame]	2161	#if KMP_ARCH_AARCH64
				2162	// Handle the old AArch64 /proc/cpuinfo layout differently,
				2163	// it contains all of the 'processor' entries listed in a
				2164	// single 'Processor' section, therefore the normal looking
				2165	// for duplicates in that section will always fail.
				2166	num_avail++;
				2167	#else
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2168	goto dup_field;
Paul Osmialowski	7634f70	2017-12-13 16:12:24 +0000	[diff] [blame]	2169	#endif
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2170	threadInfo[num_avail][osIdIndex] = val;
Jonas Hahnfeld	ce528ac	2017-12-08 15:07:05 +0000	[diff] [blame]	2171	#if KMP_OS_LINUX && !(KMP_ARCH_X86 \|\| KMP_ARCH_X86_64)
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2172	char path[256];
				2173	KMP_SNPRINTF(
				2174	path, sizeof(path),
				2175	"/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
				2176	threadInfo[num_avail][osIdIndex]);
				2177	__kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	2178
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2179	KMP_SNPRINTF(path, sizeof(path),
				2180	"/sys/devices/system/cpu/cpu%u/topology/core_id",
				2181	threadInfo[num_avail][osIdIndex]);
				2182	__kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
				2183	continue;
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	2184	#else
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2185	}
				2186	char s2[] = "physical id";
				2187	if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
				2188	CHECK_LINE;
				2189	char *p = strchr(buf + sizeof(s2) - 1, ':');
				2190	unsigned val;
				2191	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
				2192	goto no_val;
				2193	if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX)
				2194	goto dup_field;
				2195	threadInfo[num_avail][pkgIdIndex] = val;
				2196	continue;
				2197	}
				2198	char s3[] = "core id";
				2199	if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
				2200	CHECK_LINE;
				2201	char *p = strchr(buf + sizeof(s3) - 1, ':');
				2202	unsigned val;
				2203	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
				2204	goto no_val;
				2205	if (threadInfo[num_avail][coreIdIndex] != UINT_MAX)
				2206	goto dup_field;
				2207	threadInfo[num_avail][coreIdIndex] = val;
				2208	continue;
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	2209	#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2210	}
				2211	char s4[] = "thread id";
				2212	if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
				2213	CHECK_LINE;
				2214	char *p = strchr(buf + sizeof(s4) - 1, ':');
				2215	unsigned val;
				2216	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
				2217	goto no_val;
				2218	if (threadInfo[num_avail][threadIdIndex] != UINT_MAX)
				2219	goto dup_field;
				2220	threadInfo[num_avail][threadIdIndex] = val;
				2221	continue;
				2222	}
				2223	unsigned level;
Jonathan Peyton	6a393f7	2017-09-05 15:43:58 +0000	[diff] [blame]	2224	if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2225	CHECK_LINE;
				2226	char *p = strchr(buf + sizeof(s4) - 1, ':');
				2227	unsigned val;
				2228	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
				2229	goto no_val;
				2230	KMP_ASSERT(nodeIdIndex + level <= maxIndex);
				2231	if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX)
				2232	goto dup_field;
				2233	threadInfo[num_avail][nodeIdIndex + level] = val;
				2234	continue;
				2235	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2236
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2237	// We didn't recognize the leading token on the line. There are lots of
				2238	// leading tokens that we don't recognize - if the line isn't empty, go on
				2239	// to the next line.
				2240	if ((buf != 0) && (buf != '\n')) {
				2241	// If the line is longer than the buffer, read characters
				2242	// until we find a newline.
				2243	if (long_line) {
				2244	int ch;
				2245	while (((ch = fgetc(f)) != EOF) && (ch != '\n'))
				2246	;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2247	}
				2248	continue;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2249	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2250
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2251	// A newline has signalled the end of the processor record.
				2252	// Check that there aren't too many procs specified.
				2253	if ((int)num_avail == __kmp_xproc) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2254	CLEANUP_THREAD_INFO;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2255	*msg_id = kmp_i18n_str_TooManyEntries;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2256	return -1;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2257	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2258
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2259	// Check for missing fields. The osId field must be there, and we
				2260	// currently require that the physical id field is specified, also.
				2261	if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2262	CLEANUP_THREAD_INFO;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2263	*msg_id = kmp_i18n_str_MissingProcField;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2264	return -1;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2265	}
				2266	if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2267	CLEANUP_THREAD_INFO;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2268	*msg_id = kmp_i18n_str_MissingPhysicalIDField;
				2269	return -1;
				2270	}
				2271
				2272	// Skip this proc if it is not included in the machine model.
				2273	if (!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex],
				2274	__kmp_affin_fullMask)) {
				2275	INIT_PROC_INFO(threadInfo[num_avail]);
				2276	continue;
				2277	}
				2278
				2279	// We have a successful parse of this proc's info.
				2280	// Increment the counter, and prepare for the next proc.
				2281	num_avail++;
				2282	KMP_ASSERT(num_avail <= num_records);
				2283	INIT_PROC_INFO(threadInfo[num_avail]);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2284	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2285	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2286
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2287	no_val:
				2288	CLEANUP_THREAD_INFO;
				2289	*msg_id = kmp_i18n_str_MissingValCpuinfo;
				2290	return -1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2291
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2292	dup_field:
				2293	CLEANUP_THREAD_INFO;
				2294	*msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
				2295	return -1;
				2296	}
				2297	*line = 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2298
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2299	#if KMP_MIC && REDUCE_TEAM_SIZE
				2300	unsigned teamSize = 0;
				2301	#endif // KMP_MIC && REDUCE_TEAM_SIZE
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2302
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2303	// check for num_records == __kmp_xproc ???
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2304
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2305	// If there's only one thread context to bind to, form an Address object with
				2306	// depth 1 and return immediately (or, if affinity is off, set address2os to
				2307	// NULL and return).
				2308	//
				2309	// If it is configured to omit the package level when there is only a single
				2310	// package, the logic at the end of this routine won't work if there is only a
				2311	// single thread - it would try to form an Address object with depth 0.
				2312	KMP_ASSERT(num_avail > 0);
				2313	KMP_ASSERT(num_avail <= num_records);
				2314	if (num_avail == 1) {
				2315	__kmp_ncores = 1;
				2316	__kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2317	if (__kmp_affinity_verbose) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2318	if (!KMP_AFFINITY_CAPABLE()) {
				2319	KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
				2320	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				2321	KMP_INFORM(Uniform, "KMP_AFFINITY");
				2322	} else {
				2323	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				2324	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				2325	__kmp_affin_fullMask);
				2326	KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
				2327	if (__kmp_affinity_respect_mask) {
				2328	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				2329	} else {
				2330	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2331	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2332	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				2333	KMP_INFORM(Uniform, "KMP_AFFINITY");
				2334	}
				2335	int index;
				2336	kmp_str_buf_t buf;
				2337	__kmp_str_buf_init(&buf);
				2338	__kmp_str_buf_print(&buf, "1");
				2339	for (index = maxIndex - 1; index > pkgIdIndex; index--) {
				2340	__kmp_str_buf_print(&buf, " x 1");
				2341	}
				2342	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
				2343	__kmp_str_buf_free(&buf);
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	2344	}
				2345
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2346	if (__kmp_affinity_type == affinity_none) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2347	CLEANUP_THREAD_INFO;
				2348	return 0;
				2349	}
				2350
				2351	address2os = (AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair));
				2352	Address addr(1);
				2353	addr.labels[0] = threadInfo[0][pkgIdIndex];
				2354	(*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
				2355
				2356	if (__kmp_affinity_gran_levels < 0) {
				2357	__kmp_affinity_gran_levels = 0;
				2358	}
				2359
				2360	if (__kmp_affinity_verbose) {
				2361	__kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
				2362	}
				2363
				2364	CLEANUP_THREAD_INFO;
				2365	return 1;
				2366	}
				2367
				2368	// Sort the threadInfo table by physical Id.
				2369	qsort(threadInfo, num_avail, sizeof(*threadInfo),
				2370	__kmp_affinity_cmp_ProcCpuInfo_phys_id);
				2371
				2372	// The table is now sorted by pkgId / coreId / threadId, but we really don't
				2373	// know the radix of any of the fields. pkgId's may be sparsely assigned among
				2374	// the chips on a system. Although coreId's are usually assigned
				2375	// [0 .. coresPerPkg-1] and threadId's are usually assigned
				2376	// [0..threadsPerCore-1], we don't want to make any such assumptions.
				2377	//
				2378	// For that matter, we don't know what coresPerPkg and threadsPerCore (or the
				2379	// total # packages) are at this point - we want to determine that now. We
				2380	// only have an upper bound on the first two figures.
				2381	unsigned *counts =
				2382	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned));
				2383	unsigned *maxCt =
				2384	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned));
				2385	unsigned *totals =
				2386	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned));
				2387	unsigned *lastId =
				2388	(unsigned )__kmp_allocate((maxIndex + 1) sizeof(unsigned));
				2389
				2390	bool assign_thread_ids = false;
				2391	unsigned threadIdCt;
				2392	unsigned index;
				2393
				2394	restart_radix_check:
				2395	threadIdCt = 0;
				2396
				2397	// Initialize the counter arrays with data from threadInfo[0].
				2398	if (assign_thread_ids) {
				2399	if (threadInfo[0][threadIdIndex] == UINT_MAX) {
				2400	threadInfo[0][threadIdIndex] = threadIdCt++;
				2401	} else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
				2402	threadIdCt = threadInfo[0][threadIdIndex] + 1;
				2403	}
				2404	}
				2405	for (index = 0; index <= maxIndex; index++) {
				2406	counts[index] = 1;
				2407	maxCt[index] = 1;
				2408	totals[index] = 1;
				2409	lastId[index] = threadInfo[0][index];
				2410	;
				2411	}
				2412
				2413	// Run through the rest of the OS procs.
				2414	for (i = 1; i < num_avail; i++) {
				2415	// Find the most significant index whose id differs from the id for the
				2416	// previous OS proc.
				2417	for (index = maxIndex; index >= threadIdIndex; index--) {
				2418	if (assign_thread_ids && (index == threadIdIndex)) {
				2419	// Auto-assign the thread id field if it wasn't specified.
				2420	if (threadInfo[i][threadIdIndex] == UINT_MAX) {
				2421	threadInfo[i][threadIdIndex] = threadIdCt++;
				2422	}
Jonathan Peyton	642688b	2017-06-01 16:46:36 +0000	[diff] [blame]	2423	// Apparently the thread id field was specified for some entries and not
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2424	// others. Start the thread id counter off at the next higher thread id.
				2425	else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
				2426	threadIdCt = threadInfo[i][threadIdIndex] + 1;
				2427	}
				2428	}
				2429	if (threadInfo[i][index] != lastId[index]) {
				2430	// Run through all indices which are less significant, and reset the
				2431	// counts to 1. At all levels up to and including index, we need to
				2432	// increment the totals and record the last id.
				2433	unsigned index2;
				2434	for (index2 = threadIdIndex; index2 < index; index2++) {
				2435	totals[index2]++;
				2436	if (counts[index2] > maxCt[index2]) {
				2437	maxCt[index2] = counts[index2];
				2438	}
				2439	counts[index2] = 1;
				2440	lastId[index2] = threadInfo[i][index2];
				2441	}
				2442	counts[index]++;
				2443	totals[index]++;
				2444	lastId[index] = threadInfo[i][index];
				2445
				2446	if (assign_thread_ids && (index > threadIdIndex)) {
				2447
				2448	#if KMP_MIC && REDUCE_TEAM_SIZE
				2449	// The default team size is the total #threads in the machine
				2450	// minus 1 thread for every core that has 3 or more threads.
				2451	teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
				2452	#endif // KMP_MIC && REDUCE_TEAM_SIZE
				2453
				2454	// Restart the thread counter, as we are on a new core.
				2455	threadIdCt = 0;
				2456
				2457	// Auto-assign the thread id field if it wasn't specified.
				2458	if (threadInfo[i][threadIdIndex] == UINT_MAX) {
				2459	threadInfo[i][threadIdIndex] = threadIdCt++;
				2460	}
				2461
				2462	// Apparently the thread id field was specified for some entries and
				2463	// not others. Start the thread id counter off at the next higher
				2464	// thread id.
				2465	else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
				2466	threadIdCt = threadInfo[i][threadIdIndex] + 1;
				2467	}
				2468	}
				2469	break;
				2470	}
				2471	}
				2472	if (index < threadIdIndex) {
				2473	// If thread ids were specified, it is an error if they are not unique.
				2474	// Also, check that we haven't already restarted the loop (to be safe -
				2475	// shouldn't need to).
				2476	if ((threadInfo[i][threadIdIndex] != UINT_MAX) \|\| assign_thread_ids) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2477	__kmp_free(lastId);
				2478	__kmp_free(totals);
				2479	__kmp_free(maxCt);
				2480	__kmp_free(counts);
				2481	CLEANUP_THREAD_INFO;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2482	*msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
				2483	return -1;
				2484	}
				2485
				2486	// If the thread ids were not specified and we see entries that
				2487	// are duplicates, start the loop over and assign the thread ids manually.
				2488	assign_thread_ids = true;
				2489	goto restart_radix_check;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2490	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2491	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2492
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2493	#if KMP_MIC && REDUCE_TEAM_SIZE
				2494	// The default team size is the total #threads in the machine
				2495	// minus 1 thread for every core that has 3 or more threads.
				2496	teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
				2497	#endif // KMP_MIC && REDUCE_TEAM_SIZE
				2498
				2499	for (index = threadIdIndex; index <= maxIndex; index++) {
				2500	if (counts[index] > maxCt[index]) {
				2501	maxCt[index] = counts[index];
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2502	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2503	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2504
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2505	__kmp_nThreadsPerCore = maxCt[threadIdIndex];
				2506	nCoresPerPkg = maxCt[coreIdIndex];
				2507	nPackages = totals[pkgIdIndex];
				2508
				2509	// Check to see if the machine topology is uniform
				2510	unsigned prod = totals[maxIndex];
				2511	for (index = threadIdIndex; index < maxIndex; index++) {
				2512	prod *= maxCt[index];
				2513	}
				2514	bool uniform = (prod == totals[threadIdIndex]);
				2515
				2516	// When affinity is off, this routine will still be called to set
				2517	// __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
				2518	// Make sure all these vars are set correctly, and return now if affinity is
				2519	// not enabled.
				2520	__kmp_ncores = totals[coreIdIndex];
				2521
				2522	if (__kmp_affinity_verbose) {
				2523	if (!KMP_AFFINITY_CAPABLE()) {
				2524	KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
				2525	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				2526	if (uniform) {
				2527	KMP_INFORM(Uniform, "KMP_AFFINITY");
				2528	} else {
				2529	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				2530	}
				2531	} else {
				2532	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				2533	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				2534	__kmp_affin_fullMask);
				2535	KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
				2536	if (__kmp_affinity_respect_mask) {
				2537	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				2538	} else {
				2539	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				2540	}
				2541	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				2542	if (uniform) {
				2543	KMP_INFORM(Uniform, "KMP_AFFINITY");
				2544	} else {
				2545	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				2546	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2547	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2548	kmp_str_buf_t buf;
				2549	__kmp_str_buf_init(&buf);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2550
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2551	__kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
				2552	for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
				2553	__kmp_str_buf_print(&buf, " x %d", maxCt[index]);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2554	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2555	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
				2556	maxCt[threadIdIndex], __kmp_ncores);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2557
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2558	__kmp_str_buf_free(&buf);
				2559	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2560
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2561	#if KMP_MIC && REDUCE_TEAM_SIZE
				2562	// Set the default team size.
				2563	if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
				2564	__kmp_dflt_team_nth = teamSize;
				2565	KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting "
				2566	"__kmp_dflt_team_nth = %d\n",
				2567	__kmp_dflt_team_nth));
				2568	}
				2569	#endif // KMP_MIC && REDUCE_TEAM_SIZE
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2570
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2571	KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
Jonathan Peyton	baad3f6	2018-08-09 22:04:30 +0000	[diff] [blame]	2572	KMP_DEBUG_ASSERT(num_avail == (unsigned)__kmp_avail_proc);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2573	__kmp_pu_os_idx = (int )__kmp_allocate(sizeof(int) __kmp_avail_proc);
				2574	for (i = 0; i < num_avail; ++i) { // fill the os indices
				2575	__kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
				2576	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2577
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2578	if (__kmp_affinity_type == affinity_none) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2579	__kmp_free(lastId);
				2580	__kmp_free(totals);
				2581	__kmp_free(maxCt);
				2582	__kmp_free(counts);
				2583	CLEANUP_THREAD_INFO;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2584	return 0;
				2585	}
				2586
				2587	// Count the number of levels which have more nodes at that level than at the
				2588	// parent's level (with there being an implicit root node of the top level).
				2589	// This is equivalent to saying that there is at least one node at this level
				2590	// which has a sibling. These levels are in the map, and the package level is
				2591	// always in the map.
				2592	bool inMap = (bool )__kmp_allocate((maxIndex + 1) * sizeof(bool));
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2593	for (index = threadIdIndex; index < maxIndex; index++) {
				2594	KMP_ASSERT(totals[index] >= totals[index + 1]);
				2595	inMap[index] = (totals[index] > totals[index + 1]);
				2596	}
				2597	inMap[maxIndex] = (totals[maxIndex] > 1);
				2598	inMap[pkgIdIndex] = true;
				2599
				2600	int depth = 0;
				2601	for (index = threadIdIndex; index <= maxIndex; index++) {
				2602	if (inMap[index]) {
				2603	depth++;
				2604	}
				2605	}
				2606	KMP_ASSERT(depth > 0);
				2607
				2608	// Construct the data structure that is to be returned.
				2609	address2os = (AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair) * num_avail);
				2610	int pkgLevel = -1;
				2611	int coreLevel = -1;
				2612	int threadLevel = -1;
				2613
				2614	for (i = 0; i < num_avail; ++i) {
				2615	Address addr(depth);
				2616	unsigned os = threadInfo[i][osIdIndex];
				2617	int src_index;
				2618	int dst_index = 0;
				2619
				2620	for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
				2621	if (!inMap[src_index]) {
				2622	continue;
				2623	}
				2624	addr.labels[dst_index] = threadInfo[i][src_index];
				2625	if (src_index == pkgIdIndex) {
				2626	pkgLevel = dst_index;
				2627	} else if (src_index == coreIdIndex) {
				2628	coreLevel = dst_index;
				2629	} else if (src_index == threadIdIndex) {
				2630	threadLevel = dst_index;
				2631	}
				2632	dst_index++;
				2633	}
				2634	(*address2os)[i] = AddrUnsPair(addr, os);
				2635	}
				2636
				2637	if (__kmp_affinity_gran_levels < 0) {
				2638	// Set the granularity level based on what levels are modeled
				2639	// in the machine topology map.
				2640	unsigned src_index;
				2641	__kmp_affinity_gran_levels = 0;
				2642	for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
				2643	if (!inMap[src_index]) {
				2644	continue;
				2645	}
				2646	switch (src_index) {
				2647	case threadIdIndex:
				2648	if (__kmp_affinity_gran > affinity_gran_thread) {
				2649	__kmp_affinity_gran_levels++;
				2650	}
				2651
				2652	break;
				2653	case coreIdIndex:
				2654	if (__kmp_affinity_gran > affinity_gran_core) {
				2655	__kmp_affinity_gran_levels++;
				2656	}
				2657	break;
				2658
				2659	case pkgIdIndex:
				2660	if (__kmp_affinity_gran > affinity_gran_package) {
				2661	__kmp_affinity_gran_levels++;
				2662	}
				2663	break;
				2664	}
				2665	}
				2666	}
				2667
				2668	if (__kmp_affinity_verbose) {
				2669	__kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
				2670	coreLevel, threadLevel);
				2671	}
				2672
				2673	__kmp_free(inMap);
				2674	__kmp_free(lastId);
				2675	__kmp_free(totals);
				2676	__kmp_free(maxCt);
				2677	__kmp_free(counts);
				2678	CLEANUP_THREAD_INFO;
				2679	return depth;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2680	}
				2681
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2682	// Create and return a table of affinity masks, indexed by OS thread ID.
				2683	// This routine handles OR'ing together all the affinity masks of threads
				2684	// that are sufficiently close, if granularity > fine.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2685	static kmp_affin_mask_t __kmp_create_masks(unsigned maxIndex,
				2686	unsigned *numUnique,
				2687	AddrUnsPair *address2os,
				2688	unsigned numAddrs) {
				2689	// First form a table of affinity masks in order of OS thread id.
				2690	unsigned depth;
				2691	unsigned maxOsId;
				2692	unsigned i;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2693
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2694	KMP_ASSERT(numAddrs > 0);
				2695	depth = address2os[0].first.depth;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2696
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2697	maxOsId = 0;
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	2698	for (i = numAddrs - 1;; --i) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2699	unsigned osId = address2os[i].second;
				2700	if (osId > maxOsId) {
				2701	maxOsId = osId;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2702	}
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	2703	if (i == 0)
				2704	break;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2705	}
				2706	kmp_affin_mask_t *osId2Mask;
				2707	KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId + 1));
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2708
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2709	// Sort the address2os table according to physical order. Doing so will put
				2710	// all threads on the same core/package/node in consecutive locations.
				2711	qsort(address2os, numAddrs, sizeof(*address2os),
				2712	__kmp_affinity_cmp_Address_labels);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2713
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2714	KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
				2715	if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
				2716	KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
				2717	}
				2718	if (__kmp_affinity_gran_levels >= (int)depth) {
				2719	if (__kmp_affinity_verbose \|\|
				2720	(__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
				2721	KMP_WARNING(AffThreadsMayMigrate);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2722	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2723	}
				2724
				2725	// Run through the table, forming the masks for all threads on each core.
				2726	// Threads on the same core will have identical "Address" objects, not
				2727	// considering the last level, which must be the thread id. All threads on a
				2728	// core will appear consecutively.
				2729	unsigned unique = 0;
				2730	unsigned j = 0; // index of 1st thread on core
				2731	unsigned leader = 0;
				2732	Address *leaderAddr = &(address2os[0].first);
				2733	kmp_affin_mask_t *sum;
				2734	KMP_CPU_ALLOC_ON_STACK(sum);
				2735	KMP_CPU_ZERO(sum);
				2736	KMP_CPU_SET(address2os[0].second, sum);
				2737	for (i = 1; i < numAddrs; i++) {
				2738	// If this thread is sufficiently close to the leader (within the
				2739	// granularity setting), then set the bit for this os thread in the
				2740	// affinity mask for this group, and go on to the next thread.
				2741	if (leaderAddr->isClose(address2os[i].first, __kmp_affinity_gran_levels)) {
				2742	KMP_CPU_SET(address2os[i].second, sum);
				2743	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2744	}
				2745
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2746	// For every thread in this group, copy the mask to the thread's entry in
				2747	// the osId2Mask table. Mark the first address as a leader.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2748	for (; j < i; j++) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2749	unsigned osId = address2os[j].second;
				2750	KMP_DEBUG_ASSERT(osId <= maxOsId);
				2751	kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
				2752	KMP_CPU_COPY(mask, sum);
				2753	address2os[j].first.leader = (j == leader);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2754	}
				2755	unique++;
				2756
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2757	// Start a new mask.
				2758	leader = i;
				2759	leaderAddr = &(address2os[i].first);
				2760	KMP_CPU_ZERO(sum);
				2761	KMP_CPU_SET(address2os[i].second, sum);
				2762	}
				2763
				2764	// For every thread in last group, copy the mask to the thread's
				2765	// entry in the osId2Mask table.
				2766	for (; j < i; j++) {
				2767	unsigned osId = address2os[j].second;
				2768	KMP_DEBUG_ASSERT(osId <= maxOsId);
				2769	kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
				2770	KMP_CPU_COPY(mask, sum);
				2771	address2os[j].first.leader = (j == leader);
				2772	}
				2773	unique++;
				2774	KMP_CPU_FREE_FROM_STACK(sum);
				2775
				2776	*maxIndex = maxOsId;
				2777	*numUnique = unique;
				2778	return osId2Mask;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2779	}
				2780
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2781	// Stuff for the affinity proclist parsers. It's easier to declare these vars
				2782	// as file-static than to try and pass them through the calling sequence of
				2783	// the recursive-descent OMP_PLACES parser.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2784	static kmp_affin_mask_t *newMasks;
				2785	static int numNewMasks;
				2786	static int nextNewMask;
				2787
// Append a copy of _mask to the newMasks array. When the array is full its
// capacity is doubled: a new array is allocated, the existing entries are
// copied over, and the old array is released.
#define ADD_MASK(_mask)                                                        \
  {                                                                            \
    if (nextNewMask >= numNewMasks) {                                          \
      int idx;                                                                 \
      kmp_affin_mask_t *grown;                                                 \
      numNewMasks *= 2;                                                        \
      KMP_CPU_INTERNAL_ALLOC_ARRAY(grown, numNewMasks);                        \
      for (idx = 0; idx < numNewMasks / 2; idx++) {                            \
        KMP_CPU_COPY(KMP_CPU_INDEX(grown, idx),                                \
                     KMP_CPU_INDEX(newMasks, idx));                            \
      }                                                                        \
      KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks / 2);                  \
      newMasks = grown;                                                        \
    }                                                                          \
    KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask));               \
    nextNewMask++;                                                             \
  }
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2806
// Append the mask stored for OS proc id _osId to newMasks. If _osId is larger
// than _maxOsId, or its bit is not set in its own entry of _osId2Mask, the id
// is ignored and (subject to the verbose/warning settings) a warning is
// issued instead.
// Note: the arguments are evaluated more than once, so they must be free of
// side effects.
#define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId)                             \
  {                                                                            \
    if (((_osId) > (_maxOsId)) ||                                              \
        (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) {     \
      if (__kmp_affinity_verbose ||                                            \
          (__kmp_affinity_warnings &&                                          \
           (__kmp_affinity_type != affinity_none))) {                          \
        KMP_WARNING(AffIgnoreInvalidProcID, (_osId));                          \
      }                                                                        \
    } else {                                                                   \
      ADD_MASK(KMP_CPU_INDEX((_osId2Mask), (_osId)));                          \
    }                                                                          \
  }
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2820
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2821	// Re-parse the proclist (for the explicit affinity type), and form the list
				2822	// of affinity newMasks indexed by gtid.
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2823	static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
				2824	unsigned int *out_numMasks,
				2825	const char *proclist,
				2826	kmp_affin_mask_t *osId2Mask,
				2827	int maxOsId) {
				2828	int i;
				2829	const char *scan = proclist;
				2830	const char *next = proclist;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2831
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2832	// We use malloc() for the temporary mask vector, so that we can use
				2833	// realloc() to extend it.
				2834	numNewMasks = 2;
				2835	KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
				2836	nextNewMask = 0;
				2837	kmp_affin_mask_t *sumMask;
				2838	KMP_CPU_ALLOC(sumMask);
				2839	int setSize = 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2840
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2841	for (;;) {
				2842	int start, end, stride;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2843
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2844	SKIP_WS(scan);
				2845	next = scan;
				2846	if (*next == '\0') {
				2847	break;
				2848	}
				2849
				2850	if (*next == '{') {
				2851	int num;
				2852	setSize = 0;
				2853	next++; // skip '{'
				2854	SKIP_WS(next);
				2855	scan = next;
				2856
				2857	// Read the first integer in the set.
				2858	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad proclist");
				2859	SKIP_DIGITS(next);
				2860	num = __kmp_str_to_int(scan, *next);
				2861	KMP_ASSERT2(num >= 0, "bad explicit proc list");
				2862
				2863	// Copy the mask for that osId to the sum (union) mask.
				2864	if ((num > maxOsId) \|\|
				2865	(!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
				2866	if (__kmp_affinity_verbose \|\|
				2867	(__kmp_affinity_warnings &&
				2868	(__kmp_affinity_type != affinity_none))) {
				2869	KMP_WARNING(AffIgnoreInvalidProcID, num);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2870	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2871	KMP_CPU_ZERO(sumMask);
				2872	} else {
				2873	KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
				2874	setSize = 1;
				2875	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2876
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2877	for (;;) {
				2878	// Check for end of set.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2879	SKIP_WS(next);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2880	if (*next == '}') {
				2881	next++; // skip '}'
				2882	break;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2883	}
				2884
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2885	// Skip optional comma.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2886	if (*next == ',') {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2887	next++;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2888	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2889	SKIP_WS(next);
				2890
				2891	// Read the next integer in the set.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2892	scan = next;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2893	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list");
				2894
				2895	SKIP_DIGITS(next);
				2896	num = __kmp_str_to_int(scan, *next);
				2897	KMP_ASSERT2(num >= 0, "bad explicit proc list");
				2898
				2899	// Add the mask for that osId to the sum mask.
				2900	if ((num > maxOsId) \|\|
				2901	(!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
				2902	if (__kmp_affinity_verbose \|\|
				2903	(__kmp_affinity_warnings &&
				2904	(__kmp_affinity_type != affinity_none))) {
				2905	KMP_WARNING(AffIgnoreInvalidProcID, num);
				2906	}
				2907	} else {
				2908	KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
				2909	setSize++;
				2910	}
				2911	}
				2912	if (setSize > 0) {
				2913	ADD_MASK(sumMask);
				2914	}
				2915
				2916	SKIP_WS(next);
				2917	if (*next == ',') {
				2918	next++;
				2919	}
				2920	scan = next;
				2921	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2922	}
				2923
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2924	// Read the first integer.
				2925	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list");
				2926	SKIP_DIGITS(next);
				2927	start = __kmp_str_to_int(scan, *next);
				2928	KMP_ASSERT2(start >= 0, "bad explicit proc list");
				2929	SKIP_WS(next);
				2930
				2931	// If this isn't a range, then add a mask to the list and go on.
				2932	if (*next != '-') {
				2933	ADD_MASK_OSID(start, osId2Mask, maxOsId);
				2934
				2935	// Skip optional comma.
				2936	if (*next == ',') {
				2937	next++;
				2938	}
				2939	scan = next;
				2940	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2941	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2942
				2943	// This is a range. Skip over the '-' and read in the 2nd int.
				2944	next++; // skip '-'
				2945	SKIP_WS(next);
				2946	scan = next;
				2947	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list");
				2948	SKIP_DIGITS(next);
				2949	end = __kmp_str_to_int(scan, *next);
				2950	KMP_ASSERT2(end >= 0, "bad explicit proc list");
				2951
				2952	// Check for a stride parameter
				2953	stride = 1;
				2954	SKIP_WS(next);
				2955	if (*next == ':') {
				2956	// A stride is specified. Skip over the ':" and read the 3rd int.
				2957	int sign = +1;
				2958	next++; // skip ':'
				2959	SKIP_WS(next);
				2960	scan = next;
				2961	if (*next == '-') {
				2962	sign = -1;
				2963	next++;
				2964	SKIP_WS(next);
				2965	scan = next;
				2966	}
				2967	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list");
				2968	SKIP_DIGITS(next);
				2969	stride = __kmp_str_to_int(scan, *next);
				2970	KMP_ASSERT2(stride >= 0, "bad explicit proc list");
				2971	stride *= sign;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	2972	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	2973
				2974	// Do some range checks.
				2975	KMP_ASSERT2(stride != 0, "bad explicit proc list");
				2976	if (stride > 0) {
				2977	KMP_ASSERT2(start <= end, "bad explicit proc list");
				2978	} else {
				2979	KMP_ASSERT2(start >= end, "bad explicit proc list");
				2980	}
				2981	KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
				2982
				2983	// Add the mask for each OS proc # to the list.
				2984	if (stride > 0) {
				2985	do {
				2986	ADD_MASK_OSID(start, osId2Mask, maxOsId);
				2987	start += stride;
				2988	} while (start <= end);
				2989	} else {
				2990	do {
				2991	ADD_MASK_OSID(start, osId2Mask, maxOsId);
				2992	start += stride;
				2993	} while (start >= end);
				2994	}
				2995
				2996	// Skip optional comma.
				2997	SKIP_WS(next);
				2998	if (*next == ',') {
				2999	next++;
				3000	}
				3001	scan = next;
				3002	}
				3003
				3004	*out_numMasks = nextNewMask;
				3005	if (nextNewMask == 0) {
				3006	*out_masks = NULL;
Jonathan Peyton	01dcf36	2015-11-30 20:02:59 +0000	[diff] [blame]	3007	KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3008	return;
				3009	}
				3010	KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
				3011	for (i = 0; i < nextNewMask; i++) {
				3012	kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
				3013	kmp_affin_mask_t dest = KMP_CPU_INDEX((out_masks), i);
				3014	KMP_CPU_COPY(dest, src);
				3015	}
				3016	KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
				3017	KMP_CPU_FREE(sumMask);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3018	}
				3019
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3020	#if OMP_40_ENABLED
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3021
				3022	/*-----------------------------------------------------------------------------
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3023	Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
				3024	places. Again, Here is the grammar:
				3025
				3026	place_list := place
				3027	place_list := place , place_list
				3028	place := num
				3029	place := place : num
				3030	place := place : num : signed
				3031	place := { subplacelist }
				3032	place := ! place // (lowest priority)
				3033	subplace_list := subplace
				3034	subplace_list := subplace , subplace_list
				3035	subplace := num
				3036	subplace := num : num
				3037	subplace := num : num : signed
				3038	signed := num
				3039	signed := + signed
				3040	signed := - signed
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3041	-----------------------------------------------------------------------------*/
				3042
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3043	static void __kmp_process_subplace_list(const char **scan,
				3044	kmp_affin_mask_t *osId2Mask,
				3045	int maxOsId, kmp_affin_mask_t *tempMask,
				3046	int *setSize) {
				3047	const char *next;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3048
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3049	for (;;) {
				3050	int start, count, stride, i;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3051
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3052	// Read in the starting proc id
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3053	SKIP_WS(*scan);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3054	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list");
				3055	next = *scan;
				3056	SKIP_DIGITS(next);
				3057	start = __kmp_str_to_int(scan, next);
				3058	KMP_ASSERT(start >= 0);
				3059	*scan = next;
				3060
				3061	// valid follow sets are ',' ':' and '}'
				3062	SKIP_WS(*scan);
				3063	if (scan == '}' \|\| scan == ',') {
				3064	if ((start > maxOsId) \|\|
				3065	(!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
				3066	if (__kmp_affinity_verbose \|\|
				3067	(__kmp_affinity_warnings &&
				3068	(__kmp_affinity_type != affinity_none))) {
				3069	KMP_WARNING(AffIgnoreInvalidProcID, start);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3070	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3071	} else {
				3072	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
				3073	(*setSize)++;
				3074	}
				3075	if (**scan == '}') {
				3076	break;
				3077	}
				3078	(*scan)++; // skip ','
				3079	continue;
				3080	}
				3081	KMP_ASSERT2(**scan == ':', "bad explicit places list");
				3082	(*scan)++; // skip ':'
				3083
				3084	// Read count parameter
				3085	SKIP_WS(*scan);
				3086	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list");
				3087	next = *scan;
				3088	SKIP_DIGITS(next);
				3089	count = __kmp_str_to_int(scan, next);
				3090	KMP_ASSERT(count >= 0);
				3091	*scan = next;
				3092
				3093	// valid follow sets are ',' ':' and '}'
				3094	SKIP_WS(*scan);
				3095	if (scan == '}' \|\| scan == ',') {
				3096	for (i = 0; i < count; i++) {
				3097	if ((start > maxOsId) \|\|
				3098	(!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
				3099	if (__kmp_affinity_verbose \|\|
				3100	(__kmp_affinity_warnings &&
				3101	(__kmp_affinity_type != affinity_none))) {
				3102	KMP_WARNING(AffIgnoreInvalidProcID, start);
				3103	}
				3104	break; // don't proliferate warnings for large count
				3105	} else {
				3106	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
				3107	start++;
				3108	(*setSize)++;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3109	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3110	}
				3111	if (**scan == '}') {
				3112	break;
				3113	}
				3114	(*scan)++; // skip ','
				3115	continue;
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3116	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3117	KMP_ASSERT2(**scan == ':', "bad explicit places list");
				3118	(*scan)++; // skip ':'
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3119
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3120	// Read stride parameter
				3121	int sign = +1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3122	for (;;) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3123	SKIP_WS(*scan);
				3124	if (**scan == '+') {
				3125	(*scan)++; // skip '+'
				3126	continue;
				3127	}
				3128	if (**scan == '-') {
				3129	sign *= -1;
				3130	(*scan)++; // skip '-'
				3131	continue;
				3132	}
				3133	break;
				3134	}
				3135	SKIP_WS(*scan);
				3136	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list");
				3137	next = *scan;
				3138	SKIP_DIGITS(next);
				3139	stride = __kmp_str_to_int(scan, next);
				3140	KMP_ASSERT(stride >= 0);
				3141	*scan = next;
				3142	stride *= sign;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3143
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3144	// valid follow sets are ',' and '}'
				3145	SKIP_WS(*scan);
				3146	if (scan == '}' \|\| scan == ',') {
				3147	for (i = 0; i < count; i++) {
				3148	if ((start > maxOsId) \|\|
				3149	(!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
				3150	if (__kmp_affinity_verbose \|\|
				3151	(__kmp_affinity_warnings &&
				3152	(__kmp_affinity_type != affinity_none))) {
				3153	KMP_WARNING(AffIgnoreInvalidProcID, start);
				3154	}
				3155	break; // don't proliferate warnings for large count
				3156	} else {
				3157	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
				3158	start += stride;
				3159	(*setSize)++;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3160	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3161	}
				3162	if (**scan == '}') {
				3163	break;
				3164	}
				3165	(*scan)++; // skip ','
				3166	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3167	}
				3168
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3169	KMP_ASSERT2(0, "bad explicit places list");
				3170	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3171	}
				3172
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3173	static void __kmp_process_place(const char *scan, kmp_affin_mask_t osId2Mask,
				3174	int maxOsId, kmp_affin_mask_t *tempMask,
				3175	int *setSize) {
				3176	const char *next;
				3177
				3178	// valid follow sets are '{' '!' and num
				3179	SKIP_WS(*scan);
				3180	if (**scan == '{') {
				3181	(*scan)++; // skip '{'
				3182	__kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask, setSize);
				3183	KMP_ASSERT2(**scan == '}', "bad explicit places list");
				3184	(*scan)++; // skip '}'
				3185	} else if (**scan == '!') {
				3186	(*scan)++; // skip '!'
				3187	__kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
				3188	KMP_CPU_COMPLEMENT(maxOsId, tempMask);
				3189	} else if ((scan >= '0') && (scan <= '9')) {
				3190	next = *scan;
				3191	SKIP_DIGITS(next);
				3192	int num = __kmp_str_to_int(scan, next);
				3193	KMP_ASSERT(num >= 0);
				3194	if ((num > maxOsId) \|\|
				3195	(!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
				3196	if (__kmp_affinity_verbose \|\|
				3197	(__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
				3198	KMP_WARNING(AffIgnoreInvalidProcID, num);
				3199	}
				3200	} else {
				3201	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
				3202	(*setSize)++;
				3203	}
				3204	*scan = next; // skip num
				3205	} else {
				3206	KMP_ASSERT2(0, "bad explicit places list");
				3207	}
				3208	}
				3209
				3210	// static void
				3211	void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
				3212	unsigned int *out_numMasks,
				3213	const char *placelist,
				3214	kmp_affin_mask_t *osId2Mask,
				3215	int maxOsId) {
				3216	int i, j, count, stride, sign;
				3217	const char *scan = placelist;
				3218	const char *next = placelist;
				3219
				3220	numNewMasks = 2;
				3221	KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
				3222	nextNewMask = 0;
				3223
				3224	// tempMask is modified based on the previous or initial
				3225	// place to form the current place
				3226	// previousMask contains the previous place
				3227	kmp_affin_mask_t *tempMask;
				3228	kmp_affin_mask_t *previousMask;
				3229	KMP_CPU_ALLOC(tempMask);
				3230	KMP_CPU_ZERO(tempMask);
				3231	KMP_CPU_ALLOC(previousMask);
				3232	KMP_CPU_ZERO(previousMask);
				3233	int setSize = 0;
				3234
				3235	for (;;) {
				3236	__kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
				3237
				3238	// valid follow sets are ',' ':' and EOL
				3239	SKIP_WS(scan);
				3240	if (scan == '\0' \|\| scan == ',') {
				3241	if (setSize > 0) {
				3242	ADD_MASK(tempMask);
				3243	}
				3244	KMP_CPU_ZERO(tempMask);
				3245	setSize = 0;
				3246	if (*scan == '\0') {
				3247	break;
				3248	}
				3249	scan++; // skip ','
				3250	continue;
				3251	}
				3252
				3253	KMP_ASSERT2(*scan == ':', "bad explicit places list");
				3254	scan++; // skip ':'
				3255
				3256	// Read count parameter
				3257	SKIP_WS(scan);
				3258	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list");
				3259	next = scan;
				3260	SKIP_DIGITS(next);
				3261	count = __kmp_str_to_int(scan, *next);
				3262	KMP_ASSERT(count >= 0);
				3263	scan = next;
				3264
				3265	// valid follow sets are ',' ':' and EOL
				3266	SKIP_WS(scan);
				3267	if (scan == '\0' \|\| scan == ',') {
				3268	stride = +1;
				3269	} else {
				3270	KMP_ASSERT2(*scan == ':', "bad explicit places list");
				3271	scan++; // skip ':'
				3272
				3273	// Read stride parameter
				3274	sign = +1;
				3275	for (;;) {
				3276	SKIP_WS(scan);
				3277	if (*scan == '+') {
				3278	scan++; // skip '+'
				3279	continue;
				3280	}
				3281	if (*scan == '-') {
				3282	sign *= -1;
				3283	scan++; // skip '-'
				3284	continue;
				3285	}
				3286	break;
				3287	}
				3288	SKIP_WS(scan);
				3289	KMP_ASSERT2((scan >= '0') && (scan <= '9'), "bad explicit places list");
				3290	next = scan;
				3291	SKIP_DIGITS(next);
				3292	stride = __kmp_str_to_int(scan, *next);
				3293	KMP_DEBUG_ASSERT(stride >= 0);
				3294	scan = next;
				3295	stride *= sign;
				3296	}
				3297
				3298	// Add places determined by initial_place : count : stride
				3299	for (i = 0; i < count; i++) {
				3300	if (setSize == 0) {
				3301	break;
				3302	}
				3303	// Add the current place, then build the next place (tempMask) from that
				3304	KMP_CPU_COPY(previousMask, tempMask);
				3305	ADD_MASK(previousMask);
				3306	KMP_CPU_ZERO(tempMask);
				3307	setSize = 0;
				3308	KMP_CPU_SET_ITERATE(j, previousMask) {
				3309	if (!KMP_CPU_ISSET(j, previousMask)) {
				3310	continue;
				3311	}
				3312	if ((j + stride > maxOsId) \|\| (j + stride < 0) \|\|
				3313	(!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) \|\|
				3314	(!KMP_CPU_ISSET(j + stride,
				3315	KMP_CPU_INDEX(osId2Mask, j + stride)))) {
				3316	if ((__kmp_affinity_verbose \|\|
				3317	(__kmp_affinity_warnings &&
				3318	(__kmp_affinity_type != affinity_none))) &&
				3319	i < count - 1) {
				3320	KMP_WARNING(AffIgnoreInvalidProcID, j + stride);
				3321	}
				3322	continue;
				3323	}
				3324	KMP_CPU_SET(j + stride, tempMask);
				3325	setSize++;
				3326	}
				3327	}
				3328	KMP_CPU_ZERO(tempMask);
				3329	setSize = 0;
				3330
				3331	// valid follow sets are ',' and EOL
				3332	SKIP_WS(scan);
				3333	if (*scan == '\0') {
				3334	break;
				3335	}
				3336	if (*scan == ',') {
				3337	scan++; // skip ','
				3338	continue;
				3339	}
				3340
				3341	KMP_ASSERT2(0, "bad explicit places list");
				3342	}
				3343
				3344	*out_numMasks = nextNewMask;
				3345	if (nextNewMask == 0) {
				3346	*out_masks = NULL;
				3347	KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
				3348	return;
				3349	}
				3350	KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
				3351	KMP_CPU_FREE(tempMask);
				3352	KMP_CPU_FREE(previousMask);
				3353	for (i = 0; i < nextNewMask; i++) {
				3354	kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
				3355	kmp_affin_mask_t dest = KMP_CPU_INDEX((out_masks), i);
				3356	KMP_CPU_COPY(dest, src);
				3357	}
				3358	KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
				3359	}
				3360
				3361	#endif /* OMP_40_ENABLED */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3362
				3363	#undef ADD_MASK
				3364	#undef ADD_MASK_OSID
				3365
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3366	#if KMP_USE_HWLOC
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3367	static int __kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o) {
				3368	// skip PUs descendants of the object o
				3369	int skipped = 0;
				3370	hwloc_obj_t hT = NULL;
				3371	int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
				3372	for (int i = 0; i < N; ++i) {
				3373	KMP_DEBUG_ASSERT(hT);
				3374	unsigned idx = hT->os_index;
				3375	if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
				3376	KMP_CPU_CLR(idx, __kmp_affin_fullMask);
				3377	KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
				3378	++skipped;
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3379	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3380	hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
				3381	}
				3382	return skipped; // count number of skipped units
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3383	}
				3384
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3385	static int __kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o) {
				3386	// check if obj has PUs present in fullMask
				3387	hwloc_obj_t hT = NULL;
				3388	int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
				3389	for (int i = 0; i < N; ++i) {
				3390	KMP_DEBUG_ASSERT(hT);
				3391	unsigned idx = hT->os_index;
				3392	if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask))
				3393	return 1; // found PU
				3394	hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
				3395	}
				3396	return 0; // no PUs found
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3397	}
				3398	#endif // KMP_USE_HWLOC
				3399
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3400	static void __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) {
				3401	AddrUnsPair *newAddr;
				3402	if (__kmp_hws_requested == 0)
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3403	goto _exit; // no topology limiting actions requested, exit
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3404	#if KMP_USE_HWLOC
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3405	if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
				3406	// Number of subobjects calculated dynamically, this works fine for
				3407	// any non-uniform topology.
				3408	// L2 cache objects are determined by depth, other objects - by type.
				3409	hwloc_topology_t tp = __kmp_hwloc_topology;
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3410	int nS = 0, nN = 0, nL = 0, nC = 0,
				3411	nT = 0; // logical index including skipped
				3412	int nCr = 0, nTr = 0; // number of requested units
				3413	int nPkg = 0, nCo = 0, n_new = 0, n_old = 0, nCpP = 0, nTpC = 0; // counters
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3414	hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
				3415	int L2depth, idx;
Jonathan Peyton	dd4aa9b	2015-10-08 17:55:54 +0000	[diff] [blame]	3416
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3417	// check support of extensions ----------------------------------
				3418	int numa_support = 0, tile_support = 0;
				3419	if (__kmp_pu_os_idx)
				3420	hT = hwloc_get_pu_obj_by_os_index(tp,
				3421	__kmp_pu_os_idx[__kmp_avail_proc - 1]);
				3422	else
				3423	hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1);
				3424	if (hT == NULL) { // something's gone wrong
				3425	KMP_WARNING(AffHWSubsetUnsupported);
				3426	goto _exit;
				3427	}
				3428	// check NUMA node
				3429	hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
				3430	hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
				3431	if (hN != NULL && hN->depth > hS->depth) {
				3432	numa_support = 1; // 1 in case socket includes node(s)
				3433	} else if (__kmp_hws_node.num > 0) {
				3434	// don't support sockets inside NUMA node (no such HW found for testing)
				3435	KMP_WARNING(AffHWSubsetUnsupported);
				3436	goto _exit;
				3437	}
				3438	// check L2 cahce, get object by depth because of multiple caches
				3439	L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
				3440	hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT);
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3441	if (hL != NULL &&
				3442	__kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3443	tile_support = 1; // no sense to count L2 if it includes single core
				3444	} else if (__kmp_hws_tile.num > 0) {
				3445	if (__kmp_hws_core.num == 0) {
				3446	__kmp_hws_core = __kmp_hws_tile; // replace L2 with core
				3447	__kmp_hws_tile.num = 0;
				3448	} else {
				3449	// L2 and core are both requested, but represent same object
				3450	KMP_WARNING(AffHWSubsetInvalid);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3451	goto _exit;
				3452	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3453	}
				3454	// end of check of extensions -----------------------------------
				3455
				3456	// fill in unset items, validate settings -----------------------
				3457	if (__kmp_hws_socket.num == 0)
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3458	__kmp_hws_socket.num = nPackages; // use all available sockets
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3459	if (__kmp_hws_socket.offset >= nPackages) {
				3460	KMP_WARNING(AffHWSubsetManySockets);
				3461	goto _exit;
				3462	}
				3463	if (numa_support) {
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	3464	hN = NULL;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3465	int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE,
				3466	&hN); // num nodes in socket
				3467	if (__kmp_hws_node.num == 0)
				3468	__kmp_hws_node.num = NN; // use all available nodes
				3469	if (__kmp_hws_node.offset >= NN) {
				3470	KMP_WARNING(AffHWSubsetManyNodes);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3471	goto _exit;
				3472	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3473	if (tile_support) {
				3474	// get num tiles in node
				3475	int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
				3476	if (__kmp_hws_tile.num == 0) {
				3477	__kmp_hws_tile.num = NL + 1;
				3478	} // use all available tiles, some node may have more tiles, thus +1
				3479	if (__kmp_hws_tile.offset >= NL) {
				3480	KMP_WARNING(AffHWSubsetManyTiles);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3481	goto _exit;
				3482	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3483	int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
				3484	&hC); // num cores in tile
				3485	if (__kmp_hws_core.num == 0)
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3486	__kmp_hws_core.num = NC; // use all available cores
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3487	if (__kmp_hws_core.offset >= NC) {
				3488	KMP_WARNING(AffHWSubsetManyCores);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3489	goto _exit;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3490	}
				3491	} else { // tile_support
				3492	int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE,
				3493	&hC); // num cores in node
				3494	if (__kmp_hws_core.num == 0)
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3495	__kmp_hws_core.num = NC; // use all available cores
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3496	if (__kmp_hws_core.offset >= NC) {
				3497	KMP_WARNING(AffHWSubsetManyCores);
				3498	goto _exit;
				3499	}
				3500	} // tile_support
				3501	} else { // numa_support
				3502	if (tile_support) {
				3503	// get num tiles in socket
				3504	int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
				3505	if (__kmp_hws_tile.num == 0)
				3506	__kmp_hws_tile.num = NL; // use all available tiles
				3507	if (__kmp_hws_tile.offset >= NL) {
				3508	KMP_WARNING(AffHWSubsetManyTiles);
				3509	goto _exit;
				3510	}
				3511	int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
				3512	&hC); // num cores in tile
				3513	if (__kmp_hws_core.num == 0)
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3514	__kmp_hws_core.num = NC; // use all available cores
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3515	if (__kmp_hws_core.offset >= NC) {
				3516	KMP_WARNING(AffHWSubsetManyCores);
				3517	goto _exit;
				3518	}
				3519	} else { // tile_support
				3520	int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE,
				3521	&hC); // num cores in socket
				3522	if (__kmp_hws_core.num == 0)
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3523	__kmp_hws_core.num = NC; // use all available cores
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3524	if (__kmp_hws_core.offset >= NC) {
				3525	KMP_WARNING(AffHWSubsetManyCores);
				3526	goto _exit;
				3527	}
				3528	} // tile_support
				3529	}
				3530	if (__kmp_hws_proc.num == 0)
				3531	__kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all available procs
				3532	if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) {
				3533	KMP_WARNING(AffHWSubsetManyProcs);
				3534	goto _exit;
				3535	}
				3536	// end of validation --------------------------------------------
				3537
				3538	if (pAddr) // pAddr is NULL in case of affinity_none
				3539	newAddr = (AddrUnsPair )__kmp_allocate(sizeof(AddrUnsPair)
				3540	__kmp_avail_proc); // max size
				3541	// main loop to form HW subset ----------------------------------
				3542	hS = NULL;
				3543	int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE);
				3544	for (int s = 0; s < NP; ++s) {
				3545	// Check Socket -----------------------------------------------
				3546	hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS);
				3547	if (!__kmp_hwloc_obj_has_PUs(tp, hS))
				3548	continue; // skip socket if all PUs are out of fullMask
				3549	++nS; // only count objects those have PUs in affinity mask
				3550	if (nS <= __kmp_hws_socket.offset \|\|
				3551	nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) {
				3552	n_old += __kmp_hwloc_skip_PUs_obj(tp, hS); // skip socket
				3553	continue; // move to next socket
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3554	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3555	nCr = 0; // count number of cores per socket
				3556	// socket requested, go down the topology tree
				3557	// check 4 cases: (+NUMA+Tile), (+NUMA-Tile), (-NUMA+Tile), (-NUMA-Tile)
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3558	if (numa_support) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3559	nN = 0;
				3560	hN = NULL;
				3561	// num nodes in current socket
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3562	int NN =
				3563	__kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE, &hN);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3564	for (int n = 0; n < NN; ++n) {
				3565	// Check NUMA Node ----------------------------------------
				3566	if (!__kmp_hwloc_obj_has_PUs(tp, hN)) {
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3567	hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3568	continue; // skip node if all PUs are out of fullMask
				3569	}
				3570	++nN;
				3571	if (nN <= __kmp_hws_node.offset \|\|
				3572	nN > __kmp_hws_node.num + __kmp_hws_node.offset) {
				3573	// skip node as not requested
				3574	n_old += __kmp_hwloc_skip_PUs_obj(tp, hN); // skip node
				3575	hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
				3576	continue; // move to next node
				3577	}
				3578	// node requested, go down the topology tree
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3579	if (tile_support) {
				3580	nL = 0;
				3581	hL = NULL;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3582	int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3583	for (int l = 0; l < NL; ++l) {
				3584	// Check L2 (tile) ------------------------------------
				3585	if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
				3586	hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
				3587	continue; // skip tile if all PUs are out of fullMask
				3588	}
				3589	++nL;
				3590	if (nL <= __kmp_hws_tile.offset \|\|
				3591	nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
				3592	// skip tile as not requested
				3593	n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
				3594	hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
				3595	continue; // move to next tile
				3596	}
				3597	// tile requested, go down the topology tree
				3598	nC = 0;
				3599	hC = NULL;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3600	// num cores in current tile
				3601	int NC = __kmp_hwloc_count_children_by_type(tp, hL,
				3602	HWLOC_OBJ_CORE, &hC);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3603	for (int c = 0; c < NC; ++c) {
				3604	// Check Core ---------------------------------------
				3605	if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
				3606	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3607	continue; // skip core if all PUs are out of fullMask
				3608	}
				3609	++nC;
				3610	if (nC <= __kmp_hws_core.offset \|\|
				3611	nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
				3612	// skip node as not requested
				3613	n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
				3614	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3615	continue; // move to next node
				3616	}
				3617	// core requested, go down to PUs
				3618	nT = 0;
				3619	nTr = 0;
				3620	hT = NULL;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3621	// num procs in current core
				3622	int NT = __kmp_hwloc_count_children_by_type(tp, hC,
				3623	HWLOC_OBJ_PU, &hT);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3624	for (int t = 0; t < NT; ++t) {
				3625	// Check PU ---------------------------------------
				3626	idx = hT->os_index;
				3627	if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
				3628	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3629	continue; // skip PU if not in fullMask
				3630	}
				3631	++nT;
				3632	if (nT <= __kmp_hws_proc.offset \|\|
				3633	nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
				3634	// skip PU
				3635	KMP_CPU_CLR(idx, __kmp_affin_fullMask);
				3636	++n_old;
				3637	KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
				3638	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3639	continue; // move to next node
				3640	}
				3641	++nTr;
				3642	if (pAddr) // collect requested thread's data
				3643	newAddr[n_new] = (*pAddr)[n_old];
				3644	++n_new;
				3645	++n_old;
				3646	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3647	} // threads loop
				3648	if (nTr > 0) {
				3649	++nCr; // num cores per socket
				3650	++nCo; // total num cores
				3651	if (nTr > nTpC)
				3652	nTpC = nTr; // calc max threads per core
				3653	}
				3654	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3655	} // cores loop
				3656	hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
				3657	} // tiles loop
				3658	} else { // tile_support
				3659	// no tiles, check cores
				3660	nC = 0;
				3661	hC = NULL;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3662	// num cores in current node
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3663	int NC =
				3664	__kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE, &hC);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3665	for (int c = 0; c < NC; ++c) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3666	// Check Core ---------------------------------------
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3667	if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
				3668	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3669	continue; // skip core if all PUs are out of fullMask
				3670	}
				3671	++nC;
				3672	if (nC <= __kmp_hws_core.offset \|\|
				3673	nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
				3674	// skip node as not requested
				3675	n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
				3676	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3677	continue; // move to next node
				3678	}
				3679	// core requested, go down to PUs
				3680	nT = 0;
				3681	nTr = 0;
				3682	hT = NULL;
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3683	int NT =
				3684	__kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3685	for (int t = 0; t < NT; ++t) {
				3686	// Check PU ---------------------------------------
				3687	idx = hT->os_index;
				3688	if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
				3689	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3690	continue; // skip PU if not in fullMask
				3691	}
				3692	++nT;
				3693	if (nT <= __kmp_hws_proc.offset \|\|
				3694	nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
				3695	// skip PU
				3696	KMP_CPU_CLR(idx, __kmp_affin_fullMask);
				3697	++n_old;
				3698	KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
				3699	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3700	continue; // move to next node
				3701	}
				3702	++nTr;
				3703	if (pAddr) // collect requested thread's data
				3704	newAddr[n_new] = (*pAddr)[n_old];
				3705	++n_new;
				3706	++n_old;
				3707	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3708	} // threads loop
				3709	if (nTr > 0) {
				3710	++nCr; // num cores per socket
				3711	++nCo; // total num cores
				3712	if (nTr > nTpC)
				3713	nTpC = nTr; // calc max threads per core
				3714	}
				3715	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3716	} // cores loop
				3717	} // tiles support
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3718	hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
				3719	} // nodes loop
				3720	} else { // numa_support
				3721	// no NUMA support
				3722	if (tile_support) {
				3723	nL = 0;
				3724	hL = NULL;
				3725	// num tiles in current socket
				3726	int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
				3727	for (int l = 0; l < NL; ++l) {
				3728	// Check L2 (tile) ------------------------------------
				3729	if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
				3730	hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
				3731	continue; // skip tile if all PUs are out of fullMask
				3732	}
				3733	++nL;
				3734	if (nL <= __kmp_hws_tile.offset \|\|
				3735	nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
				3736	// skip tile as not requested
				3737	n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
				3738	hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
				3739	continue; // move to next tile
				3740	}
				3741	// tile requested, go down the topology tree
				3742	nC = 0;
				3743	hC = NULL;
				3744	// num cores per tile
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3745	int NC =
				3746	__kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3747	for (int c = 0; c < NC; ++c) {
				3748	// Check Core ---------------------------------------
				3749	if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
				3750	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3751	continue; // skip core if all PUs are out of fullMask
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3752	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3753	++nC;
				3754	if (nC <= __kmp_hws_core.offset \|\|
				3755	nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
				3756	// skip node as not requested
				3757	n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
				3758	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3759	continue; // move to next node
				3760	}
				3761	// core requested, go down to PUs
				3762	nT = 0;
				3763	nTr = 0;
				3764	hT = NULL;
				3765	// num procs per core
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3766	int NT =
				3767	__kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3768	for (int t = 0; t < NT; ++t) {
				3769	// Check PU ---------------------------------------
				3770	idx = hT->os_index;
				3771	if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
				3772	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3773	continue; // skip PU if not in fullMask
				3774	}
				3775	++nT;
				3776	if (nT <= __kmp_hws_proc.offset \|\|
				3777	nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
				3778	// skip PU
				3779	KMP_CPU_CLR(idx, __kmp_affin_fullMask);
				3780	++n_old;
				3781	KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
				3782	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3783	continue; // move to next node
				3784	}
				3785	++nTr;
				3786	if (pAddr) // collect requested thread's data
				3787	newAddr[n_new] = (*pAddr)[n_old];
				3788	++n_new;
				3789	++n_old;
				3790	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3791	} // threads loop
				3792	if (nTr > 0) {
				3793	++nCr; // num cores per socket
				3794	++nCo; // total num cores
				3795	if (nTr > nTpC)
				3796	nTpC = nTr; // calc max threads per core
				3797	}
				3798	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3799	} // cores loop
				3800	hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
				3801	} // tiles loop
				3802	} else { // tile_support
				3803	// no tiles, check cores
				3804	nC = 0;
				3805	hC = NULL;
				3806	// num cores in socket
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3807	int NC =
				3808	__kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE, &hC);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3809	for (int c = 0; c < NC; ++c) {
				3810	// Check Core -------------------------------------------
				3811	if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
				3812	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3813	continue; // skip core if all PUs are out of fullMask
				3814	}
				3815	++nC;
				3816	if (nC <= __kmp_hws_core.offset \|\|
				3817	nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
				3818	// skip node as not requested
				3819	n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
				3820	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3821	continue; // move to next node
				3822	}
				3823	// core requested, go down to PUs
				3824	nT = 0;
				3825	nTr = 0;
				3826	hT = NULL;
				3827	// num procs per core
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3828	int NT =
				3829	__kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3830	for (int t = 0; t < NT; ++t) {
				3831	// Check PU ---------------------------------------
				3832	idx = hT->os_index;
				3833	if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
				3834	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3835	continue; // skip PU if not in fullMask
				3836	}
				3837	++nT;
				3838	if (nT <= __kmp_hws_proc.offset \|\|
				3839	nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
				3840	// skip PU
				3841	KMP_CPU_CLR(idx, __kmp_affin_fullMask);
				3842	++n_old;
				3843	KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
				3844	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3845	continue; // move to next node
				3846	}
				3847	++nTr;
				3848	if (pAddr) // collect requested thread's data
				3849	newAddr[n_new] = (*pAddr)[n_old];
				3850	++n_new;
				3851	++n_old;
				3852	hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
				3853	} // threads loop
				3854	if (nTr > 0) {
				3855	++nCr; // num cores per socket
				3856	++nCo; // total num cores
				3857	if (nTr > nTpC)
				3858	nTpC = nTr; // calc max threads per core
				3859	}
				3860	hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
				3861	} // cores loop
				3862	} // tiles support
				3863	} // numa_support
				3864	if (nCr > 0) { // found cores?
				3865	++nPkg; // num sockets
				3866	if (nCr > nCpP)
				3867	nCpP = nCr; // calc max cores per socket
				3868	}
				3869	} // sockets loop
				3870
				3871	// check the subset is valid
				3872	KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc);
				3873	KMP_DEBUG_ASSERT(nPkg > 0);
				3874	KMP_DEBUG_ASSERT(nCpP > 0);
				3875	KMP_DEBUG_ASSERT(nTpC > 0);
				3876	KMP_DEBUG_ASSERT(nCo > 0);
				3877	KMP_DEBUG_ASSERT(nPkg <= nPackages);
				3878	KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg);
				3879	KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore);
				3880	KMP_DEBUG_ASSERT(nCo <= __kmp_ncores);
				3881
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3882	nPackages = nPkg; // correct num sockets
				3883	nCoresPerPkg = nCpP; // correct num cores per socket
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3884	__kmp_nThreadsPerCore = nTpC; // correct num threads per core
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3885	__kmp_avail_proc = n_new; // correct num procs
				3886	__kmp_ncores = nCo; // correct num cores
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3887	// hwloc topology method end
				3888	} else
				3889	#endif // KMP_USE_HWLOC
				3890	{
				3891	int n_old = 0, n_new = 0, proc_num = 0;
				3892	if (__kmp_hws_node.num > 0 \|\| __kmp_hws_tile.num > 0) {
				3893	KMP_WARNING(AffHWSubsetNoHWLOC);
				3894	goto _exit;
				3895	}
				3896	if (__kmp_hws_socket.num == 0)
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3897	__kmp_hws_socket.num = nPackages; // use all available sockets
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3898	if (__kmp_hws_core.num == 0)
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3899	__kmp_hws_core.num = nCoresPerPkg; // use all available cores
				3900	if (__kmp_hws_proc.num == 0 \|\| __kmp_hws_proc.num > __kmp_nThreadsPerCore)
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3901	__kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all HW contexts
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3902	if (!__kmp_affinity_uniform_topology()) {
				3903	KMP_WARNING(AffHWSubsetNonUniform);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3904	goto _exit; // don't support non-uniform topology
				3905	}
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3906	if (depth > 3) {
				3907	KMP_WARNING(AffHWSubsetNonThreeLevel);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3908	goto _exit; // don't support not-3-level topology
				3909	}
				3910	if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) {
				3911	KMP_WARNING(AffHWSubsetManySockets);
				3912	goto _exit;
				3913	}
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3914	if (__kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg) {
				3915	KMP_WARNING(AffHWSubsetManyCores);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3916	goto _exit;
				3917	}
				3918	// Form the requested subset
				3919	if (pAddr) // pAddr is NULL in case of affinity_none
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3920	newAddr = (AddrUnsPair *)__kmp_allocate(
				3921	sizeof(AddrUnsPair) * __kmp_hws_socket.num * __kmp_hws_core.num *
				3922	__kmp_hws_proc.num);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3923	for (int i = 0; i < nPackages; ++i) {
				3924	if (i < __kmp_hws_socket.offset \|\|
				3925	i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) {
				3926	// skip not-requested socket
				3927	n_old += nCoresPerPkg * __kmp_nThreadsPerCore;
				3928	if (__kmp_pu_os_idx != NULL) {
				3929	// walk through skipped socket
				3930	for (int j = 0; j < nCoresPerPkg; ++j) {
				3931	for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
				3932	KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
				3933	++proc_num;
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	3934	}
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3935	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3936	}
				3937	} else {
				3938	// walk through requested socket
				3939	for (int j = 0; j < nCoresPerPkg; ++j) {
				3940	if (j < __kmp_hws_core.offset \|\|
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3941	j >= __kmp_hws_core.offset +
				3942	__kmp_hws_core.num) { // skip not-requested core
				3943	n_old += __kmp_nThreadsPerCore;
				3944	if (__kmp_pu_os_idx != NULL) {
				3945	for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
				3946	KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
				3947	++proc_num;
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3948	}
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3949	}
				3950	} else {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3951	// walk through requested core
				3952	for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
				3953	if (k < __kmp_hws_proc.num) {
				3954	if (pAddr) // collect requested thread's data
				3955	newAddr[n_new] = (*pAddr)[n_old];
				3956	n_new++;
				3957	} else {
				3958	if (__kmp_pu_os_idx != NULL)
				3959	KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3960	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3961	n_old++;
				3962	++proc_num;
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	3963	}
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3964	}
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	3965	}
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3966	}
Andrey Churbanov	4a9a892	2017-04-13 17:15:07 +0000	[diff] [blame]	3967	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3968	KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3969	KMP_DEBUG_ASSERT(n_new ==
				3970	__kmp_hws_socket.num * __kmp_hws_core.num *
				3971	__kmp_hws_proc.num);
				3972	nPackages = __kmp_hws_socket.num; // correct nPackages
				3973	nCoresPerPkg = __kmp_hws_core.num; // correct nCoresPerPkg
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3974	__kmp_nThreadsPerCore = __kmp_hws_proc.num; // correct __kmp_nThreadsPerCore
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3975	__kmp_avail_proc = n_new; // correct avail_proc
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3976	__kmp_ncores = nPackages * __kmp_hws_core.num; // correct ncores
				3977	} // non-hwloc topology method
				3978	if (pAddr) {
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3979	__kmp_free(*pAddr);
				3980	*pAddr = newAddr; // replace old topology with new one
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3981	}
				3982	if (__kmp_affinity_verbose) {
				3983	char m[KMP_AFFIN_MASK_PRINT_LEN];
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3984	__kmp_affinity_print_mask(m, KMP_AFFIN_MASK_PRINT_LEN,
				3985	__kmp_affin_fullMask);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3986	if (__kmp_affinity_respect_mask) {
				3987	KMP_INFORM(InitOSProcSetRespect, "KMP_HW_SUBSET", m);
Paul Osmialowski	ecbe2ea	2016-07-29 20:55:03 +0000	[diff] [blame]	3988	} else {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3989	KMP_INFORM(InitOSProcSetNotRespect, "KMP_HW_SUBSET", m);
Paul Osmialowski	ecbe2ea	2016-07-29 20:55:03 +0000	[diff] [blame]	3990	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	3991	KMP_INFORM(AvailableOSProc, "KMP_HW_SUBSET", __kmp_avail_proc);
				3992	kmp_str_buf_t buf;
				3993	__kmp_str_buf_init(&buf);
				3994	__kmp_str_buf_print(&buf, "%d", nPackages);
				3995	KMP_INFORM(TopologyExtra, "KMP_HW_SUBSET", buf.str, nCoresPerPkg,
				3996	__kmp_nThreadsPerCore, __kmp_ncores);
				3997	__kmp_str_buf_free(&buf);
				3998	}
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	3999	_exit:
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4000	if (__kmp_pu_os_idx != NULL) {
				4001	__kmp_free(__kmp_pu_os_idx);
				4002	__kmp_pu_os_idx = NULL;
				4003	}
				4004	}
				4005
				4006	// This function figures out the deepest level at which there is at least one
				4007	// cluster/core with more than one processing unit bound to it.
				4008	static int __kmp_affinity_find_core_level(const AddrUnsPair *address2os,
				4009	int nprocs, int bottom_level) {
				4010	int core_level = 0;
				4011
				4012	for (int i = 0; i < nprocs; i++) {
				4013	for (int j = bottom_level; j > 0; j--) {
				4014	if (address2os[i].first.labels[j] > 0) {
				4015	if (core_level < (j - 1)) {
				4016	core_level = j - 1;
				4017	}
				4018	}
				4019	}
				4020	}
				4021	return core_level;
				4022	}
				4023
				4024	// This function counts number of clusters/cores at given level.
				4025	static int __kmp_affinity_compute_ncores(const AddrUnsPair *address2os,
				4026	int nprocs, int bottom_level,
				4027	int core_level) {
				4028	int ncores = 0;
				4029	int i, j;
				4030
				4031	j = bottom_level;
				4032	for (i = 0; i < nprocs; i++) {
				4033	for (j = bottom_level; j > core_level; j--) {
				4034	if ((i + 1) < nprocs) {
				4035	if (address2os[i + 1].first.labels[j] > 0) {
				4036	break;
				4037	}
				4038	}
				4039	}
				4040	if (j == core_level) {
				4041	ncores++;
				4042	}
				4043	}
				4044	if (j > core_level) {
				4045	// In case of ( nprocs < __kmp_avail_proc ) we may end too deep and miss one
				4046	// core. May occur when called from __kmp_affinity_find_core().
				4047	ncores++;
				4048	}
				4049	return ncores;
				4050	}
				4051
				4052	// This function finds to which cluster/core given processing unit is bound.
				4053	static int __kmp_affinity_find_core(const AddrUnsPair *address2os, int proc,
				4054	int bottom_level, int core_level) {
				4055	return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level,
Andrey Churbanov	c47afcd	2017-07-03 11:24:08 +0000	[diff] [blame]	4056	core_level) -
				4057	1;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4058	}
				4059
				4060	// This function finds maximal number of processing units bound to a
				4061	// cluster/core at given level.
				4062	static int __kmp_affinity_max_proc_per_core(const AddrUnsPair *address2os,
				4063	int nprocs, int bottom_level,
				4064	int core_level) {
				4065	int maxprocpercore = 0;
				4066
				4067	if (core_level < bottom_level) {
				4068	for (int i = 0; i < nprocs; i++) {
				4069	int percore = address2os[i].first.labels[core_level + 1] + 1;
				4070
				4071	if (percore > maxprocpercore) {
				4072	maxprocpercore = percore;
				4073	}
				4074	}
				4075	} else {
				4076	maxprocpercore = 1;
				4077	}
				4078	return maxprocpercore;
Paul Osmialowski	ecbe2ea	2016-07-29 20:55:03 +0000	[diff] [blame]	4079	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4080
				4081	static AddrUnsPair *address2os = NULL;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4082	static int *procarr = NULL;
				4083	static int __kmp_aff_depth = 0;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4084
// KMP_EXIT_AFF_NONE: leave the enclosing void function on the
// "affinity type is none" path. It checks that the affinity type really is
// affinity_none and that no topology table was built, applies thread places
// with an empty topology, creates the single-place mask array and, when
// hierarchical scheduling is compiled in, sets the dispatch hierarchy values.
//
// The body is wrapped in do { ... } while (0) so that the macro behaves as a
// single statement (safe under an unbraced if/else); invoke it with a
// trailing semicolon: `KMP_EXIT_AFF_NONE;`.
#if KMP_USE_HIER_SCHED
#define KMP_EXIT_AFF_NONE                                                      \
  do {                                                                         \
    KMP_ASSERT(__kmp_affinity_type == affinity_none);                          \
    KMP_ASSERT(address2os == NULL);                                            \
    __kmp_apply_thread_places(NULL, 0);                                        \
    __kmp_create_affinity_none_places();                                       \
    __kmp_dispatch_set_hierarchy_values();                                     \
    return;                                                                    \
  } while (0)
#else
#define KMP_EXIT_AFF_NONE                                                      \
  do {                                                                         \
    KMP_ASSERT(__kmp_affinity_type == affinity_none);                          \
    KMP_ASSERT(address2os == NULL);                                            \
    __kmp_apply_thread_places(NULL, 0);                                        \
    __kmp_create_affinity_none_places();                                       \
    return;                                                                    \
  } while (0)
#endif
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	4101
Jonathan Peyton	1482db9	2018-04-18 19:25:48 +0000	[diff] [blame]	4102	// Create a one element mask array (set of places) which only contains the
				4103	// initial process's affinity mask
				4104	static void __kmp_create_affinity_none_places() {
				4105	KMP_ASSERT(__kmp_affin_fullMask != NULL);
				4106	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				4107	__kmp_affinity_num_masks = 1;
				4108	KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
				4109	kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, 0);
				4110	KMP_CPU_COPY(dest, __kmp_affin_fullMask);
				4111	}
				4112
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4113	static int __kmp_affinity_cmp_Address_child_num(const void a, const void b) {
Andrey Churbanov	5ba90c7	2017-07-17 09:03:14 +0000	[diff] [blame]	4114	const Address aa = &(((const AddrUnsPair )a)->first);
				4115	const Address bb = &(((const AddrUnsPair )b)->first);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4116	unsigned depth = aa->depth;
				4117	unsigned i;
				4118	KMP_DEBUG_ASSERT(depth == bb->depth);
				4119	KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
				4120	KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
				4121	for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
				4122	int j = depth - i - 1;
				4123	if (aa->childNums[j] < bb->childNums[j])
				4124	return -1;
				4125	if (aa->childNums[j] > bb->childNums[j])
				4126	return 1;
				4127	}
				4128	for (; i < depth; i++) {
				4129	int j = i - __kmp_affinity_compact;
				4130	if (aa->childNums[j] < bb->childNums[j])
				4131	return -1;
				4132	if (aa->childNums[j] > bb->childNums[j])
				4133	return 1;
				4134	}
				4135	return 0;
Jonathan Peyton	e6abe52	2016-09-02 20:54:58 +0000	[diff] [blame]	4136	}
				4137
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4138	static void __kmp_aux_affinity_initialize(void) {
				4139	if (__kmp_affinity_masks != NULL) {
				4140	KMP_ASSERT(__kmp_affin_fullMask != NULL);
				4141	return;
				4142	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4143
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4144	// Create the "full" mask - this defines all of the processors that we
				4145	// consider to be in the machine model. If respect is set, then it is the
				4146	// initialization thread's affinity mask. Otherwise, it is all processors that
				4147	// we know about on the machine.
				4148	if (__kmp_affin_fullMask == NULL) {
				4149	KMP_CPU_ALLOC(__kmp_affin_fullMask);
				4150	}
				4151	if (KMP_AFFINITY_CAPABLE()) {
				4152	if (__kmp_affinity_respect_mask) {
				4153	__kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4154
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4155	// Count the number of available processors.
				4156	unsigned i;
				4157	__kmp_avail_proc = 0;
				4158	KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
				4159	if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
				4160	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4161	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4162	__kmp_avail_proc++;
				4163	}
				4164	if (__kmp_avail_proc > __kmp_xproc) {
				4165	if (__kmp_affinity_verbose \|\|
				4166	(__kmp_affinity_warnings &&
				4167	(__kmp_affinity_type != affinity_none))) {
				4168	KMP_WARNING(ErrorInitializeAffinity);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4169	}
				4170	__kmp_affinity_type = affinity_none;
Andrey Churbanov	1f037e4	2015-03-10 09:15:26 +0000	[diff] [blame]	4171	KMP_AFFINITY_DISABLE();
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4172	return;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4173	}
				4174	} else {
				4175	__kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
				4176	__kmp_avail_proc = __kmp_xproc;
				4177	}
				4178	}
				4179
Jonathan Peyton	6424950	2017-11-29 22:27:18 +0000	[diff] [blame]	4180	if (__kmp_affinity_gran == affinity_gran_tile &&
				4181	// check if user's request is valid
				4182	__kmp_affinity_dispatch->get_api_type() == KMPAffinity::NATIVE_OS) {
				4183	KMP_WARNING(AffTilesNoHWLOC, "KMP_AFFINITY");
				4184	__kmp_affinity_gran = affinity_gran_package;
				4185	}
				4186
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4187	int depth = -1;
				4188	kmp_i18n_id_t msg_id = kmp_i18n_null;
				4189
				4190	// For backward compatibility, setting KMP_CPUINFO_FILE =>
				4191	// KMP_TOPOLOGY_METHOD=cpuinfo
				4192	if ((__kmp_cpuinfo_file != NULL) &&
				4193	(__kmp_affinity_top_method == affinity_top_method_all)) {
				4194	__kmp_affinity_top_method = affinity_top_method_cpuinfo;
				4195	}
				4196
				4197	if (__kmp_affinity_top_method == affinity_top_method_all) {
				4198	// In the default code path, errors are not fatal - we just try using
				4199	// another method. We only emit a warning message if affinity is on, or the
				4200	// verbose flag is set, an the nowarnings flag was not set.
				4201	const char *file_name = NULL;
				4202	int line = 0;
				4203	#if KMP_USE_HWLOC
				4204	if (depth < 0 &&
				4205	__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
				4206	if (__kmp_affinity_verbose) {
				4207	KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
				4208	}
				4209	if (!__kmp_hwloc_error) {
				4210	depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
				4211	if (depth == 0) {
				4212	KMP_EXIT_AFF_NONE;
				4213	} else if (depth < 0 && __kmp_affinity_verbose) {
				4214	KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
				4215	}
				4216	} else if (__kmp_affinity_verbose) {
				4217	KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
				4218	}
				4219	}
				4220	#endif
				4221
				4222	#if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
				4223
				4224	if (depth < 0) {
				4225	if (__kmp_affinity_verbose) {
				4226	KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
				4227	}
				4228
				4229	file_name = NULL;
				4230	depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
				4231	if (depth == 0) {
				4232	KMP_EXIT_AFF_NONE;
				4233	}
				4234
				4235	if (depth < 0) {
				4236	if (__kmp_affinity_verbose) {
				4237	if (msg_id != kmp_i18n_null) {
				4238	KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY",
				4239	__kmp_i18n_catgets(msg_id),
				4240	KMP_I18N_STR(DecodingLegacyAPIC));
				4241	} else {
				4242	KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
				4243	KMP_I18N_STR(DecodingLegacyAPIC));
				4244	}
				4245	}
				4246
				4247	file_name = NULL;
				4248	depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
				4249	if (depth == 0) {
				4250	KMP_EXIT_AFF_NONE;
				4251	}
				4252	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4253	}
				4254
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4255	#endif /* KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4256
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4257	#if KMP_OS_LINUX
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4258
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4259	if (depth < 0) {
				4260	if (__kmp_affinity_verbose) {
				4261	if (msg_id != kmp_i18n_null) {
				4262	KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY",
				4263	__kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4264	} else {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4265	KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4266	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4267	}
				4268
				4269	FILE *f = fopen("/proc/cpuinfo", "r");
				4270	if (f == NULL) {
				4271	msg_id = kmp_i18n_str_CantOpenCpuinfo;
				4272	} else {
				4273	file_name = "/proc/cpuinfo";
				4274	depth =
				4275	__kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
				4276	fclose(f);
				4277	if (depth == 0) {
				4278	KMP_EXIT_AFF_NONE;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4279	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4280	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4281	}
				4282
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4283	#endif /* KMP_OS_LINUX */
				4284
				4285	#if KMP_GROUP_AFFINITY
				4286
				4287	if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
				4288	if (__kmp_affinity_verbose) {
				4289	KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
				4290	}
				4291
				4292	depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
				4293	KMP_ASSERT(depth != 0);
				4294	}
				4295
				4296	#endif /* KMP_GROUP_AFFINITY */
				4297
				4298	if (depth < 0) {
				4299	if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
				4300	if (file_name == NULL) {
				4301	KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
				4302	} else if (line == 0) {
				4303	KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
				4304	} else {
				4305	KMP_INFORM(UsingFlatOSFileLine, file_name, line,
				4306	__kmp_i18n_catgets(msg_id));
				4307	}
				4308	}
				4309	// FIXME - print msg if msg_id = kmp_i18n_null ???
				4310
				4311	file_name = "";
				4312	depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
				4313	if (depth == 0) {
				4314	KMP_EXIT_AFF_NONE;
				4315	}
				4316	KMP_ASSERT(depth > 0);
				4317	KMP_ASSERT(address2os != NULL);
				4318	}
				4319	}
				4320
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	4321	#if KMP_USE_HWLOC
				4322	else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
				4323	KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
				4324	if (__kmp_affinity_verbose) {
				4325	KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
				4326	}
				4327	depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
				4328	if (depth == 0) {
				4329	KMP_EXIT_AFF_NONE;
				4330	}
				4331	}
				4332	#endif // KMP_USE_HWLOC
				4333
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4334	// If the user has specified that a paricular topology discovery method is to be
				4335	// used, then we abort if that method fails. The exception is group affinity,
				4336	// which might have been implicitly set.
				4337
				4338	#if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
				4339
				4340	else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
				4341	if (__kmp_affinity_verbose) {
				4342	KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
				4343	}
				4344
				4345	depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
				4346	if (depth == 0) {
				4347	KMP_EXIT_AFF_NONE;
				4348	}
				4349	if (depth < 0) {
				4350	KMP_ASSERT(msg_id != kmp_i18n_null);
				4351	KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
				4352	}
				4353	} else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
				4354	if (__kmp_affinity_verbose) {
				4355	KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
				4356	}
				4357
				4358	depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
				4359	if (depth == 0) {
				4360	KMP_EXIT_AFF_NONE;
				4361	}
				4362	if (depth < 0) {
				4363	KMP_ASSERT(msg_id != kmp_i18n_null);
				4364	KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
				4365	}
				4366	}
				4367
				4368	#endif /* KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 */
				4369
				4370	else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
				4371	const char *filename;
				4372	if (__kmp_cpuinfo_file != NULL) {
				4373	filename = __kmp_cpuinfo_file;
				4374	} else {
				4375	filename = "/proc/cpuinfo";
				4376	}
				4377
				4378	if (__kmp_affinity_verbose) {
				4379	KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
				4380	}
				4381
				4382	FILE *f = fopen(filename, "r");
				4383	if (f == NULL) {
				4384	int code = errno;
				4385	if (__kmp_cpuinfo_file != NULL) {
Jonathan Peyton	6a393f7	2017-09-05 15:43:58 +0000	[diff] [blame]	4386	__kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
				4387	KMP_HNT(NameComesFrom_CPUINFO_FILE), __kmp_msg_null);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4388	} else {
Jonathan Peyton	6a393f7	2017-09-05 15:43:58 +0000	[diff] [blame]	4389	__kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
				4390	__kmp_msg_null);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4391	}
				4392	}
				4393	int line = 0;
				4394	depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
				4395	fclose(f);
				4396	if (depth < 0) {
				4397	KMP_ASSERT(msg_id != kmp_i18n_null);
				4398	if (line > 0) {
				4399	KMP_FATAL(FileLineMsgExiting, filename, line,
				4400	__kmp_i18n_catgets(msg_id));
				4401	} else {
				4402	KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
				4403	}
				4404	}
				4405	if (__kmp_affinity_type == affinity_none) {
				4406	KMP_ASSERT(depth == 0);
				4407	KMP_EXIT_AFF_NONE;
				4408	}
				4409	}
				4410
				4411	#if KMP_GROUP_AFFINITY
				4412
				4413	else if (__kmp_affinity_top_method == affinity_top_method_group) {
				4414	if (__kmp_affinity_verbose) {
				4415	KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
				4416	}
				4417
				4418	depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
				4419	KMP_ASSERT(depth != 0);
				4420	if (depth < 0) {
				4421	KMP_ASSERT(msg_id != kmp_i18n_null);
				4422	KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
				4423	}
				4424	}
				4425
				4426	#endif /* KMP_GROUP_AFFINITY */
				4427
				4428	else if (__kmp_affinity_top_method == affinity_top_method_flat) {
				4429	if (__kmp_affinity_verbose) {
				4430	KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
				4431	}
				4432
				4433	depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
				4434	if (depth == 0) {
				4435	KMP_EXIT_AFF_NONE;
				4436	}
				4437	// should not fail
				4438	KMP_ASSERT(depth > 0);
				4439	KMP_ASSERT(address2os != NULL);
				4440	}
				4441
Jonathan Peyton	f639936	2018-07-09 17:51:13 +0000	[diff] [blame]	4442	#if KMP_USE_HIER_SCHED
				4443	__kmp_dispatch_set_hierarchy_values();
				4444	#endif
				4445
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4446	if (address2os == NULL) {
				4447	if (KMP_AFFINITY_CAPABLE() &&
				4448	(__kmp_affinity_verbose \|\|
				4449	(__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none)))) {
				4450	KMP_WARNING(ErrorInitializeAffinity);
				4451	}
				4452	__kmp_affinity_type = affinity_none;
Jonathan Peyton	1482db9	2018-04-18 19:25:48 +0000	[diff] [blame]	4453	__kmp_create_affinity_none_places();
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4454	KMP_AFFINITY_DISABLE();
				4455	return;
				4456	}
				4457
Andrey Churbanov	a586821	2017-11-30 11:51:47 +0000	[diff] [blame]	4458	if (__kmp_affinity_gran == affinity_gran_tile
				4459	#if KMP_USE_HWLOC
				4460	&& __kmp_tile_depth == 0
				4461	#endif
				4462	) {
Jonathan Peyton	6424950	2017-11-29 22:27:18 +0000	[diff] [blame]	4463	// tiles requested but not detected, warn user on this
				4464	KMP_WARNING(AffTilesNoTiles, "KMP_AFFINITY");
				4465	}
				4466
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4467	__kmp_apply_thread_places(&address2os, depth);
				4468
				4469	// Create the table of masks, indexed by thread Id.
				4470	unsigned maxIndex;
				4471	unsigned numUnique;
				4472	kmp_affin_mask_t *osId2Mask =
				4473	__kmp_create_masks(&maxIndex, &numUnique, address2os, __kmp_avail_proc);
				4474	if (__kmp_affinity_gran_levels == 0) {
				4475	KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
				4476	}
				4477
				4478	// Set the childNums vector in all Address objects. This must be done before
				4479	// we can sort using __kmp_affinity_cmp_Address_child_num(), which takes into
				4480	// account the setting of __kmp_affinity_compact.
				4481	__kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
				4482
				4483	switch (__kmp_affinity_type) {
				4484
				4485	case affinity_explicit:
				4486	KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
				4487	#if OMP_40_ENABLED
				4488	if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
				4489	#endif
				4490	{
				4491	__kmp_affinity_process_proclist(
				4492	&__kmp_affinity_masks, &__kmp_affinity_num_masks,
				4493	__kmp_affinity_proclist, osId2Mask, maxIndex);
				4494	}
				4495	#if OMP_40_ENABLED
				4496	else {
				4497	__kmp_affinity_process_placelist(
				4498	&__kmp_affinity_masks, &__kmp_affinity_num_masks,
				4499	__kmp_affinity_proclist, osId2Mask, maxIndex);
				4500	}
				4501	#endif
				4502	if (__kmp_affinity_num_masks == 0) {
				4503	if (__kmp_affinity_verbose \|\|
				4504	(__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
				4505	KMP_WARNING(AffNoValidProcID);
				4506	}
				4507	__kmp_affinity_type = affinity_none;
Jonathan Peyton	9355d0d	2019-01-15 19:39:32 +0000	[diff] [blame^]	4508	__kmp_create_affinity_none_places();
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4509	return;
				4510	}
				4511	break;
				4512
				4513	// The other affinity types rely on sorting the Addresses according to some
				4514	// permutation of the machine topology tree. Set __kmp_affinity_compact and
				4515	// __kmp_affinity_offset appropriately, then jump to a common code fragment
				4516	// to do the sort and create the array of affinity masks.
				4517
				4518	case affinity_logical:
				4519	__kmp_affinity_compact = 0;
				4520	if (__kmp_affinity_offset) {
				4521	__kmp_affinity_offset =
				4522	__kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
				4523	}
				4524	goto sortAddresses;
				4525
				4526	case affinity_physical:
				4527	if (__kmp_nThreadsPerCore > 1) {
				4528	__kmp_affinity_compact = 1;
				4529	if (__kmp_affinity_compact >= depth) {
				4530	__kmp_affinity_compact = 0;
				4531	}
				4532	} else {
				4533	__kmp_affinity_compact = 0;
				4534	}
				4535	if (__kmp_affinity_offset) {
				4536	__kmp_affinity_offset =
				4537	__kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
				4538	}
				4539	goto sortAddresses;
				4540
				4541	case affinity_scatter:
				4542	if (__kmp_affinity_compact >= depth) {
				4543	__kmp_affinity_compact = 0;
				4544	} else {
				4545	__kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
				4546	}
				4547	goto sortAddresses;
				4548
				4549	case affinity_compact:
				4550	if (__kmp_affinity_compact >= depth) {
				4551	__kmp_affinity_compact = depth - 1;
				4552	}
				4553	goto sortAddresses;
				4554
				4555	case affinity_balanced:
				4556	if (depth <= 1) {
				4557	if (__kmp_affinity_verbose \|\| __kmp_affinity_warnings) {
				4558	KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
				4559	}
				4560	__kmp_affinity_type = affinity_none;
Jonathan Peyton	9355d0d	2019-01-15 19:39:32 +0000	[diff] [blame^]	4561	__kmp_create_affinity_none_places();
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4562	return;
Jonathan Peyton	9355d0d	2019-01-15 19:39:32 +0000	[diff] [blame^]	4563	} else if (!__kmp_affinity_uniform_topology()) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4564	// Save the depth for further usage
				4565	__kmp_aff_depth = depth;
				4566
				4567	int core_level = __kmp_affinity_find_core_level(
				4568	address2os, __kmp_avail_proc, depth - 1);
				4569	int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
				4570	depth - 1, core_level);
				4571	int maxprocpercore = __kmp_affinity_max_proc_per_core(
				4572	address2os, __kmp_avail_proc, depth - 1, core_level);
				4573
				4574	int nproc = ncores * maxprocpercore;
				4575	if ((nproc < 2) \|\| (nproc < __kmp_avail_proc)) {
				4576	if (__kmp_affinity_verbose \|\| __kmp_affinity_warnings) {
				4577	KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
				4578	}
				4579	__kmp_affinity_type = affinity_none;
				4580	return;
				4581	}
				4582
				4583	procarr = (int )__kmp_allocate(sizeof(int) nproc);
				4584	for (int i = 0; i < nproc; i++) {
				4585	procarr[i] = -1;
				4586	}
				4587
				4588	int lastcore = -1;
				4589	int inlastcore = 0;
				4590	for (int i = 0; i < __kmp_avail_proc; i++) {
				4591	int proc = address2os[i].second;
				4592	int core =
				4593	__kmp_affinity_find_core(address2os, i, depth - 1, core_level);
				4594
				4595	if (core == lastcore) {
				4596	inlastcore++;
				4597	} else {
				4598	inlastcore = 0;
				4599	}
				4600	lastcore = core;
				4601
				4602	procarr[core * maxprocpercore + inlastcore] = proc;
				4603	}
Jonathan Peyton	9355d0d	2019-01-15 19:39:32 +0000	[diff] [blame^]	4604	}
				4605	if (__kmp_affinity_compact >= depth) {
				4606	__kmp_affinity_compact = depth - 1;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4607	}
				4608
				4609	sortAddresses:
				4610	// Allocate the gtid->affinity mask table.
				4611	if (__kmp_affinity_dups) {
				4612	__kmp_affinity_num_masks = __kmp_avail_proc;
				4613	} else {
				4614	__kmp_affinity_num_masks = numUnique;
				4615	}
				4616
				4617	#if OMP_40_ENABLED
				4618	if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
				4619	(__kmp_affinity_num_places > 0) &&
				4620	((unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks)) {
				4621	__kmp_affinity_num_masks = __kmp_affinity_num_places;
				4622	}
				4623	#endif
				4624
				4625	KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
				4626
				4627	// Sort the address2os table according to the current setting of
				4628	// __kmp_affinity_compact, then fill out __kmp_affinity_masks.
				4629	qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
				4630	__kmp_affinity_cmp_Address_child_num);
				4631	{
				4632	int i;
				4633	unsigned j;
				4634	for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
				4635	if ((!__kmp_affinity_dups) && (!address2os[i].first.leader)) {
				4636	continue;
				4637	}
				4638	unsigned osId = address2os[i].second;
				4639	kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
				4640	kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j);
				4641	KMP_ASSERT(KMP_CPU_ISSET(osId, src));
				4642	KMP_CPU_COPY(dest, src);
				4643	if (++j >= __kmp_affinity_num_masks) {
				4644	break;
				4645	}
				4646	}
				4647	KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
				4648	}
				4649	break;
				4650
				4651	default:
				4652	KMP_ASSERT2(0, "Unexpected affinity setting");
				4653	}
				4654
				4655	KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex + 1);
				4656	machine_hierarchy.init(address2os, __kmp_avail_proc);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4657	}
Jonathan Peyton	fd7cc42	2016-06-21 15:54:38 +0000	[diff] [blame]	4658	#undef KMP_EXIT_AFF_NONE
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4659
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4660	void __kmp_affinity_initialize(void) {
				4661	// Much of the code above was written assumming that if a machine was not
				4662	// affinity capable, then __kmp_affinity_type == affinity_none. We now
				4663	// explicitly represent this as __kmp_affinity_type == affinity_disabled.
				4664	// There are too many checks for __kmp_affinity_type == affinity_none
				4665	// in this code. Instead of trying to change them all, check if
				4666	// __kmp_affinity_type == affinity_disabled, and if so, slam it with
				4667	// affinity_none, call the real initialization routine, then restore
				4668	// __kmp_affinity_type to affinity_disabled.
				4669	int disabled = (__kmp_affinity_type == affinity_disabled);
				4670	if (!KMP_AFFINITY_CAPABLE()) {
				4671	KMP_ASSERT(disabled);
				4672	}
				4673	if (disabled) {
				4674	__kmp_affinity_type = affinity_none;
				4675	}
				4676	__kmp_aux_affinity_initialize();
				4677	if (disabled) {
				4678	__kmp_affinity_type = affinity_disabled;
				4679	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4680	}
				4681
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4682	void __kmp_affinity_uninitialize(void) {
				4683	if (__kmp_affinity_masks != NULL) {
				4684	KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
				4685	__kmp_affinity_masks = NULL;
				4686	}
				4687	if (__kmp_affin_fullMask != NULL) {
				4688	KMP_CPU_FREE(__kmp_affin_fullMask);
				4689	__kmp_affin_fullMask = NULL;
				4690	}
				4691	__kmp_affinity_num_masks = 0;
				4692	__kmp_affinity_type = affinity_default;
				4693	#if OMP_40_ENABLED
				4694	__kmp_affinity_num_places = 0;
				4695	#endif
				4696	if (__kmp_affinity_proclist != NULL) {
				4697	__kmp_free(__kmp_affinity_proclist);
				4698	__kmp_affinity_proclist = NULL;
				4699	}
				4700	if (address2os != NULL) {
				4701	__kmp_free(address2os);
				4702	address2os = NULL;
				4703	}
				4704	if (procarr != NULL) {
				4705	__kmp_free(procarr);
				4706	procarr = NULL;
				4707	}
				4708	#if KMP_USE_HWLOC
				4709	if (__kmp_hwloc_topology != NULL) {
				4710	hwloc_topology_destroy(__kmp_hwloc_topology);
				4711	__kmp_hwloc_topology = NULL;
				4712	}
				4713	#endif
				4714	KMPAffinity::destroy_api();
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4715	}
				4716
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4717	void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
				4718	if (!KMP_AFFINITY_CAPABLE()) {
				4719	return;
				4720	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4721
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4722	kmp_info_t th = (kmp_info_t )TCR_SYNC_PTR(__kmp_threads[gtid]);
				4723	if (th->th.th_affin_mask == NULL) {
				4724	KMP_CPU_ALLOC(th->th.th_affin_mask);
				4725	} else {
				4726	KMP_CPU_ZERO(th->th.th_affin_mask);
				4727	}
				4728
				4729	// Copy the thread mask to the kmp_info_t strucuture. If
				4730	// __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one that
				4731	// has all of the OS proc ids set, or if __kmp_affinity_respect_mask is set,
				4732	// then the full mask is the same as the mask of the initialization thread.
				4733	kmp_affin_mask_t *mask;
				4734	int i;
				4735
				4736	#if OMP_40_ENABLED
Jonathan Peyton	1482db9	2018-04-18 19:25:48 +0000	[diff] [blame]	4737	if (KMP_AFFINITY_NON_PROC_BIND)
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4738	#endif
				4739	{
				4740	if ((__kmp_affinity_type == affinity_none) \|\|
				4741	(__kmp_affinity_type == affinity_balanced)) {
				4742	#if KMP_GROUP_AFFINITY
				4743	if (__kmp_num_proc_groups > 1) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4744	return;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4745	}
				4746	#endif
				4747	KMP_ASSERT(__kmp_affin_fullMask != NULL);
Jonathan Peyton	1482db9	2018-04-18 19:25:48 +0000	[diff] [blame]	4748	i = 0;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4749	mask = __kmp_affin_fullMask;
				4750	} else {
				4751	KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
				4752	i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
				4753	mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4754	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4755	}
				4756	#if OMP_40_ENABLED
				4757	else {
				4758	if ((!isa_root) \|\|
				4759	(__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
				4760	#if KMP_GROUP_AFFINITY
				4761	if (__kmp_num_proc_groups > 1) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4762	return;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4763	}
				4764	#endif
				4765	KMP_ASSERT(__kmp_affin_fullMask != NULL);
				4766	i = KMP_PLACE_ALL;
				4767	mask = __kmp_affin_fullMask;
				4768	} else {
				4769	// int i = some hash function or just a counter that doesn't
				4770	// always start at 0. Use gtid for now.
				4771	KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
				4772	i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
				4773	mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4774	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4775	}
				4776	#endif
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4777
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4778	#if OMP_40_ENABLED
				4779	th->th.th_current_place = i;
				4780	if (isa_root) {
				4781	th->th.th_new_place = i;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4782	th->th.th_first_place = 0;
				4783	th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jonathan Peyton	9355d0d	2019-01-15 19:39:32 +0000	[diff] [blame^]	4784	} else if (KMP_AFFINITY_NON_PROC_BIND) {
				4785	// When using a Non-OMP_PROC_BIND affinity method,
				4786	// set all threads' place-partition-var to the entire place list
				4787	th->th.th_first_place = 0;
				4788	th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4789	}
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	4790
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4791	if (i == KMP_PLACE_ALL) {
				4792	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
				4793	gtid));
				4794	} else {
				4795	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
				4796	gtid, i));
				4797	}
				4798	#else
				4799	if (i == -1) {
				4800	KA_TRACE(
				4801	100,
				4802	("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n",
				4803	gtid));
				4804	} else {
				4805	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
				4806	gtid, i));
				4807	}
				4808	#endif /* OMP_40_ENABLED */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4809
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4810	KMP_CPU_COPY(th->th.th_affin_mask, mask);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4811
Jonathan Peyton	125203e	2017-12-06 21:07:41 +0000	[diff] [blame]	4812	if (__kmp_affinity_verbose
				4813	/* to avoid duplicate printing (will be correctly printed on barrier) */
Jonathan Peyton	2c3e5d8	2018-08-24 20:35:42 +0000	[diff] [blame]	4814	&& (__kmp_affinity_type == affinity_none \|\|
				4815	(i != KMP_PLACE_ALL && __kmp_affinity_type != affinity_balanced))) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4816	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4817	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4818	th->th.th_affin_mask);
				4819	KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
				4820	__kmp_gettid(), gtid, buf);
				4821	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4822
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4823	#if KMP_OS_WINDOWS
				4824	// On Windows* OS, the process affinity mask might have changed. If the user
				4825	// didn't request affinity and this call fails, just continue silently.
				4826	// See CQ171393.
				4827	if (__kmp_affinity_type == affinity_none) {
				4828	__kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
				4829	} else
Jonathan Peyton	7c465a5	2016-09-12 19:02:53 +0000	[diff] [blame]	4830	#endif
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4831	__kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
Jonathan Peyton	7c465a5	2016-09-12 19:02:53 +0000	[diff] [blame]	4832	}
				4833
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4834	#if OMP_40_ENABLED
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4835
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4836	void __kmp_affinity_set_place(int gtid) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4837	if (!KMP_AFFINITY_CAPABLE()) {
				4838	return;
				4839	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4840
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4841	kmp_info_t th = (kmp_info_t )TCR_SYNC_PTR(__kmp_threads[gtid]);
				4842
				4843	KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current "
				4844	"place = %d)\n",
				4845	gtid, th->th.th_new_place, th->th.th_current_place));
				4846
				4847	// Check that the new place is within this thread's partition.
				4848	KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
				4849	KMP_ASSERT(th->th.th_new_place >= 0);
				4850	KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
				4851	if (th->th.th_first_place <= th->th.th_last_place) {
				4852	KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&
				4853	(th->th.th_new_place <= th->th.th_last_place));
				4854	} else {
				4855	KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) \|\|
				4856	(th->th.th_new_place >= th->th.th_last_place));
				4857	}
				4858
				4859	// Copy the thread mask to the kmp_info_t strucuture,
				4860	// and set this thread's affinity.
				4861	kmp_affin_mask_t *mask =
				4862	KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place);
				4863	KMP_CPU_COPY(th->th.th_affin_mask, mask);
				4864	th->th.th_current_place = th->th.th_new_place;
				4865
				4866	if (__kmp_affinity_verbose) {
				4867	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4868	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4869	th->th.th_affin_mask);
				4870	KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
				4871	__kmp_gettid(), gtid, buf);
				4872	}
				4873	__kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
				4874	}
				4875
				4876	#endif /* OMP_40_ENABLED */
				4877
				4878	int __kmp_aux_set_affinity(void **mask) {
				4879	int gtid;
				4880	kmp_info_t *th;
				4881	int retval;
				4882
				4883	if (!KMP_AFFINITY_CAPABLE()) {
				4884	return -1;
				4885	}
				4886
				4887	gtid = __kmp_entry_gtid();
				4888	KA_TRACE(1000, ; {
				4889	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4890	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4891	(kmp_affin_mask_t )(mask));
				4892	__kmp_debug_printf(
				4893	"kmp_set_affinity: setting affinity mask for thread %d = %s\n", gtid,
				4894	buf);
				4895	});
				4896
				4897	if (__kmp_env_consistency_check) {
				4898	if ((mask == NULL) \|\| (*mask == NULL)) {
				4899	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
				4900	} else {
				4901	unsigned proc;
				4902	int num_procs = 0;
				4903
				4904	KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t )(mask))) {
				4905	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
				4906	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4907	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4908	if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t )(mask))) {
				4909	continue;
				4910	}
				4911	num_procs++;
				4912	}
				4913	if (num_procs == 0) {
				4914	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
				4915	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4916
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4917	#if KMP_GROUP_AFFINITY
				4918	if (__kmp_get_proc_group((kmp_affin_mask_t )(mask)) < 0) {
				4919	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
				4920	}
				4921	#endif /* KMP_GROUP_AFFINITY */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4922	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4923	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4924
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4925	th = __kmp_threads[gtid];
				4926	KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
				4927	retval = __kmp_set_system_affinity((kmp_affin_mask_t )(mask), FALSE);
				4928	if (retval == 0) {
				4929	KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t )(mask));
				4930	}
				4931
				4932	#if OMP_40_ENABLED
				4933	th->th.th_current_place = KMP_PLACE_UNDEFINED;
				4934	th->th.th_new_place = KMP_PLACE_UNDEFINED;
				4935	th->th.th_first_place = 0;
				4936	th->th.th_last_place = __kmp_affinity_num_masks - 1;
				4937
				4938	// Turn off 4.0 affinity for the current tread at this parallel level.
				4939	th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
				4940	#endif
				4941
				4942	return retval;
				4943	}
				4944
				4945	int __kmp_aux_get_affinity(void **mask) {
				4946	int gtid;
				4947	int retval;
				4948	kmp_info_t *th;
				4949
				4950	if (!KMP_AFFINITY_CAPABLE()) {
				4951	return -1;
				4952	}
				4953
				4954	gtid = __kmp_entry_gtid();
				4955	th = __kmp_threads[gtid];
				4956	KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
				4957
				4958	KA_TRACE(1000, ; {
				4959	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4960	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4961	th->th.th_affin_mask);
				4962	__kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n",
				4963	gtid, buf);
				4964	});
				4965
				4966	if (__kmp_env_consistency_check) {
				4967	if ((mask == NULL) \|\| (*mask == NULL)) {
				4968	KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
				4969	}
				4970	}
				4971
				4972	#if !KMP_OS_WINDOWS
				4973
				4974	retval = __kmp_get_system_affinity((kmp_affin_mask_t )(mask), FALSE);
				4975	KA_TRACE(1000, ; {
				4976	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4977	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4978	(kmp_affin_mask_t )(mask));
				4979	__kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n",
				4980	gtid, buf);
				4981	});
				4982	return retval;
				4983
				4984	#else
				4985
				4986	KMP_CPU_COPY((kmp_affin_mask_t )(mask), th->th.th_affin_mask);
				4987	return 0;
				4988
				4989	#endif /* KMP_OS_WINDOWS */
				4990	}
				4991
				4992	int __kmp_aux_get_affinity_max_proc() {
				4993	if (!KMP_AFFINITY_CAPABLE()) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4994	return 0;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	4995	}
				4996	#if KMP_GROUP_AFFINITY
				4997	if (__kmp_num_proc_groups > 1) {
				4998	return (int)(__kmp_num_proc_groups * sizeof(DWORD_PTR) * CHAR_BIT);
				4999	}
				5000	#endif
				5001	return __kmp_xproc;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5002	}
				5003
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5004	int __kmp_aux_set_affinity_mask_proc(int proc, void **mask) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5005	if (!KMP_AFFINITY_CAPABLE()) {
				5006	return -1;
				5007	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5008
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5009	KA_TRACE(1000, ; {
				5010	int gtid = __kmp_entry_gtid();
				5011	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				5012	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				5013	(kmp_affin_mask_t )(mask));
				5014	__kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in "
				5015	"affinity mask for thread %d = %s\n",
				5016	proc, gtid, buf);
				5017	});
				5018
				5019	if (__kmp_env_consistency_check) {
				5020	if ((mask == NULL) \|\| (*mask == NULL)) {
				5021	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5022	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5023	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5024
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5025	if ((proc < 0) \|\| (proc >= __kmp_aux_get_affinity_max_proc())) {
				5026	return -1;
				5027	}
				5028	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
				5029	return -2;
				5030	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5031
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5032	KMP_CPU_SET(proc, (kmp_affin_mask_t )(mask));
				5033	return 0;
				5034	}
				5035
				5036	int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5037	if (!KMP_AFFINITY_CAPABLE()) {
				5038	return -1;
				5039	}
				5040
				5041	KA_TRACE(1000, ; {
				5042	int gtid = __kmp_entry_gtid();
				5043	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				5044	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				5045	(kmp_affin_mask_t )(mask));
				5046	__kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in "
				5047	"affinity mask for thread %d = %s\n",
				5048	proc, gtid, buf);
				5049	});
				5050
				5051	if (__kmp_env_consistency_check) {
				5052	if ((mask == NULL) \|\| (*mask == NULL)) {
				5053	KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5054	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5055	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5056
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5057	if ((proc < 0) \|\| (proc >= __kmp_aux_get_affinity_max_proc())) {
				5058	return -1;
				5059	}
				5060	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
				5061	return -2;
				5062	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5063
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5064	KMP_CPU_CLR(proc, (kmp_affin_mask_t )(mask));
				5065	return 0;
				5066	}
				5067
				5068	int __kmp_aux_get_affinity_mask_proc(int proc, void **mask) {
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5069	if (!KMP_AFFINITY_CAPABLE()) {
				5070	return -1;
				5071	}
				5072
				5073	KA_TRACE(1000, ; {
				5074	int gtid = __kmp_entry_gtid();
				5075	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				5076	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				5077	(kmp_affin_mask_t )(mask));
				5078	__kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in "
				5079	"affinity mask for thread %d = %s\n",
				5080	proc, gtid, buf);
				5081	});
				5082
				5083	if (__kmp_env_consistency_check) {
				5084	if ((mask == NULL) \|\| (*mask == NULL)) {
				5085	KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
				5086	}
				5087	}
				5088
				5089	if ((proc < 0) \|\| (proc >= __kmp_aux_get_affinity_max_proc())) {
				5090	return -1;
				5091	}
				5092	if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5093	return 0;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5094	}
				5095
				5096	return KMP_CPU_ISSET(proc, (kmp_affin_mask_t )(mask));
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5097	}
				5098
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5099	// Dynamic affinity settings - Affinity balanced
Jonathan Peyton	e525f0d	2018-09-26 20:43:23 +0000	[diff] [blame]	5100	void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
				5101	KMP_DEBUG_ASSERT(th);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5102	bool fine_gran = true;
Jonathan Peyton	e525f0d	2018-09-26 20:43:23 +0000	[diff] [blame]	5103	int tid = th->th.th_info.ds.ds_tid;
Paul Osmialowski	ecbe2ea	2016-07-29 20:55:03 +0000	[diff] [blame]	5104
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5105	switch (__kmp_affinity_gran) {
				5106	case affinity_gran_fine:
				5107	case affinity_gran_thread:
				5108	break;
				5109	case affinity_gran_core:
				5110	if (__kmp_nThreadsPerCore > 1) {
				5111	fine_gran = false;
				5112	}
				5113	break;
				5114	case affinity_gran_package:
				5115	if (nCoresPerPkg > 1) {
				5116	fine_gran = false;
				5117	}
				5118	break;
				5119	default:
				5120	fine_gran = false;
				5121	}
				5122
				5123	if (__kmp_affinity_uniform_topology()) {
				5124	int coreID;
				5125	int threadID;
				5126	// Number of hyper threads per core in HT machine
				5127	int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
				5128	// Number of cores
				5129	int ncores = __kmp_ncores;
				5130	if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) {
				5131	__kmp_nth_per_core = __kmp_avail_proc / nPackages;
				5132	ncores = nPackages;
				5133	}
				5134	// How many threads will be bound to each core
				5135	int chunk = nthreads / ncores;
				5136	// How many cores will have an additional thread bound to it - "big cores"
				5137	int big_cores = nthreads % ncores;
				5138	// Number of threads on the big cores
				5139	int big_nth = (chunk + 1) * big_cores;
				5140	if (tid < big_nth) {
				5141	coreID = tid / (chunk + 1);
				5142	threadID = (tid % (chunk + 1)) % __kmp_nth_per_core;
				5143	} else { // tid >= big_nth
				5144	coreID = (tid - big_cores) / chunk;
				5145	threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core;
Paul Osmialowski	ecbe2ea	2016-07-29 20:55:03 +0000	[diff] [blame]	5146	}
				5147
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5148	KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
				5149	"Illegal set affinity operation when not capable");
				5150
Jonathan Peyton	e525f0d	2018-09-26 20:43:23 +0000	[diff] [blame]	5151	kmp_affin_mask_t *mask = th->th.th_affin_mask;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5152	KMP_CPU_ZERO(mask);
				5153
				5154	if (fine_gran) {
				5155	int osID = address2os[coreID * __kmp_nth_per_core + threadID].second;
				5156	KMP_CPU_SET(osID, mask);
				5157	} else {
				5158	for (int i = 0; i < __kmp_nth_per_core; i++) {
				5159	int osID;
				5160	osID = address2os[coreID * __kmp_nth_per_core + i].second;
				5161	KMP_CPU_SET(osID, mask);
				5162	}
				5163	}
				5164	if (__kmp_affinity_verbose) {
				5165	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				5166	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
				5167	KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
				5168	__kmp_gettid(), tid, buf);
				5169	}
				5170	__kmp_set_system_affinity(mask, TRUE);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5171	} else { // Non-uniform topology
				5172
Jonathan Peyton	e525f0d	2018-09-26 20:43:23 +0000	[diff] [blame]	5173	kmp_affin_mask_t *mask = th->th.th_affin_mask;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5174	KMP_CPU_ZERO(mask);
				5175
				5176	int core_level = __kmp_affinity_find_core_level(
				5177	address2os, __kmp_avail_proc, __kmp_aff_depth - 1);
				5178	int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
				5179	__kmp_aff_depth - 1, core_level);
				5180	int nth_per_core = __kmp_affinity_max_proc_per_core(
				5181	address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level);
				5182
				5183	// For performance gain consider the special case nthreads ==
				5184	// __kmp_avail_proc
				5185	if (nthreads == __kmp_avail_proc) {
				5186	if (fine_gran) {
				5187	int osID = address2os[tid].second;
				5188	KMP_CPU_SET(osID, mask);
				5189	} else {
				5190	int core = __kmp_affinity_find_core(address2os, tid,
				5191	__kmp_aff_depth - 1, core_level);
				5192	for (int i = 0; i < __kmp_avail_proc; i++) {
				5193	int osID = address2os[i].second;
				5194	if (__kmp_affinity_find_core(address2os, i, __kmp_aff_depth - 1,
				5195	core_level) == core) {
				5196	KMP_CPU_SET(osID, mask);
				5197	}
Paul Osmialowski	ecbe2ea	2016-07-29 20:55:03 +0000	[diff] [blame]	5198	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5199	}
				5200	} else if (nthreads <= ncores) {
				5201
				5202	int core = 0;
				5203	for (int i = 0; i < ncores; i++) {
				5204	// Check if this core from procarr[] is in the mask
				5205	int in_mask = 0;
				5206	for (int j = 0; j < nth_per_core; j++) {
				5207	if (procarr[i * nth_per_core + j] != -1) {
				5208	in_mask = 1;
				5209	break;
				5210	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5211	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5212	if (in_mask) {
				5213	if (tid == core) {
				5214	for (int j = 0; j < nth_per_core; j++) {
				5215	int osID = procarr[i * nth_per_core + j];
				5216	if (osID != -1) {
				5217	KMP_CPU_SET(osID, mask);
				5218	// For fine granularity it is enough to set the first available
				5219	// osID for this core
				5220	if (fine_gran) {
				5221	break;
				5222	}
				5223	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5224	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5225	break;
				5226	} else {
				5227	core++;
				5228	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5229	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5230	}
				5231	} else { // nthreads > ncores
				5232	// Array to save the number of processors at each core
				5233	int nproc_at_core = (int )KMP_ALLOCA(sizeof(int) * ncores);
				5234	// Array to save the number of cores with "x" available processors;
				5235	int *ncores_with_x_procs =
				5236	(int )KMP_ALLOCA(sizeof(int) (nth_per_core + 1));
				5237	// Array to save the number of cores with # procs from x to nth_per_core
				5238	int *ncores_with_x_to_max_procs =
				5239	(int )KMP_ALLOCA(sizeof(int) (nth_per_core + 1));
				5240
				5241	for (int i = 0; i <= nth_per_core; i++) {
				5242	ncores_with_x_procs[i] = 0;
				5243	ncores_with_x_to_max_procs[i] = 0;
				5244	}
				5245
				5246	for (int i = 0; i < ncores; i++) {
				5247	int cnt = 0;
				5248	for (int j = 0; j < nth_per_core; j++) {
				5249	if (procarr[i * nth_per_core + j] != -1) {
				5250	cnt++;
				5251	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5252	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5253	nproc_at_core[i] = cnt;
				5254	ncores_with_x_procs[cnt]++;
				5255	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5256
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5257	for (int i = 0; i <= nth_per_core; i++) {
				5258	for (int j = i; j <= nth_per_core; j++) {
				5259	ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j];
				5260	}
				5261	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5262
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5263	// Max number of processors
				5264	int nproc = nth_per_core * ncores;
				5265	// An array to keep number of threads per each context
				5266	int newarr = (int )__kmp_allocate(sizeof(int) * nproc);
				5267	for (int i = 0; i < nproc; i++) {
				5268	newarr[i] = 0;
				5269	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5270
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5271	int nth = nthreads;
				5272	int flag = 0;
				5273	while (nth > 0) {
				5274	for (int j = 1; j <= nth_per_core; j++) {
				5275	int cnt = ncores_with_x_to_max_procs[j];
				5276	for (int i = 0; i < ncores; i++) {
				5277	// Skip the core with 0 processors
				5278	if (nproc_at_core[i] == 0) {
				5279	continue;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5280	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5281	for (int k = 0; k < nth_per_core; k++) {
				5282	if (procarr[i * nth_per_core + k] != -1) {
				5283	if (newarr[i * nth_per_core + k] == 0) {
				5284	newarr[i * nth_per_core + k] = 1;
				5285	cnt--;
				5286	nth--;
				5287	break;
				5288	} else {
				5289	if (flag != 0) {
				5290	newarr[i * nth_per_core + k]++;
				5291	cnt--;
				5292	nth--;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5293	break;
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5294	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5295	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5296	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5297	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5298	if (cnt == 0 \|\| nth == 0) {
				5299	break;
				5300	}
				5301	}
				5302	if (nth == 0) {
				5303	break;
				5304	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5305	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5306	flag = 1;
				5307	}
				5308	int sum = 0;
				5309	for (int i = 0; i < nproc; i++) {
				5310	sum += newarr[i];
				5311	if (sum > tid) {
				5312	if (fine_gran) {
				5313	int osID = procarr[i];
				5314	KMP_CPU_SET(osID, mask);
				5315	} else {
				5316	int coreID = i / nth_per_core;
				5317	for (int ii = 0; ii < nth_per_core; ii++) {
				5318	int osID = procarr[coreID * nth_per_core + ii];
				5319	if (osID != -1) {
				5320	KMP_CPU_SET(osID, mask);
				5321	}
				5322	}
				5323	}
				5324	break;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5325	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5326	}
				5327	__kmp_free(newarr);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5328	}
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5329
				5330	if (__kmp_affinity_verbose) {
				5331	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				5332	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
				5333	KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
				5334	__kmp_gettid(), tid, buf);
				5335	}
				5336	__kmp_set_system_affinity(mask, TRUE);
Jonathan Peyton	3041982	2017-05-12 18:01:32 +0000	[diff] [blame]	5337	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5338	}
				5339
#if KMP_OS_LINUX
// We don't need this entry for Windows because
// there is GetProcessAffinityMask() api
//
// The intended usage is indicated by these steps:
// 1) The user gets the current affinity mask
// 2) Then sets the affinity by calling this function
// 3) Error check the return value
// 4) Use non-OpenMP parallelization
// 5) Reset the affinity to what was stored in step 1)
//
// Binds the calling thread to __kmp_affin_fullMask.
#ifdef __cplusplus
extern "C"
#endif
    int
    kmp_set_thread_affinity_mask_initial()
// the function returns 0 on success,
//   -1 if we cannot bind thread
//   >0 (errno) if an error happened during binding
{
  int gtid = __kmp_get_gtid();
  if (gtid < 0) {
    // Do not touch non-omp threads
    KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                  "non-omp thread, returning\n"));
    return -1;
  }
  if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) {
    KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                  "affinity not initialized, returning\n"));
    return -1;
  }
  KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                "set full mask for thread %d\n",
                gtid));
  KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
  return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
}
#endif
				5378
Alp Toker	763b939	2014-02-28 09:42:41 +0000	[diff] [blame]	5379	#endif // KMP_AFFINITY_SUPPORTED