Blame - openmp/runtime/src/kmp_affinity.cpp - toolchain/llvm-project

blob: 8386ec846ba27c772cd0e647068dc5b6561056e5 [file] [log] [blame]

Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1	/*
				2	* kmp_affinity.cpp -- affinity management
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3	*/
				4
				5
				6	//===----------------------------------------------------------------------===//
				7	//
				8	// The LLVM Compiler Infrastructure
				9	//
				10	// This file is dual licensed under the MIT and the University of Illinois Open
				11	// Source Licenses. See LICENSE.txt for details.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15
				16	#include "kmp.h"
				17	#include "kmp_i18n.h"
				18	#include "kmp_io.h"
				19	#include "kmp_str.h"
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	20	#include "kmp_wrapper_getpid.h"
Jonathan Peyton	1707836	2015-09-10 19:22:07 +0000	[diff] [blame]	21	#include "kmp_affinity.h"
				22
				23	// Store the real or imagined machine hierarchy here
				24	static hierarchy_info machine_hierarchy;
				25
				26	void __kmp_cleanup_hierarchy() {
				27	machine_hierarchy.fini();
				28	}
				29
				30	void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
				31	kmp_uint32 depth;
				32	// The test below is true if affinity is available, but set to "none". Need to init on first use of hierarchical barrier.
				33	if (TCR_1(machine_hierarchy.uninitialized))
				34	machine_hierarchy.init(NULL, nproc);
Jonathan Peyton	1707836	2015-09-10 19:22:07 +0000	[diff] [blame]	35
				36	depth = machine_hierarchy.depth;
				37	KMP_DEBUG_ASSERT(depth > 0);
Jonathan Peyton	df4d3dd	2015-09-10 20:34:32 +0000	[diff] [blame]	38	// Adjust the hierarchy in case num threads exceeds original
				39	if (nproc > machine_hierarchy.skipPerLevel[depth-1])
				40	machine_hierarchy.resize(nproc);
Jonathan Peyton	1707836	2015-09-10 19:22:07 +0000	[diff] [blame]	41
				42	thr_bar->depth = depth;
				43	thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
				44	thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
				45	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	46
Alp Toker	763b939	2014-02-28 09:42:41 +0000	[diff] [blame]	47	#if KMP_AFFINITY_SUPPORTED
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	48
				49	//
				50	// Print the affinity mask to the character array in a pretty format.
				51	//
				52	char *
				53	__kmp_affinity_print_mask(char buf, int buf_len, kmp_affin_mask_t mask)
				54	{
				55	KMP_ASSERT(buf_len >= 40);
				56	char *scan = buf;
				57	char *end = buf + buf_len - 1;
				58
				59	//
				60	// Find first element / check for empty set.
				61	//
				62	size_t i;
				63	for (i = 0; i < KMP_CPU_SETSIZE; i++) {
				64	if (KMP_CPU_ISSET(i, mask)) {
				65	break;
				66	}
				67	}
				68	if (i == KMP_CPU_SETSIZE) {
Jonathan Peyton	7edeef1	2015-09-25 17:23:17 +0000	[diff] [blame^]	69	KMP_SNPRINTF(scan, end-scan+1, "{<empty>}");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	70	while (*scan != '\0') scan++;
				71	KMP_ASSERT(scan <= end);
				72	return buf;
				73	}
				74
Jonathan Peyton	7edeef1	2015-09-25 17:23:17 +0000	[diff] [blame^]	75	KMP_SNPRINTF(scan, end-scan+1, "{%ld", (long)i);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	76	while (*scan != '\0') scan++;
				77	i++;
				78	for (; i < KMP_CPU_SETSIZE; i++) {
				79	if (! KMP_CPU_ISSET(i, mask)) {
				80	continue;
				81	}
				82
				83	//
				84	// Check for buffer overflow. A string of the form ",<n>" will have
				85	// at most 10 characters, plus we want to leave room to print ",...}"
				86	// if the set is too large to print for a total of 15 characters.
				87	// We already left room for '\0' in setting end.
				88	//
				89	if (end - scan < 15) {
				90	break;
				91	}
Jonathan Peyton	7edeef1	2015-09-25 17:23:17 +0000	[diff] [blame^]	92	KMP_SNPRINTF(scan, end-scan+1, ",%-ld", (long)i);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	93	while (*scan != '\0') scan++;
				94	}
				95	if (i < KMP_CPU_SETSIZE) {
Jonathan Peyton	7edeef1	2015-09-25 17:23:17 +0000	[diff] [blame^]	96	KMP_SNPRINTF(scan, end-scan+1, ",...");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	97	while (*scan != '\0') scan++;
				98	}
Jonathan Peyton	7edeef1	2015-09-25 17:23:17 +0000	[diff] [blame^]	99	KMP_SNPRINTF(scan, end-scan+1, "}");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	100	while (*scan != '\0') scan++;
				101	KMP_ASSERT(scan <= end);
				102	return buf;
				103	}
				104
				105
				106	void
				107	__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
				108	{
				109	KMP_CPU_ZERO(mask);
				110
Andrey Churbanov	7daf980	2015-01-27 16:52:57 +0000	[diff] [blame]	111	# if KMP_GROUP_AFFINITY
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	112
				113	if (__kmp_num_proc_groups > 1) {
				114	int group;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	115	KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
				116	for (group = 0; group < __kmp_num_proc_groups; group++) {
				117	int i;
				118	int num = __kmp_GetActiveProcessorCount(group);
				119	for (i = 0; i < num; i++) {
				120	KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
				121	}
				122	}
				123	}
				124	else
				125
Andrey Churbanov	7daf980	2015-01-27 16:52:57 +0000	[diff] [blame]	126	# endif /* KMP_GROUP_AFFINITY */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	127
				128	{
				129	int proc;
				130	for (proc = 0; proc < __kmp_xproc; proc++) {
				131	KMP_CPU_SET(proc, mask);
				132	}
				133	}
				134	}
				135
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	136	//
				137	// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
				138	// called to renumber the labels from [0..n] and place them into the child_num
				139	// vector of the address object. This is done in case the labels used for
Alp Toker	8f2d3f0	2014-02-24 10:40:15 +0000	[diff] [blame]	140	// the children at one node of the hierarchy differ from those used for
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	141	// another node at the same level. Example: suppose the machine has 2 nodes
				142	// with 2 packages each. The first node contains packages 601 and 602, and
				143	// second node contains packages 603 and 604. If we try to sort the table
				144	// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
				145	// because we are paying attention to the labels themselves, not the ordinal
				146	// child numbers. By using the child numbers in the sort, the result is
				147	// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
				148	//
				149	static void
				150	__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
				151	int numAddrs)
				152	{
				153	KMP_DEBUG_ASSERT(numAddrs > 0);
				154	int depth = address2os->first.depth;
				155	unsigned counts = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				156	unsigned lastLabel = (unsigned )__kmp_allocate(depth
				157	* sizeof(unsigned));
				158	int labCt;
				159	for (labCt = 0; labCt < depth; labCt++) {
				160	address2os[0].first.childNums[labCt] = counts[labCt] = 0;
				161	lastLabel[labCt] = address2os[0].first.labels[labCt];
				162	}
				163	int i;
				164	for (i = 1; i < numAddrs; i++) {
				165	for (labCt = 0; labCt < depth; labCt++) {
				166	if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
				167	int labCt2;
				168	for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
				169	counts[labCt2] = 0;
				170	lastLabel[labCt2] = address2os[i].first.labels[labCt2];
				171	}
				172	counts[labCt]++;
				173	lastLabel[labCt] = address2os[i].first.labels[labCt];
				174	break;
				175	}
				176	}
				177	for (labCt = 0; labCt < depth; labCt++) {
				178	address2os[i].first.childNums[labCt] = counts[labCt];
				179	}
				180	for (; labCt < (int)Address::maxDepth; labCt++) {
				181	address2os[i].first.childNums[labCt] = 0;
				182	}
				183	}
				184	}
				185
				186
				187	//
				188	// All of the __kmp_affinity_create_*_map() routines should set
				189	// __kmp_affinity_masks to a vector of affinity mask objects of length
				190	// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
				191	// return the number of levels in the machine topology tree (zero if
				192	// __kmp_affinity_type == affinity_none).
				193	//
				194	// All of the __kmp_affinity_create__map() routines should set fullMask
				195	// to the affinity mask for the initialization thread. They need to save and
				196	// restore the mask, and it could be needed later, so saving it is just an
				197	// optimization to avoid calling kmp_get_system_affinity() again.
				198	//
				199	static kmp_affin_mask_t *fullMask = NULL;
				200
				201	kmp_affin_mask_t *
				202	__kmp_affinity_get_fullMask() { return fullMask; }
				203
				204
				205	static int nCoresPerPkg, nPackages;
Andrey Churbanov	f696c82	2015-01-27 16:55:43 +0000	[diff] [blame]	206	static int __kmp_nThreadsPerCore;
				207	#ifndef KMP_DFLT_NTH_CORES
				208	static int __kmp_ncores;
				209	#endif
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	210
				211	//
				212	// __kmp_affinity_uniform_topology() doesn't work when called from
				213	// places which support arbitrarily many levels in the machine topology
				214	// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
				215	// __kmp_affinity_create_x2apicid_map().
				216	//
				217	inline static bool
				218	__kmp_affinity_uniform_topology()
				219	{
				220	return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
				221	}
				222
				223
				224	//
				225	// Print out the detailed machine topology map, i.e. the physical locations
				226	// of each OS proc.
				227	//
				228	static void
				229	__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
				230	int pkgLevel, int coreLevel, int threadLevel)
				231	{
				232	int proc;
				233
				234	KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
				235	for (proc = 0; proc < len; proc++) {
				236	int level;
				237	kmp_str_buf_t buf;
				238	__kmp_str_buf_init(&buf);
				239	for (level = 0; level < depth; level++) {
				240	if (level == threadLevel) {
				241	__kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
				242	}
				243	else if (level == coreLevel) {
				244	__kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
				245	}
				246	else if (level == pkgLevel) {
				247	__kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
				248	}
				249	else if (level > pkgLevel) {
				250	__kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
				251	level - pkgLevel - 1);
				252	}
				253	else {
				254	__kmp_str_buf_print(&buf, "L%d ", level);
				255	}
				256	__kmp_str_buf_print(&buf, "%d ",
				257	address2os[proc].first.labels[level]);
				258	}
				259	KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
				260	buf.str);
				261	__kmp_str_buf_free(&buf);
				262	}
				263	}
				264
				265
				266	//
				267	// If we don't know how to retrieve the machine's processor topology, or
				268	// encounter an error in doing so, this routine is called to form a "flat"
				269	// mapping of os thread id's <-> processor id's.
				270	//
				271	static int
				272	__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
				273	kmp_i18n_id_t *const msg_id)
				274	{
				275	*address2os = NULL;
				276	*msg_id = kmp_i18n_null;
				277
				278	//
				279	// Even if __kmp_affinity_type == affinity_none, this routine might still
Andrey Churbanov	f696c82	2015-01-27 16:55:43 +0000	[diff] [blame]	280	// called to set __kmp_ncores, as well as
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	281	// __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
				282	//
				283	if (! KMP_AFFINITY_CAPABLE()) {
				284	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				285	__kmp_ncores = nPackages = __kmp_xproc;
				286	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	287	if (__kmp_affinity_verbose) {
				288	KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
				289	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				290	KMP_INFORM(Uniform, "KMP_AFFINITY");
				291	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				292	__kmp_nThreadsPerCore, __kmp_ncores);
				293	}
				294	return 0;
				295	}
				296
				297	//
				298	// When affinity is off, this routine will still be called to set
Andrey Churbanov	f696c82	2015-01-27 16:55:43 +0000	[diff] [blame]	299	// __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	300	// nCoresPerPkg, & nPackages. Make sure all these vars are set
				301	// correctly, and return now if affinity is not enabled.
				302	//
				303	__kmp_ncores = nPackages = __kmp_avail_proc;
				304	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	305	if (__kmp_affinity_verbose) {
				306	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				307	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
				308
				309	KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
				310	if (__kmp_affinity_respect_mask) {
				311	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				312	} else {
				313	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				314	}
				315	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				316	KMP_INFORM(Uniform, "KMP_AFFINITY");
				317	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				318	__kmp_nThreadsPerCore, __kmp_ncores);
				319	}
				320	if (__kmp_affinity_type == affinity_none) {
				321	return 0;
				322	}
				323
				324	//
				325	// Contruct the data structure to be returned.
				326	//
				327	address2os = (AddrUnsPair)
				328	__kmp_allocate(sizeof(*address2os) __kmp_avail_proc);
				329	int avail_ct = 0;
				330	unsigned int i;
				331	for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
				332	//
				333	// Skip this proc if it is not included in the machine model.
				334	//
				335	if (! KMP_CPU_ISSET(i, fullMask)) {
				336	continue;
				337	}
				338
				339	Address addr(1);
				340	addr.labels[0] = i;
				341	(*address2os)[avail_ct++] = AddrUnsPair(addr,i);
				342	}
				343	if (__kmp_affinity_verbose) {
				344	KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
				345	}
				346
				347	if (__kmp_affinity_gran_levels < 0) {
				348	//
				349	// Only the package level is modeled in the machine topology map,
				350	// so the #levels of granularity is either 0 or 1.
				351	//
				352	if (__kmp_affinity_gran > affinity_gran_package) {
				353	__kmp_affinity_gran_levels = 1;
				354	}
				355	else {
				356	__kmp_affinity_gran_levels = 0;
				357	}
				358	}
				359	return 1;
				360	}
				361
				362
Andrey Churbanov	7daf980	2015-01-27 16:52:57 +0000	[diff] [blame]	363	# if KMP_GROUP_AFFINITY
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	364
				365	//
				366	// If multiple Windows* OS processor groups exist, we can create a 2-level
				367	// topology map with the groups at level 0 and the individual procs at
				368	// level 1.
				369	//
				370	// This facilitates letting the threads float among all procs in a group,
				371	// if granularity=group (the default when there are multiple groups).
				372	//
				373	static int
				374	__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
				375	kmp_i18n_id_t *const msg_id)
				376	{
				377	*address2os = NULL;
				378	*msg_id = kmp_i18n_null;
				379
				380	//
				381	// If we don't have multiple processor groups, return now.
				382	// The flat mapping will be used.
				383	//
				384	if ((! KMP_AFFINITY_CAPABLE()) \|\| (__kmp_get_proc_group(fullMask) >= 0)) {
				385	// FIXME set *msg_id
				386	return -1;
				387	}
				388
				389	//
				390	// Contruct the data structure to be returned.
				391	//
				392	address2os = (AddrUnsPair)
				393	__kmp_allocate(sizeof(*address2os) __kmp_avail_proc);
				394	int avail_ct = 0;
				395	int i;
				396	for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
				397	//
				398	// Skip this proc if it is not included in the machine model.
				399	//
				400	if (! KMP_CPU_ISSET(i, fullMask)) {
				401	continue;
				402	}
				403
				404	Address addr(2);
				405	addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
				406	addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
				407	(*address2os)[avail_ct++] = AddrUnsPair(addr,i);
				408
				409	if (__kmp_affinity_verbose) {
				410	KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
				411	addr.labels[1]);
				412	}
				413	}
				414
				415	if (__kmp_affinity_gran_levels < 0) {
				416	if (__kmp_affinity_gran == affinity_gran_group) {
				417	__kmp_affinity_gran_levels = 1;
				418	}
				419	else if ((__kmp_affinity_gran == affinity_gran_fine)
				420	\|\| (__kmp_affinity_gran == affinity_gran_thread)) {
				421	__kmp_affinity_gran_levels = 0;
				422	}
				423	else {
				424	const char *gran_str = NULL;
				425	if (__kmp_affinity_gran == affinity_gran_core) {
				426	gran_str = "core";
				427	}
				428	else if (__kmp_affinity_gran == affinity_gran_package) {
				429	gran_str = "package";
				430	}
				431	else if (__kmp_affinity_gran == affinity_gran_node) {
				432	gran_str = "node";
				433	}
				434	else {
				435	KMP_ASSERT(0);
				436	}
				437
				438	// Warning: can't use affinity granularity \"gran\" with group topology method, using "thread"
				439	__kmp_affinity_gran_levels = 0;
				440	}
				441	}
				442	return 2;
				443	}
				444
Andrey Churbanov	7daf980	2015-01-27 16:52:57 +0000	[diff] [blame]	445	# endif /* KMP_GROUP_AFFINITY */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	446
				447
				448	# if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
				449
				450	static int
				451	__kmp_cpuid_mask_width(int count) {
				452	int r = 0;
				453
				454	while((1<<r) < count)
				455	++r;
				456	return r;
				457	}
				458
				459
				460	class apicThreadInfo {
				461	public:
				462	unsigned osId; // param to __kmp_affinity_bind_thread
				463	unsigned apicId; // from cpuid after binding
				464	unsigned maxCoresPerPkg; // ""
				465	unsigned maxThreadsPerPkg; // ""
				466	unsigned pkgId; // inferred from above values
				467	unsigned coreId; // ""
				468	unsigned threadId; // ""
				469	};
				470
				471
				472	static int
				473	__kmp_affinity_cmp_apicThreadInfo_os_id(const void a, const void b)
				474	{
				475	const apicThreadInfo aa = (const apicThreadInfo )a;
				476	const apicThreadInfo bb = (const apicThreadInfo )b;
				477	if (aa->osId < bb->osId) return -1;
				478	if (aa->osId > bb->osId) return 1;
				479	return 0;
				480	}
				481
				482
				483	static int
				484	__kmp_affinity_cmp_apicThreadInfo_phys_id(const void a, const void b)
				485	{
				486	const apicThreadInfo aa = (const apicThreadInfo )a;
				487	const apicThreadInfo bb = (const apicThreadInfo )b;
				488	if (aa->pkgId < bb->pkgId) return -1;
				489	if (aa->pkgId > bb->pkgId) return 1;
				490	if (aa->coreId < bb->coreId) return -1;
				491	if (aa->coreId > bb->coreId) return 1;
				492	if (aa->threadId < bb->threadId) return -1;
				493	if (aa->threadId > bb->threadId) return 1;
				494	return 0;
				495	}
				496
				497
				498	//
				499	// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
				500	// an algorithm which cycles through the available os threads, setting
				501	// the current thread's affinity mask to that thread, and then retrieves
				502	// the Apic Id for each thread context using the cpuid instruction.
				503	//
				504	static int
				505	__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
				506	kmp_i18n_id_t *const msg_id)
				507	{
Andrey Churbanov	1c33129	2015-01-27 17:03:42 +0000	[diff] [blame]	508	kmp_cpuid buf;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	509	int rc;
				510	*address2os = NULL;
				511	*msg_id = kmp_i18n_null;
				512
Andrey Churbanov	1c33129	2015-01-27 17:03:42 +0000	[diff] [blame]	513	//
				514	// Check if cpuid leaf 4 is supported.
				515	//
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	516	__kmp_x86_cpuid(0, 0, &buf);
				517	if (buf.eax < 4) {
				518	*msg_id = kmp_i18n_str_NoLeaf4Support;
				519	return -1;
				520	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	521
				522	//
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	523	// The algorithm used starts by setting the affinity to each available
Andrey Churbanov	1c33129	2015-01-27 17:03:42 +0000	[diff] [blame]	524	// thread and retrieving info from the cpuid instruction, so if we are
				525	// not capable of calling __kmp_get_system_affinity() and
				526	// _kmp_get_system_affinity(), then we need to do something else - use
				527	// the defaults that we calculated from issuing cpuid without binding
				528	// to each proc.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	529	//
				530	if (! KMP_AFFINITY_CAPABLE()) {
				531	//
				532	// Hack to try and infer the machine topology using only the data
				533	// available from cpuid on the current thread, and __kmp_xproc.
				534	//
				535	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				536
				537	//
				538	// Get an upper bound on the number of threads per package using
				539	// cpuid(1).
				540	//
				541	// On some OS/chps combinations where HT is supported by the chip
				542	// but is disabled, this value will be 2 on a single core chip.
				543	// Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
				544	//
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	545	__kmp_x86_cpuid(1, 0, &buf);
				546	int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
				547	if (maxThreadsPerPkg == 0) {
				548	maxThreadsPerPkg = 1;
				549	}
				550
				551	//
				552	// The num cores per pkg comes from cpuid(4).
				553	// 1 must be added to the encoded value.
				554	//
				555	// The author of cpu_count.cpp treated this only an upper bound
				556	// on the number of cores, but I haven't seen any cases where it
				557	// was greater than the actual number of cores, so we will treat
				558	// it as exact in this block of code.
				559	//
				560	// First, we need to check if cpuid(4) is supported on this chip.
				561	// To see if cpuid(n) is supported, issue cpuid(0) and check if eax
				562	// has the value n or greater.
				563	//
				564	__kmp_x86_cpuid(0, 0, &buf);
				565	if (buf.eax >= 4) {
				566	__kmp_x86_cpuid(4, 0, &buf);
				567	nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
				568	}
				569	else {
				570	nCoresPerPkg = 1;
				571	}
				572
				573	//
				574	// There is no way to reliably tell if HT is enabled without issuing
				575	// the cpuid instruction from every thread, can correlating the cpuid
				576	// info, so if the machine is not affinity capable, we assume that HT
				577	// is off. We have seen quite a few machines where maxThreadsPerPkg
				578	// is 2, yet the machine does not support HT.
				579	//
				580	// - Older OSes are usually found on machines with older chips, which
				581	// do not support HT.
				582	//
				583	// - The performance penalty for mistakenly identifying a machine as
				584	// HT when it isn't (which results in blocktime being incorrecly set
				585	// to 0) is greater than the penalty when for mistakenly identifying
				586	// a machine as being 1 thread/core when it is really HT enabled
				587	// (which results in blocktime being incorrectly set to a positive
				588	// value).
				589	//
				590	__kmp_ncores = __kmp_xproc;
				591	nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
				592	__kmp_nThreadsPerCore = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	593	if (__kmp_affinity_verbose) {
				594	KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
				595	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				596	if (__kmp_affinity_uniform_topology()) {
				597	KMP_INFORM(Uniform, "KMP_AFFINITY");
				598	} else {
				599	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				600	}
				601	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				602	__kmp_nThreadsPerCore, __kmp_ncores);
				603	}
				604	return 0;
				605	}
				606
				607	//
				608	//
				609	// From here on, we can assume that it is safe to call
				610	// __kmp_get_system_affinity() and __kmp_set_system_affinity(),
				611	// even if __kmp_affinity_type = affinity_none.
				612	//
				613
				614	//
				615	// Save the affinity mask for the current thread.
				616	//
				617	kmp_affin_mask_t *oldMask;
				618	KMP_CPU_ALLOC(oldMask);
				619	KMP_ASSERT(oldMask != NULL);
				620	__kmp_get_system_affinity(oldMask, TRUE);
				621
				622	//
				623	// Run through each of the available contexts, binding the current thread
				624	// to it, and obtaining the pertinent information using the cpuid instr.
				625	//
				626	// The relevant information is:
				627	//
				628	// Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
				629	// has a uniqie Apic Id, which is of the form pkg# : core# : thread#.
				630	//
				631	// Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The
				632	// value of this field determines the width of the core# + thread#
				633	// fields in the Apic Id. It is also an upper bound on the number
				634	// of threads per package, but it has been verified that situations
				635	// happen were it is not exact. In particular, on certain OS/chip
				636	// combinations where Intel(R) Hyper-Threading Technology is supported
				637	// by the chip but has
				638	// been disabled, the value of this field will be 2 (for a single core
				639	// chip). On other OS/chip combinations supporting
				640	// Intel(R) Hyper-Threading Technology, the value of
				641	// this field will be 1 when Intel(R) Hyper-Threading Technology is
				642	// disabled and 2 when it is enabled.
				643	//
				644	// Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The
				645	// value of this field (+1) determines the width of the core# field in
				646	// the Apic Id. The comments in "cpucount.cpp" say that this value is
				647	// an upper bound, but the IA-32 architecture manual says that it is
				648	// exactly the number of cores per package, and I haven't seen any
				649	// case where it wasn't.
				650	//
				651	// From this information, deduce the package Id, core Id, and thread Id,
				652	// and set the corresponding fields in the apicThreadInfo struct.
				653	//
				654	unsigned i;
				655	apicThreadInfo threadInfo = (apicThreadInfo )__kmp_allocate(
				656	__kmp_avail_proc * sizeof(apicThreadInfo));
				657	unsigned nApics = 0;
				658	for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
				659	//
				660	// Skip this proc if it is not included in the machine model.
				661	//
				662	if (! KMP_CPU_ISSET(i, fullMask)) {
				663	continue;
				664	}
				665	KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
				666
				667	__kmp_affinity_bind_thread(i);
				668	threadInfo[nApics].osId = i;
				669
				670	//
				671	// The apic id and max threads per pkg come from cpuid(1).
				672	//
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	673	__kmp_x86_cpuid(1, 0, &buf);
				674	if (! (buf.edx >> 9) & 1) {
				675	__kmp_set_system_affinity(oldMask, TRUE);
				676	__kmp_free(threadInfo);
				677	KMP_CPU_FREE(oldMask);
				678	*msg_id = kmp_i18n_str_ApicNotPresent;
				679	return -1;
				680	}
				681	threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
				682	threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
				683	if (threadInfo[nApics].maxThreadsPerPkg == 0) {
				684	threadInfo[nApics].maxThreadsPerPkg = 1;
				685	}
				686
				687	//
				688	// Max cores per pkg comes from cpuid(4).
				689	// 1 must be added to the encoded value.
				690	//
				691	// First, we need to check if cpuid(4) is supported on this chip.
				692	// To see if cpuid(n) is supported, issue cpuid(0) and check if eax
				693	// has the value n or greater.
				694	//
				695	__kmp_x86_cpuid(0, 0, &buf);
				696	if (buf.eax >= 4) {
				697	__kmp_x86_cpuid(4, 0, &buf);
				698	threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
				699	}
				700	else {
				701	threadInfo[nApics].maxCoresPerPkg = 1;
				702	}
				703
				704	//
				705	// Infer the pkgId / coreId / threadId using only the info
				706	// obtained locally.
				707	//
				708	int widthCT = __kmp_cpuid_mask_width(
				709	threadInfo[nApics].maxThreadsPerPkg);
				710	threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
				711
				712	int widthC = __kmp_cpuid_mask_width(
				713	threadInfo[nApics].maxCoresPerPkg);
				714	int widthT = widthCT - widthC;
				715	if (widthT < 0) {
				716	//
				717	// I've never seen this one happen, but I suppose it could, if
				718	// the cpuid instruction on a chip was really screwed up.
				719	// Make sure to restore the affinity mask before the tail call.
				720	//
				721	__kmp_set_system_affinity(oldMask, TRUE);
				722	__kmp_free(threadInfo);
				723	KMP_CPU_FREE(oldMask);
				724	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				725	return -1;
				726	}
				727
				728	int maskC = (1 << widthC) - 1;
				729	threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
				730	&maskC;
				731
				732	int maskT = (1 << widthT) - 1;
				733	threadInfo[nApics].threadId = threadInfo[nApics].apicId &maskT;
				734
				735	nApics++;
				736	}
				737
				738	//
				739	// We've collected all the info we need.
				740	// Restore the old affinity mask for this thread.
				741	//
				742	__kmp_set_system_affinity(oldMask, TRUE);
				743
				744	//
				745	// If there's only one thread context to bind to, form an Address object
				746	// with depth 1 and return immediately (or, if affinity is off, set
				747	// address2os to NULL and return).
				748	//
				749	// If it is configured to omit the package level when there is only a
				750	// single package, the logic at the end of this routine won't work if
				751	// there is only a single thread - it would try to form an Address
				752	// object with depth 0.
				753	//
				754	KMP_ASSERT(nApics > 0);
				755	if (nApics == 1) {
				756	__kmp_ncores = nPackages = 1;
				757	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	758	if (__kmp_affinity_verbose) {
				759	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				760	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				761
				762	KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
				763	if (__kmp_affinity_respect_mask) {
				764	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				765	} else {
				766	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				767	}
				768	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				769	KMP_INFORM(Uniform, "KMP_AFFINITY");
				770	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				771	__kmp_nThreadsPerCore, __kmp_ncores);
				772	}
				773
				774	if (__kmp_affinity_type == affinity_none) {
				775	__kmp_free(threadInfo);
				776	KMP_CPU_FREE(oldMask);
				777	return 0;
				778	}
				779
				780	address2os = (AddrUnsPair)__kmp_allocate(sizeof(AddrUnsPair));
				781	Address addr(1);
				782	addr.labels[0] = threadInfo[0].pkgId;
				783	(*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
				784
				785	if (__kmp_affinity_gran_levels < 0) {
				786	__kmp_affinity_gran_levels = 0;
				787	}
				788
				789	if (__kmp_affinity_verbose) {
				790	__kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
				791	}
				792
				793	__kmp_free(threadInfo);
				794	KMP_CPU_FREE(oldMask);
				795	return 1;
				796	}
				797
				798	//
				799	// Sort the threadInfo table by physical Id.
				800	//
				801	qsort(threadInfo, nApics, sizeof(*threadInfo),
				802	__kmp_affinity_cmp_apicThreadInfo_phys_id);
				803
				804	//
				805	// The table is now sorted by pkgId / coreId / threadId, but we really
				806	// don't know the radix of any of the fields. pkgId's may be sparsely
				807	// assigned among the chips on a system. Although coreId's are usually
				808	// assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
				809	// [0..threadsPerCore-1], we don't want to make any such assumptions.
				810	//
				811	// For that matter, we don't know what coresPerPkg and threadsPerCore
				812	// (or the total # packages) are at this point - we want to determine
				813	// that now. We only have an upper bound on the first two figures.
				814	//
				815	// We also perform a consistency check at this point: the values returned
				816	// by the cpuid instruction for any thread bound to a given package had
				817	// better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
				818	//
				819	nPackages = 1;
				820	nCoresPerPkg = 1;
				821	__kmp_nThreadsPerCore = 1;
				822	unsigned nCores = 1;
				823
				824	unsigned pkgCt = 1; // to determine radii
				825	unsigned lastPkgId = threadInfo[0].pkgId;
				826	unsigned coreCt = 1;
				827	unsigned lastCoreId = threadInfo[0].coreId;
				828	unsigned threadCt = 1;
				829	unsigned lastThreadId = threadInfo[0].threadId;
				830
				831	// intra-pkg consist checks
				832	unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
				833	unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
				834
				835	for (i = 1; i < nApics; i++) {
				836	if (threadInfo[i].pkgId != lastPkgId) {
				837	nCores++;
				838	pkgCt++;
				839	lastPkgId = threadInfo[i].pkgId;
				840	if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
				841	coreCt = 1;
				842	lastCoreId = threadInfo[i].coreId;
				843	if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
				844	threadCt = 1;
				845	lastThreadId = threadInfo[i].threadId;
				846
				847	//
				848	// This is a different package, so go on to the next iteration
				849	// without doing any consistency checks. Reset the consistency
				850	// check vars, though.
				851	//
				852	prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
				853	prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
				854	continue;
				855	}
				856
				857	if (threadInfo[i].coreId != lastCoreId) {
				858	nCores++;
				859	coreCt++;
				860	lastCoreId = threadInfo[i].coreId;
				861	if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
				862	threadCt = 1;
				863	lastThreadId = threadInfo[i].threadId;
				864	}
				865	else if (threadInfo[i].threadId != lastThreadId) {
				866	threadCt++;
				867	lastThreadId = threadInfo[i].threadId;
				868	}
				869	else {
				870	__kmp_free(threadInfo);
				871	KMP_CPU_FREE(oldMask);
				872	*msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
				873	return -1;
				874	}
				875
				876	//
				877	// Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
				878	// fields agree between all the threads bound to a given package.
				879	//
				880	if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
				881	\|\| (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
				882	__kmp_free(threadInfo);
				883	KMP_CPU_FREE(oldMask);
				884	*msg_id = kmp_i18n_str_InconsistentCpuidInfo;
				885	return -1;
				886	}
				887	}
				888	nPackages = pkgCt;
				889	if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
				890	if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
				891
				892	//
				893	// When affinity is off, this routine will still be called to set
Andrey Churbanov	f696c82	2015-01-27 16:55:43 +0000	[diff] [blame]	894	// __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	895	// nCoresPerPkg, & nPackages. Make sure all these vars are set
				896	// correctly, and return now if affinity is not enabled.
				897	//
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	898	__kmp_ncores = nCores;
				899	if (__kmp_affinity_verbose) {
				900	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				901	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				902
				903	KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
				904	if (__kmp_affinity_respect_mask) {
				905	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				906	} else {
				907	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				908	}
				909	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				910	if (__kmp_affinity_uniform_topology()) {
				911	KMP_INFORM(Uniform, "KMP_AFFINITY");
				912	} else {
				913	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				914	}
				915	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				916	__kmp_nThreadsPerCore, __kmp_ncores);
				917
				918	}
				919
				920	if (__kmp_affinity_type == affinity_none) {
				921	__kmp_free(threadInfo);
				922	KMP_CPU_FREE(oldMask);
				923	return 0;
				924	}
				925
				926	//
				927	// Now that we've determined the number of packages, the number of cores
				928	// per package, and the number of threads per core, we can construct the
				929	// data structure that is to be returned.
				930	//
				931	int pkgLevel = 0;
				932	int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
				933	int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
				934	unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
				935
				936	KMP_ASSERT(depth > 0);
				937	address2os = (AddrUnsPair)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
				938
				939	for (i = 0; i < nApics; ++i) {
				940	Address addr(depth);
				941	unsigned os = threadInfo[i].osId;
				942	int d = 0;
				943
				944	if (pkgLevel >= 0) {
				945	addr.labels[d++] = threadInfo[i].pkgId;
				946	}
				947	if (coreLevel >= 0) {
				948	addr.labels[d++] = threadInfo[i].coreId;
				949	}
				950	if (threadLevel >= 0) {
				951	addr.labels[d++] = threadInfo[i].threadId;
				952	}
				953	(*address2os)[i] = AddrUnsPair(addr, os);
				954	}
				955
				956	if (__kmp_affinity_gran_levels < 0) {
				957	//
				958	// Set the granularity level based on what levels are modeled
				959	// in the machine topology map.
				960	//
				961	__kmp_affinity_gran_levels = 0;
				962	if ((threadLevel >= 0)
				963	&& (__kmp_affinity_gran > affinity_gran_thread)) {
				964	__kmp_affinity_gran_levels++;
				965	}
				966	if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
				967	__kmp_affinity_gran_levels++;
				968	}
				969	if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
				970	__kmp_affinity_gran_levels++;
				971	}
				972	}
				973
				974	if (__kmp_affinity_verbose) {
				975	__kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
				976	coreLevel, threadLevel);
				977	}
				978
				979	__kmp_free(threadInfo);
				980	KMP_CPU_FREE(oldMask);
				981	return depth;
				982	}
				983
				984
				985	//
				986	// Intel(R) microarchitecture code name Nehalem, Dunnington and later
				987	// architectures support a newer interface for specifying the x2APIC Ids,
				988	// based on cpuid leaf 11.
				989	//
				990	static int
				991	__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
				992	kmp_i18n_id_t *const msg_id)
				993	{
				994	kmp_cpuid buf;
				995
				996	*address2os = NULL;
				997	*msg_id = kmp_i18n_null;
				998
				999	//
				1000	// Check to see if cpuid leaf 11 is supported.
				1001	//
				1002	__kmp_x86_cpuid(0, 0, &buf);
				1003	if (buf.eax < 11) {
				1004	*msg_id = kmp_i18n_str_NoLeaf11Support;
				1005	return -1;
				1006	}
				1007	__kmp_x86_cpuid(11, 0, &buf);
				1008	if (buf.ebx == 0) {
				1009	*msg_id = kmp_i18n_str_NoLeaf11Support;
				1010	return -1;
				1011	}
				1012
				1013	//
				1014	// Find the number of levels in the machine topology. While we're at it,
				1015	// get the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will
				1016	// try to get more accurate values later by explicitly counting them,
				1017	// but get reasonable defaults now, in case we return early.
				1018	//
				1019	int level;
				1020	int threadLevel = -1;
				1021	int coreLevel = -1;
				1022	int pkgLevel = -1;
				1023	__kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
				1024
				1025	for (level = 0;; level++) {
				1026	if (level > 31) {
				1027	//
				1028	// FIXME: Hack for DPD200163180
				1029	//
				1030	// If level is big then something went wrong -> exiting
				1031	//
				1032	// There could actually be 32 valid levels in the machine topology,
				1033	// but so far, the only machine we have seen which does not exit
				1034	// this loop before iteration 32 has fubar x2APIC settings.
				1035	//
				1036	// For now, just reject this case based upon loop trip count.
				1037	//
				1038	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1039	return -1;
				1040	}
				1041	__kmp_x86_cpuid(11, level, &buf);
				1042	if (buf.ebx == 0) {
				1043	if (pkgLevel < 0) {
				1044	//
				1045	// Will infer nPackages from __kmp_xproc
				1046	//
				1047	pkgLevel = level;
				1048	level++;
				1049	}
				1050	break;
				1051	}
				1052	int kind = (buf.ecx >> 8) & 0xff;
				1053	if (kind == 1) {
				1054	//
				1055	// SMT level
				1056	//
				1057	threadLevel = level;
				1058	coreLevel = -1;
				1059	pkgLevel = -1;
				1060	__kmp_nThreadsPerCore = buf.ebx & 0xff;
				1061	if (__kmp_nThreadsPerCore == 0) {
				1062	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1063	return -1;
				1064	}
				1065	}
				1066	else if (kind == 2) {
				1067	//
				1068	// core level
				1069	//
				1070	coreLevel = level;
				1071	pkgLevel = -1;
				1072	nCoresPerPkg = buf.ebx & 0xff;
				1073	if (nCoresPerPkg == 0) {
				1074	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1075	return -1;
				1076	}
				1077	}
				1078	else {
				1079	if (level <= 0) {
				1080	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1081	return -1;
				1082	}
				1083	if (pkgLevel >= 0) {
				1084	continue;
				1085	}
				1086	pkgLevel = level;
				1087	nPackages = buf.ebx & 0xff;
				1088	if (nPackages == 0) {
				1089	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1090	return -1;
				1091	}
				1092	}
				1093	}
				1094	int depth = level;
				1095
				1096	//
				1097	// In the above loop, "level" was counted from the finest level (usually
				1098	// thread) to the coarsest. The caller expects that we will place the
				1099	// labels in (*address2os)[].first.labels[] in the inverse order, so
				1100	// we need to invert the vars saying which level means what.
				1101	//
				1102	if (threadLevel >= 0) {
				1103	threadLevel = depth - threadLevel - 1;
				1104	}
				1105	if (coreLevel >= 0) {
				1106	coreLevel = depth - coreLevel - 1;
				1107	}
				1108	KMP_DEBUG_ASSERT(pkgLevel >= 0);
				1109	pkgLevel = depth - pkgLevel - 1;
				1110
				1111	//
				1112	// The algorithm used starts by setting the affinity to each available
Andrey Churbanov	1c33129	2015-01-27 17:03:42 +0000	[diff] [blame]	1113	// thread and retrieving info from the cpuid instruction, so if we are
				1114	// not capable of calling __kmp_get_system_affinity() and
				1115	// _kmp_get_system_affinity(), then we need to do something else - use
				1116	// the defaults that we calculated from issuing cpuid without binding
				1117	// to each proc.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1118	//
				1119	if (! KMP_AFFINITY_CAPABLE())
				1120	{
				1121	//
				1122	// Hack to try and infer the machine topology using only the data
				1123	// available from cpuid on the current thread, and __kmp_xproc.
				1124	//
				1125	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				1126
				1127	__kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
				1128	nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1129	if (__kmp_affinity_verbose) {
				1130	KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
				1131	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1132	if (__kmp_affinity_uniform_topology()) {
				1133	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1134	} else {
				1135	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				1136	}
				1137	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1138	__kmp_nThreadsPerCore, __kmp_ncores);
				1139	}
				1140	return 0;
				1141	}
				1142
				1143	//
				1144	//
				1145	// From here on, we can assume that it is safe to call
				1146	// __kmp_get_system_affinity() and __kmp_set_system_affinity(),
				1147	// even if __kmp_affinity_type = affinity_none.
				1148	//
				1149
				1150	//
				1151	// Save the affinity mask for the current thread.
				1152	//
				1153	kmp_affin_mask_t *oldMask;
				1154	KMP_CPU_ALLOC(oldMask);
				1155	__kmp_get_system_affinity(oldMask, TRUE);
				1156
				1157	//
				1158	// Allocate the data structure to be returned.
				1159	//
				1160	AddrUnsPair retval = (AddrUnsPair )
				1161	__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
				1162
				1163	//
				1164	// Run through each of the available contexts, binding the current thread
				1165	// to it, and obtaining the pertinent information using the cpuid instr.
				1166	//
				1167	unsigned int proc;
				1168	int nApics = 0;
				1169	for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
				1170	//
				1171	// Skip this proc if it is not included in the machine model.
				1172	//
				1173	if (! KMP_CPU_ISSET(proc, fullMask)) {
				1174	continue;
				1175	}
				1176	KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
				1177
				1178	__kmp_affinity_bind_thread(proc);
				1179
				1180	//
				1181	// Extrach the labels for each level in the machine topology map
				1182	// from the Apic ID.
				1183	//
				1184	Address addr(depth);
				1185	int prev_shift = 0;
				1186
				1187	for (level = 0; level < depth; level++) {
				1188	__kmp_x86_cpuid(11, level, &buf);
				1189	unsigned apicId = buf.edx;
				1190	if (buf.ebx == 0) {
				1191	if (level != depth - 1) {
				1192	KMP_CPU_FREE(oldMask);
				1193	*msg_id = kmp_i18n_str_InconsistentCpuidInfo;
				1194	return -1;
				1195	}
				1196	addr.labels[depth - level - 1] = apicId >> prev_shift;
				1197	level++;
				1198	break;
				1199	}
				1200	int shift = buf.eax & 0x1f;
				1201	int mask = (1 << shift) - 1;
				1202	addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
				1203	prev_shift = shift;
				1204	}
				1205	if (level != depth) {
				1206	KMP_CPU_FREE(oldMask);
				1207	*msg_id = kmp_i18n_str_InconsistentCpuidInfo;
				1208	return -1;
				1209	}
				1210
				1211	retval[nApics] = AddrUnsPair(addr, proc);
				1212	nApics++;
				1213	}
				1214
				1215	//
				1216	// We've collected all the info we need.
				1217	// Restore the old affinity mask for this thread.
				1218	//
				1219	__kmp_set_system_affinity(oldMask, TRUE);
				1220
				1221	//
				1222	// If there's only one thread context to bind to, return now.
				1223	//
				1224	KMP_ASSERT(nApics > 0);
				1225	if (nApics == 1) {
				1226	__kmp_ncores = nPackages = 1;
				1227	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1228	if (__kmp_affinity_verbose) {
				1229	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				1230	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1231
				1232	KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
				1233	if (__kmp_affinity_respect_mask) {
				1234	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				1235	} else {
				1236	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				1237	}
				1238	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1239	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1240	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1241	__kmp_nThreadsPerCore, __kmp_ncores);
				1242	}
				1243
				1244	if (__kmp_affinity_type == affinity_none) {
				1245	__kmp_free(retval);
				1246	KMP_CPU_FREE(oldMask);
				1247	return 0;
				1248	}
				1249
				1250	//
				1251	// Form an Address object which only includes the package level.
				1252	//
				1253	Address addr(1);
				1254	addr.labels[0] = retval[0].first.labels[pkgLevel];
				1255	retval[0].first = addr;
				1256
				1257	if (__kmp_affinity_gran_levels < 0) {
				1258	__kmp_affinity_gran_levels = 0;
				1259	}
				1260
				1261	if (__kmp_affinity_verbose) {
				1262	__kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
				1263	}
				1264
				1265	*address2os = retval;
				1266	KMP_CPU_FREE(oldMask);
				1267	return 1;
				1268	}
				1269
				1270	//
				1271	// Sort the table by physical Id.
				1272	//
				1273	qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
				1274
				1275	//
				1276	// Find the radix at each of the levels.
				1277	//
				1278	unsigned totals = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1279	unsigned counts = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1280	unsigned maxCt = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1281	unsigned last = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1282	for (level = 0; level < depth; level++) {
				1283	totals[level] = 1;
				1284	maxCt[level] = 1;
				1285	counts[level] = 1;
				1286	last[level] = retval[0].first.labels[level];
				1287	}
				1288
				1289	//
				1290	// From here on, the iteration variable "level" runs from the finest
				1291	// level to the coarsest, i.e. we iterate forward through
				1292	// (*address2os)[].first.labels[] - in the previous loops, we iterated
				1293	// backwards.
				1294	//
				1295	for (proc = 1; (int)proc < nApics; proc++) {
				1296	int level;
				1297	for (level = 0; level < depth; level++) {
				1298	if (retval[proc].first.labels[level] != last[level]) {
				1299	int j;
				1300	for (j = level + 1; j < depth; j++) {
				1301	totals[j]++;
				1302	counts[j] = 1;
				1303	// The line below causes printing incorrect topology information
				1304	// in case the max value for some level (maxCt[level]) is encountered earlier than
				1305	// some less value while going through the array.
				1306	// For example, let pkg0 has 4 cores and pkg1 has 2 cores. Then maxCt[1] == 2
				1307	// whereas it must be 4.
				1308	// TODO!!! Check if it can be commented safely
				1309	//maxCt[j] = 1;
				1310	last[j] = retval[proc].first.labels[j];
				1311	}
				1312	totals[level]++;
				1313	counts[level]++;
				1314	if (counts[level] > maxCt[level]) {
				1315	maxCt[level] = counts[level];
				1316	}
				1317	last[level] = retval[proc].first.labels[level];
				1318	break;
				1319	}
				1320	else if (level == depth - 1) {
				1321	__kmp_free(last);
				1322	__kmp_free(maxCt);
				1323	__kmp_free(counts);
				1324	__kmp_free(totals);
				1325	__kmp_free(retval);
				1326	KMP_CPU_FREE(oldMask);
				1327	*msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
				1328	return -1;
				1329	}
				1330	}
				1331	}
				1332
				1333	//
				1334	// When affinity is off, this routine will still be called to set
Andrey Churbanov	f696c82	2015-01-27 16:55:43 +0000	[diff] [blame]	1335	// __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1336	// nCoresPerPkg, & nPackages. Make sure all these vars are set
				1337	// correctly, and return if affinity is not enabled.
				1338	//
				1339	if (threadLevel >= 0) {
				1340	__kmp_nThreadsPerCore = maxCt[threadLevel];
				1341	}
				1342	else {
				1343	__kmp_nThreadsPerCore = 1;
				1344	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1345	nPackages = totals[pkgLevel];
				1346
				1347	if (coreLevel >= 0) {
				1348	__kmp_ncores = totals[coreLevel];
				1349	nCoresPerPkg = maxCt[coreLevel];
				1350	}
				1351	else {
				1352	__kmp_ncores = nPackages;
				1353	nCoresPerPkg = 1;
				1354	}
				1355
				1356	//
				1357	// Check to see if the machine topology is uniform
				1358	//
				1359	unsigned prod = maxCt[0];
				1360	for (level = 1; level < depth; level++) {
				1361	prod *= maxCt[level];
				1362	}
				1363	bool uniform = (prod == totals[level - 1]);
				1364
				1365	//
				1366	// Print the machine topology summary.
				1367	//
				1368	if (__kmp_affinity_verbose) {
				1369	char mask[KMP_AFFIN_MASK_PRINT_LEN];
				1370	__kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1371
				1372	KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
				1373	if (__kmp_affinity_respect_mask) {
				1374	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
				1375	} else {
				1376	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
				1377	}
				1378	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1379	if (uniform) {
				1380	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1381	} else {
				1382	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				1383	}
				1384
				1385	kmp_str_buf_t buf;
				1386	__kmp_str_buf_init(&buf);
				1387
				1388	__kmp_str_buf_print(&buf, "%d", totals[0]);
				1389	for (level = 1; level <= pkgLevel; level++) {
				1390	__kmp_str_buf_print(&buf, " x %d", maxCt[level]);
				1391	}
				1392	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
				1393	__kmp_nThreadsPerCore, __kmp_ncores);
				1394
				1395	__kmp_str_buf_free(&buf);
				1396	}
				1397
				1398	if (__kmp_affinity_type == affinity_none) {
				1399	__kmp_free(last);
				1400	__kmp_free(maxCt);
				1401	__kmp_free(counts);
				1402	__kmp_free(totals);
				1403	__kmp_free(retval);
				1404	KMP_CPU_FREE(oldMask);
				1405	return 0;
				1406	}
				1407
				1408	//
				1409	// Find any levels with radiix 1, and remove them from the map
				1410	// (except for the package level).
				1411	//
				1412	int new_depth = 0;
				1413	for (level = 0; level < depth; level++) {
				1414	if ((maxCt[level] == 1) && (level != pkgLevel)) {
				1415	continue;
				1416	}
				1417	new_depth++;
				1418	}
				1419
				1420	//
				1421	// If we are removing any levels, allocate a new vector to return,
				1422	// and copy the relevant information to it.
				1423	//
				1424	if (new_depth != depth) {
				1425	AddrUnsPair new_retval = (AddrUnsPair )__kmp_allocate(
				1426	sizeof(AddrUnsPair) * nApics);
				1427	for (proc = 0; (int)proc < nApics; proc++) {
				1428	Address addr(new_depth);
				1429	new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
				1430	}
				1431	int new_level = 0;
Jonathan Peyton	62f3840	2015-08-25 18:44:41 +0000	[diff] [blame]	1432	int newPkgLevel = -1;
				1433	int newCoreLevel = -1;
				1434	int newThreadLevel = -1;
				1435	int i;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1436	for (level = 0; level < depth; level++) {
Jonathan Peyton	62f3840	2015-08-25 18:44:41 +0000	[diff] [blame]	1437	if ((maxCt[level] == 1)
				1438	&& (level != pkgLevel)) {
				1439	//
				1440	// Remove this level. Never remove the package level
				1441	//
				1442	continue;
				1443	}
				1444	if (level == pkgLevel) {
				1445	newPkgLevel = level;
				1446	}
				1447	if (level == coreLevel) {
				1448	newCoreLevel = level;
				1449	}
				1450	if (level == threadLevel) {
				1451	newThreadLevel = level;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1452	}
				1453	for (proc = 0; (int)proc < nApics; proc++) {
				1454	new_retval[proc].first.labels[new_level]
				1455	= retval[proc].first.labels[level];
				1456	}
				1457	new_level++;
				1458	}
				1459
				1460	__kmp_free(retval);
				1461	retval = new_retval;
				1462	depth = new_depth;
Jonathan Peyton	62f3840	2015-08-25 18:44:41 +0000	[diff] [blame]	1463	pkgLevel = newPkgLevel;
				1464	coreLevel = newCoreLevel;
				1465	threadLevel = newThreadLevel;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1466	}
				1467
				1468	if (__kmp_affinity_gran_levels < 0) {
				1469	//
				1470	// Set the granularity level based on what levels are modeled
				1471	// in the machine topology map.
				1472	//
				1473	__kmp_affinity_gran_levels = 0;
				1474	if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
				1475	__kmp_affinity_gran_levels++;
				1476	}
				1477	if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
				1478	__kmp_affinity_gran_levels++;
				1479	}
				1480	if (__kmp_affinity_gran > affinity_gran_package) {
				1481	__kmp_affinity_gran_levels++;
				1482	}
				1483	}
				1484
				1485	if (__kmp_affinity_verbose) {
				1486	__kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
				1487	coreLevel, threadLevel);
				1488	}
				1489
				1490	__kmp_free(last);
				1491	__kmp_free(maxCt);
				1492	__kmp_free(counts);
				1493	__kmp_free(totals);
				1494	KMP_CPU_FREE(oldMask);
				1495	*address2os = retval;
				1496	return depth;
				1497	}
				1498
				1499
				1500	# endif /* KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 */
				1501
				1502
					// Slot numbers of the fields stored in one per-processor record (an array
					// of unsigned, see ProcCpuInfo below). Slot nodeIdIndex + <n> is used for
					// a "node_<n> id" field found while scanning the cpuinfo file.
				1503	#define osIdIndex 0
				1504	#define threadIdIndex 1
				1505	#define coreIdIndex 2
				1506	#define pkgIdIndex 3
				1507	#define nodeIdIndex 4
				1508
					// One processor record: an array of unsigned indexed by the slots above.
				1509	typedef unsigned *ProcCpuInfo;
					// Highest slot currently in use. Starts at pkgIdIndex and is raised to
					// nodeIdIndex + <n> by the cpuinfo parser when it sees a node_<n> field.
				1510	static unsigned maxIndex = pkgIdIndex;
				1511
				1512
				1513	static int
				1514	__kmp_affinity_cmp_ProcCpuInfo_os_id(const void a, const void b)
				1515	{
				1516	const unsigned aa = (const unsigned )a;
				1517	const unsigned bb = (const unsigned )b;
				1518	if (aa[osIdIndex] < bb[osIdIndex]) return -1;
				1519	if (aa[osIdIndex] > bb[osIdIndex]) return 1;
				1520	return 0;
				1521	};
				1522
				1523
				1524	static int
				1525	__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void a, const void b)
				1526	{
				1527	unsigned i;
				1528	const unsigned aa = ((const unsigned **)a);
				1529	const unsigned bb = ((const unsigned **)b);
				1530	for (i = maxIndex; ; i--) {
				1531	if (aa[i] < bb[i]) return -1;
				1532	if (aa[i] > bb[i]) return 1;
				1533	if (i == osIdIndex) break;
				1534	}
				1535	return 0;
				1536	}
				1537
				1538
				1539	//
				1540	// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
				1541	// affinity map.
				1542	//
				1543	static int
				1544	__kmp_affinity_create_cpuinfo_map(AddrUnsPair *address2os, int line,
				1545	kmp_i18n_id_t const msg_id, FILE f)
				1546	{
				1547	*address2os = NULL;
				1548	*msg_id = kmp_i18n_null;
				1549
				1550	//
				1551	// Scan of the file, and count the number of "processor" (osId) fields,
Alp Toker	8f2d3f0	2014-02-24 10:40:15 +0000	[diff] [blame]	1552	// and find the highest value of <n> for a node_<n> field.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1553	//
				1554	char buf[256];
				1555	unsigned num_records = 0;
				1556	while (! feof(f)) {
				1557	buf[sizeof(buf) - 1] = 1;
				1558	if (! fgets(buf, sizeof(buf), f)) {
				1559	//
				1560	// Read errors presumably because of EOF
				1561	//
				1562	break;
				1563	}
				1564
				1565	char s1[] = "processor";
				1566	if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
				1567	num_records++;
				1568	continue;
				1569	}
				1570
				1571	//
				1572	// FIXME - this will match "node_<n> <garbage>"
				1573	//
				1574	unsigned level;
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	1575	if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1576	if (nodeIdIndex + level >= maxIndex) {
				1577	maxIndex = nodeIdIndex + level;
				1578	}
				1579	continue;
				1580	}
				1581	}
				1582
				1583	//
				1584	// Check for empty file / no valid processor records, or too many.
				1585	// The number of records can't exceed the number of valid bits in the
				1586	// affinity mask.
				1587	//
				1588	if (num_records == 0) {
				1589	*line = 0;
				1590	*msg_id = kmp_i18n_str_NoProcRecords;
				1591	return -1;
				1592	}
				1593	if (num_records > (unsigned)__kmp_xproc) {
				1594	*line = 0;
				1595	*msg_id = kmp_i18n_str_TooManyProcRecords;
				1596	return -1;
				1597	}
				1598
				1599	//
				1600	// Set the file pointer back to the beginning, so that we can scan the
				1601	// file again, this time performing a full parse of the data.
				1602	// Allocate a vector of ProcCpuInfo object, where we will place the data.
				1603	// Adding an extra element at the end allows us to remove a lot of extra
				1604	// checks for termination conditions.
				1605	//
				1606	if (fseek(f, 0, SEEK_SET) != 0) {
				1607	*line = 0;
				1608	*msg_id = kmp_i18n_str_CantRewindCpuinfo;
				1609	return -1;
				1610	}
				1611
				1612	//
				1613	// Allocate the array of records to store the proc info in. The dummy
				1614	// element at the end makes the logic in filling them out easier to code.
				1615	//
				1616	unsigned threadInfo = (unsigned )__kmp_allocate((num_records + 1)
				1617	* sizeof(unsigned *));
				1618	unsigned i;
				1619	for (i = 0; i <= num_records; i++) {
				1620	threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
				1621	* sizeof(unsigned));
				1622	}
				1623
				1624	#define CLEANUP_THREAD_INFO \
				1625	for (i = 0; i <= num_records; i++) { \
				1626	__kmp_free(threadInfo[i]); \
				1627	} \
				1628	__kmp_free(threadInfo);
				1629
				1630	//
				1631	// A value of UINT_MAX means that we didn't find the field
				1632	//
				1633	unsigned __index;
				1634
				1635	#define INIT_PROC_INFO(p) \
				1636	for (__index = 0; __index <= maxIndex; __index++) { \
				1637	(p)[__index] = UINT_MAX; \
				1638	}
				1639
				1640	for (i = 0; i <= num_records; i++) {
				1641	INIT_PROC_INFO(threadInfo[i]);
				1642	}
				1643
				1644	unsigned num_avail = 0;
				1645	*line = 0;
				1646	while (! feof(f)) {
				1647	//
				1648	// Create an inner scoping level, so that all the goto targets at the
				1649	// end of the loop appear in an outer scoping level. This avoids
				1650	// warnings about jumping past an initialization to a target in the
				1651	// same block.
				1652	//
				1653	{
				1654	buf[sizeof(buf) - 1] = 1;
				1655	bool long_line = false;
				1656	if (! fgets(buf, sizeof(buf), f)) {
				1657	//
				1658	// Read errors presumably because of EOF
				1659	//
				1660	// If there is valid data in threadInfo[num_avail], then fake
				1661	// a blank line to ensure that the last address gets parsed.
				1662	//
				1663	bool valid = false;
				1664	for (i = 0; i <= maxIndex; i++) {
				1665	if (threadInfo[num_avail][i] != UINT_MAX) {
				1666	valid = true;
				1667	}
				1668	}
				1669	if (! valid) {
				1670	break;
				1671	}
				1672	buf[0] = 0;
				1673	} else if (!buf[sizeof(buf) - 1]) {
				1674	//
				1675	// The line is longer than the buffer. Set a flag and don't
				1676	// emit an error if we were going to ignore the line, anyway.
				1677	//
				1678	long_line = true;
				1679
				1680	#define CHECK_LINE \
				1681	if (long_line) { \
				1682	CLEANUP_THREAD_INFO; \
				1683	*msg_id = kmp_i18n_str_LongLineCpuinfo; \
				1684	return -1; \
				1685	}
				1686	}
				1687	(*line)++;
				1688
				1689	char s1[] = "processor";
				1690	if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
				1691	CHECK_LINE;
				1692	char *p = strchr(buf + sizeof(s1) - 1, ':');
				1693	unsigned val;
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	1694	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1695	if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
				1696	threadInfo[num_avail][osIdIndex] = val;
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	1697	#if KMP_OS_LINUX && USE_SYSFS_INFO
				1698	char path[256];
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	1699	KMP_SNPRINTF(path, sizeof(path),
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	1700	"/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
				1701	threadInfo[num_avail][osIdIndex]);
				1702	__kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
				1703
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	1704	KMP_SNPRINTF(path, sizeof(path),
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	1705	"/sys/devices/system/cpu/cpu%u/topology/core_id",
				1706	threadInfo[num_avail][osIdIndex]);
				1707	__kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1708	continue;
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	1709	#else
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1710	}
				1711	char s2[] = "physical id";
				1712	if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
				1713	CHECK_LINE;
				1714	char *p = strchr(buf + sizeof(s2) - 1, ':');
				1715	unsigned val;
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	1716	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1717	if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
				1718	threadInfo[num_avail][pkgIdIndex] = val;
				1719	continue;
				1720	}
				1721	char s3[] = "core id";
				1722	if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
				1723	CHECK_LINE;
				1724	char *p = strchr(buf + sizeof(s3) - 1, ':');
				1725	unsigned val;
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	1726	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1727	if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
				1728	threadInfo[num_avail][coreIdIndex] = val;
				1729	continue;
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	1730	#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1731	}
				1732	char s4[] = "thread id";
				1733	if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
				1734	CHECK_LINE;
				1735	char *p = strchr(buf + sizeof(s4) - 1, ':');
				1736	unsigned val;
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	1737	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1738	if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
				1739	threadInfo[num_avail][threadIdIndex] = val;
				1740	continue;
				1741	}
				1742	unsigned level;
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	1743	if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1744	CHECK_LINE;
				1745	char *p = strchr(buf + sizeof(s4) - 1, ':');
				1746	unsigned val;
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	1747	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1748	KMP_ASSERT(nodeIdIndex + level <= maxIndex);
				1749	if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
				1750	threadInfo[num_avail][nodeIdIndex + level] = val;
				1751	continue;
				1752	}
				1753
				1754	//
				1755	// We didn't recognize the leading token on the line.
				1756	// There are lots of leading tokens that we don't recognize -
				1757	// if the line isn't empty, go on to the next line.
				1758	//
				1759	if ((buf != 0) && (buf != '\n')) {
				1760	//
				1761	// If the line is longer than the buffer, read characters
				1762	// until we find a newline.
				1763	//
				1764	if (long_line) {
				1765	int ch;
				1766	while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
				1767	}
				1768	continue;
				1769	}
				1770
				1771	//
				1772	// A newline has signalled the end of the processor record.
				1773	// Check that there aren't too many procs specified.
				1774	//
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	1775	if ((int)num_avail == __kmp_xproc) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1776	CLEANUP_THREAD_INFO;
				1777	*msg_id = kmp_i18n_str_TooManyEntries;
				1778	return -1;
				1779	}
				1780
				1781	//
				1782	// Check for missing fields. The osId field must be there, and we
				1783	// currently require that the physical id field is specified, also.
				1784	//
				1785	if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
				1786	CLEANUP_THREAD_INFO;
				1787	*msg_id = kmp_i18n_str_MissingProcField;
				1788	return -1;
				1789	}
				1790	if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
				1791	CLEANUP_THREAD_INFO;
				1792	*msg_id = kmp_i18n_str_MissingPhysicalIDField;
				1793	return -1;
				1794	}
				1795
				1796	//
				1797	// Skip this proc if it is not included in the machine model.
				1798	//
				1799	if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
				1800	INIT_PROC_INFO(threadInfo[num_avail]);
				1801	continue;
				1802	}
				1803
				1804	//
				1805	// We have a successful parse of this proc's info.
				1806	// Increment the counter, and prepare for the next proc.
				1807	//
				1808	num_avail++;
				1809	KMP_ASSERT(num_avail <= num_records);
				1810	INIT_PROC_INFO(threadInfo[num_avail]);
				1811	}
				1812	continue;
				1813
				1814	no_val:
				1815	CLEANUP_THREAD_INFO;
				1816	*msg_id = kmp_i18n_str_MissingValCpuinfo;
				1817	return -1;
				1818
				1819	dup_field:
				1820	CLEANUP_THREAD_INFO;
				1821	*msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
				1822	return -1;
				1823	}
				1824	*line = 0;
				1825
				1826	# if KMP_MIC && REDUCE_TEAM_SIZE
				1827	unsigned teamSize = 0;
				1828	# endif // KMP_MIC && REDUCE_TEAM_SIZE
				1829
				1830	// check for num_records == __kmp_xproc ???
				1831
				1832	//
				1833	// If there's only one thread context to bind to, form an Address object
				1834	// with depth 1 and return immediately (or, if affinity is off, set
				1835	// address2os to NULL and return).
				1836	//
				1837	// If it is configured to omit the package level when there is only a
				1838	// single package, the logic at the end of this routine won't work if
				1839	// there is only a single thread - it would try to form an Address
				1840	// object with depth 0.
				1841	//
				1842	KMP_ASSERT(num_avail > 0);
				1843	KMP_ASSERT(num_avail <= num_records);
				1844	if (num_avail == 1) {
				1845	__kmp_ncores = 1;
				1846	__kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1847	if (__kmp_affinity_verbose) {
				1848	if (! KMP_AFFINITY_CAPABLE()) {
				1849	KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
				1850	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1851	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1852	}
				1853	else {
				1854	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				1855	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				1856	fullMask);
				1857	KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
				1858	if (__kmp_affinity_respect_mask) {
				1859	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				1860	} else {
				1861	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				1862	}
				1863	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1864	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1865	}
				1866	int index;
				1867	kmp_str_buf_t buf;
				1868	__kmp_str_buf_init(&buf);
				1869	__kmp_str_buf_print(&buf, "1");
				1870	for (index = maxIndex - 1; index > pkgIdIndex; index--) {
				1871	__kmp_str_buf_print(&buf, " x 1");
				1872	}
				1873	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
				1874	__kmp_str_buf_free(&buf);
				1875	}
				1876
				1877	if (__kmp_affinity_type == affinity_none) {
				1878	CLEANUP_THREAD_INFO;
				1879	return 0;
				1880	}
				1881
				1882	address2os = (AddrUnsPair)__kmp_allocate(sizeof(AddrUnsPair));
				1883	Address addr(1);
				1884	addr.labels[0] = threadInfo[0][pkgIdIndex];
				1885	(*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
				1886
				1887	if (__kmp_affinity_gran_levels < 0) {
				1888	__kmp_affinity_gran_levels = 0;
				1889	}
				1890
				1891	if (__kmp_affinity_verbose) {
				1892	__kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
				1893	}
				1894
				1895	CLEANUP_THREAD_INFO;
				1896	return 1;
				1897	}
				1898
				1899	//
				1900	// Sort the threadInfo table by physical Id.
				1901	//
				1902	qsort(threadInfo, num_avail, sizeof(*threadInfo),
				1903	__kmp_affinity_cmp_ProcCpuInfo_phys_id);
				1904
				1905	//
				1906	// The table is now sorted by pkgId / coreId / threadId, but we really
				1907	// don't know the radix of any of the fields. pkgId's may be sparsely
				1908	// assigned among the chips on a system. Although coreId's are usually
				1909	// assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
				1910	// [0..threadsPerCore-1], we don't want to make any such assumptions.
				1911	//
				1912	// For that matter, we don't know what coresPerPkg and threadsPerCore
				1913	// (or the total # packages) are at this point - we want to determine
				1914	// that now. We only have an upper bound on the first two figures.
				1915	//
				1916	unsigned counts = (unsigned )__kmp_allocate((maxIndex + 1)
				1917	* sizeof(unsigned));
				1918	unsigned maxCt = (unsigned )__kmp_allocate((maxIndex + 1)
				1919	* sizeof(unsigned));
				1920	unsigned totals = (unsigned )__kmp_allocate((maxIndex + 1)
				1921	* sizeof(unsigned));
				1922	unsigned lastId = (unsigned )__kmp_allocate((maxIndex + 1)
				1923	* sizeof(unsigned));
				1924
				1925	bool assign_thread_ids = false;
				1926	unsigned threadIdCt;
				1927	unsigned index;
				1928
				1929	restart_radix_check:
				1930	threadIdCt = 0;
				1931
				1932	//
				1933	// Initialize the counter arrays with data from threadInfo[0].
				1934	//
				1935	if (assign_thread_ids) {
				1936	if (threadInfo[0][threadIdIndex] == UINT_MAX) {
				1937	threadInfo[0][threadIdIndex] = threadIdCt++;
				1938	}
				1939	else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
				1940	threadIdCt = threadInfo[0][threadIdIndex] + 1;
				1941	}
				1942	}
				1943	for (index = 0; index <= maxIndex; index++) {
				1944	counts[index] = 1;
				1945	maxCt[index] = 1;
				1946	totals[index] = 1;
				1947	lastId[index] = threadInfo[0][index];;
				1948	}
				1949
				1950	//
				1951	// Run through the rest of the OS procs.
				1952	//
				1953	for (i = 1; i < num_avail; i++) {
				1954	//
				1955	// Find the most significant index whose id differs
				1956	// from the id for the previous OS proc.
				1957	//
				1958	for (index = maxIndex; index >= threadIdIndex; index--) {
				1959	if (assign_thread_ids && (index == threadIdIndex)) {
				1960	//
				1961	// Auto-assign the thread id field if it wasn't specified.
				1962	//
				1963	if (threadInfo[i][threadIdIndex] == UINT_MAX) {
				1964	threadInfo[i][threadIdIndex] = threadIdCt++;
				1965	}
				1966
				1967	//
				1968	// Aparrently the thread id field was specified for some
				1969	// entries and not others. Start the thread id counter
				1970	// off at the next higher thread id.
				1971	//
				1972	else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
				1973	threadIdCt = threadInfo[i][threadIdIndex] + 1;
				1974	}
				1975	}
				1976	if (threadInfo[i][index] != lastId[index]) {
				1977	//
				1978	// Run through all indices which are less significant,
				1979	// and reset the counts to 1.
				1980	//
				1981	// At all levels up to and including index, we need to
				1982	// increment the totals and record the last id.
				1983	//
				1984	unsigned index2;
				1985	for (index2 = threadIdIndex; index2 < index; index2++) {
				1986	totals[index2]++;
				1987	if (counts[index2] > maxCt[index2]) {
				1988	maxCt[index2] = counts[index2];
				1989	}
				1990	counts[index2] = 1;
				1991	lastId[index2] = threadInfo[i][index2];
				1992	}
				1993	counts[index]++;
				1994	totals[index]++;
				1995	lastId[index] = threadInfo[i][index];
				1996
				1997	if (assign_thread_ids && (index > threadIdIndex)) {
				1998
				1999	# if KMP_MIC && REDUCE_TEAM_SIZE
				2000	//
				2001	// The default team size is the total #threads in the machine
				2002	// minus 1 thread for every core that has 3 or more threads.
				2003	//
				2004	teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
				2005	# endif // KMP_MIC && REDUCE_TEAM_SIZE
				2006
				2007	//
				2008	// Restart the thread counter, as we are on a new core.
				2009	//
				2010	threadIdCt = 0;
				2011
				2012	//
				2013	// Auto-assign the thread id field if it wasn't specified.
				2014	//
				2015	if (threadInfo[i][threadIdIndex] == UINT_MAX) {
				2016	threadInfo[i][threadIdIndex] = threadIdCt++;
				2017	}
				2018
				2019	//
				2020	// Aparrently the thread id field was specified for some
				2021	// entries and not others. Start the thread id counter
				2022	// off at the next higher thread id.
				2023	//
				2024	else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
				2025	threadIdCt = threadInfo[i][threadIdIndex] + 1;
				2026	}
				2027	}
				2028	break;
				2029	}
				2030	}
				2031	if (index < threadIdIndex) {
				2032	//
				2033	// If thread ids were specified, it is an error if they are not
				2034	// unique. Also, check that we waven't already restarted the
				2035	// loop (to be safe - shouldn't need to).
				2036	//
				2037	if ((threadInfo[i][threadIdIndex] != UINT_MAX)
				2038	\|\| assign_thread_ids) {
				2039	__kmp_free(lastId);
				2040	__kmp_free(totals);
				2041	__kmp_free(maxCt);
				2042	__kmp_free(counts);
				2043	CLEANUP_THREAD_INFO;
				2044	*msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
				2045	return -1;
				2046	}
				2047
				2048	//
				2049	// If the thread ids were not specified and we see entries
				2050	// entries that are duplicates, start the loop over and
				2051	// assign the thread ids manually.
				2052	//
				2053	assign_thread_ids = true;
				2054	goto restart_radix_check;
				2055	}
				2056	}
				2057
				2058	# if KMP_MIC && REDUCE_TEAM_SIZE
				2059	//
				2060	// The default team size is the total #threads in the machine
				2061	// minus 1 thread for every core that has 3 or more threads.
				2062	//
				2063	teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
				2064	# endif // KMP_MIC && REDUCE_TEAM_SIZE
				2065
				2066	for (index = threadIdIndex; index <= maxIndex; index++) {
				2067	if (counts[index] > maxCt[index]) {
				2068	maxCt[index] = counts[index];
				2069	}
				2070	}
				2071
				2072	__kmp_nThreadsPerCore = maxCt[threadIdIndex];
				2073	nCoresPerPkg = maxCt[coreIdIndex];
				2074	nPackages = totals[pkgIdIndex];
				2075
				2076	//
				2077	// Check to see if the machine topology is uniform
				2078	//
				2079	unsigned prod = totals[maxIndex];
				2080	for (index = threadIdIndex; index < maxIndex; index++) {
				2081	prod *= maxCt[index];
				2082	}
				2083	bool uniform = (prod == totals[threadIdIndex]);
				2084
				2085	//
				2086	// When affinity is off, this routine will still be called to set
Andrey Churbanov	f696c82	2015-01-27 16:55:43 +0000	[diff] [blame]	2087	// __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2088	// nCoresPerPkg, & nPackages. Make sure all these vars are set
				2089	// correctly, and return now if affinity is not enabled.
				2090	//
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2091	__kmp_ncores = totals[coreIdIndex];
				2092
				2093	if (__kmp_affinity_verbose) {
				2094	if (! KMP_AFFINITY_CAPABLE()) {
				2095	KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
				2096	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				2097	if (uniform) {
				2098	KMP_INFORM(Uniform, "KMP_AFFINITY");
				2099	} else {
				2100	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				2101	}
				2102	}
				2103	else {
				2104	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				2105	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
				2106	KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
				2107	if (__kmp_affinity_respect_mask) {
				2108	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				2109	} else {
				2110	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				2111	}
				2112	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				2113	if (uniform) {
				2114	KMP_INFORM(Uniform, "KMP_AFFINITY");
				2115	} else {
				2116	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				2117	}
				2118	}
				2119	kmp_str_buf_t buf;
				2120	__kmp_str_buf_init(&buf);
				2121
				2122	__kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
				2123	for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
				2124	__kmp_str_buf_print(&buf, " x %d", maxCt[index]);
				2125	}
				2126	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
				2127	maxCt[threadIdIndex], __kmp_ncores);
				2128
				2129	__kmp_str_buf_free(&buf);
				2130	}
				2131
				2132	# if KMP_MIC && REDUCE_TEAM_SIZE
				2133	//
				2134	// Set the default team size.
				2135	//
				2136	if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
				2137	__kmp_dflt_team_nth = teamSize;
				2138	KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
				2139	__kmp_dflt_team_nth));
				2140	}
				2141	# endif // KMP_MIC && REDUCE_TEAM_SIZE
				2142
				2143	if (__kmp_affinity_type == affinity_none) {
				2144	__kmp_free(lastId);
				2145	__kmp_free(totals);
				2146	__kmp_free(maxCt);
				2147	__kmp_free(counts);
				2148	CLEANUP_THREAD_INFO;
				2149	return 0;
				2150	}
				2151
				2152	//
				2153	// Count the number of levels which have more nodes at that level than
				2154	// at the parent's level (with there being an implicit root node of
				2155	// the top level). This is equivalent to saying that there is at least
				2156	// one node at this level which has a sibling. These levels are in the
				2157	// map, and the package level is always in the map.
				2158	//
				2159	bool inMap = (bool )__kmp_allocate((maxIndex + 1) * sizeof(bool));
				2160	int level = 0;
				2161	for (index = threadIdIndex; index < maxIndex; index++) {
				2162	KMP_ASSERT(totals[index] >= totals[index + 1]);
				2163	inMap[index] = (totals[index] > totals[index + 1]);
				2164	}
				2165	inMap[maxIndex] = (totals[maxIndex] > 1);
				2166	inMap[pkgIdIndex] = true;
				2167
				2168	int depth = 0;
				2169	for (index = threadIdIndex; index <= maxIndex; index++) {
				2170	if (inMap[index]) {
				2171	depth++;
				2172	}
				2173	}
				2174	KMP_ASSERT(depth > 0);
				2175
				2176	//
				2177	// Construct the data structure that is to be returned.
				2178	//
				2179	address2os = (AddrUnsPair)
				2180	__kmp_allocate(sizeof(AddrUnsPair) * num_avail);
				2181	int pkgLevel = -1;
				2182	int coreLevel = -1;
				2183	int threadLevel = -1;
				2184
				2185	for (i = 0; i < num_avail; ++i) {
				2186	Address addr(depth);
				2187	unsigned os = threadInfo[i][osIdIndex];
				2188	int src_index;
				2189	int dst_index = 0;
				2190
				2191	for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
				2192	if (! inMap[src_index]) {
				2193	continue;
				2194	}
				2195	addr.labels[dst_index] = threadInfo[i][src_index];
				2196	if (src_index == pkgIdIndex) {
				2197	pkgLevel = dst_index;
				2198	}
				2199	else if (src_index == coreIdIndex) {
				2200	coreLevel = dst_index;
				2201	}
				2202	else if (src_index == threadIdIndex) {
				2203	threadLevel = dst_index;
				2204	}
				2205	dst_index++;
				2206	}
				2207	(*address2os)[i] = AddrUnsPair(addr, os);
				2208	}
				2209
				2210	if (__kmp_affinity_gran_levels < 0) {
				2211	//
				2212	// Set the granularity level based on what levels are modeled
				2213	// in the machine topology map.
				2214	//
				2215	unsigned src_index;
				2216	__kmp_affinity_gran_levels = 0;
				2217	for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
				2218	if (! inMap[src_index]) {
				2219	continue;
				2220	}
				2221	switch (src_index) {
				2222	case threadIdIndex:
				2223	if (__kmp_affinity_gran > affinity_gran_thread) {
				2224	__kmp_affinity_gran_levels++;
				2225	}
				2226
				2227	break;
				2228	case coreIdIndex:
				2229	if (__kmp_affinity_gran > affinity_gran_core) {
				2230	__kmp_affinity_gran_levels++;
				2231	}
				2232	break;
				2233
				2234	case pkgIdIndex:
				2235	if (__kmp_affinity_gran > affinity_gran_package) {
				2236	__kmp_affinity_gran_levels++;
				2237	}
				2238	break;
				2239	}
				2240	}
				2241	}
				2242
				2243	if (__kmp_affinity_verbose) {
				2244	__kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
				2245	coreLevel, threadLevel);
				2246	}
				2247
				2248	__kmp_free(inMap);
				2249	__kmp_free(lastId);
				2250	__kmp_free(totals);
				2251	__kmp_free(maxCt);
				2252	__kmp_free(counts);
				2253	CLEANUP_THREAD_INFO;
				2254	return depth;
				2255	}
				2256
				2257
				2258	//
				2259	// Create and return a table of affinity masks, indexed by OS thread ID.
				2260	// This routine handles OR'ing together all the affinity masks of threads
				2261	// that are sufficiently close, if granularity > fine.
				2262	//
				2263	static kmp_affin_mask_t *
				2264	__kmp_create_masks(unsigned maxIndex, unsigned numUnique,
				2265	AddrUnsPair *address2os, unsigned numAddrs)
				2266	{
				2267	//
				2268	// First form a table of affinity masks in order of OS thread id.
				2269	//
				2270	unsigned depth;
				2271	unsigned maxOsId;
				2272	unsigned i;
				2273
				2274	KMP_ASSERT(numAddrs > 0);
				2275	depth = address2os[0].first.depth;
				2276
				2277	maxOsId = 0;
				2278	for (i = 0; i < numAddrs; i++) {
				2279	unsigned osId = address2os[i].second;
				2280	if (osId > maxOsId) {
				2281	maxOsId = osId;
				2282	}
				2283	}
				2284	kmp_affin_mask_t osId2Mask = (kmp_affin_mask_t )__kmp_allocate(
				2285	(maxOsId + 1) * __kmp_affin_mask_size);
				2286
				2287	//
				2288	// Sort the address2os table according to physical order. Doing so
				2289	// will put all threads on the same core/package/node in consecutive
				2290	// locations.
				2291	//
				2292	qsort(address2os, numAddrs, sizeof(*address2os),
				2293	__kmp_affinity_cmp_Address_labels);
				2294
				2295	KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
				2296	if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
				2297	KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
				2298	}
				2299	if (__kmp_affinity_gran_levels >= (int)depth) {
				2300	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				2301	&& (__kmp_affinity_type != affinity_none))) {
				2302	KMP_WARNING(AffThreadsMayMigrate);
				2303	}
				2304	}
				2305
				2306	//
				2307	// Run through the table, forming the masks for all threads on each
				2308	// core. Threads on the same core will have identical "Address"
				2309	// objects, not considering the last level, which must be the thread
				2310	// id. All threads on a core will appear consecutively.
				2311	//
				2312	unsigned unique = 0;
				2313	unsigned j = 0; // index of 1st thread on core
				2314	unsigned leader = 0;
				2315	Address *leaderAddr = &(address2os[0].first);
				2316	kmp_affin_mask_t *sum
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	2317	= (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2318	KMP_CPU_ZERO(sum);
				2319	KMP_CPU_SET(address2os[0].second, sum);
				2320	for (i = 1; i < numAddrs; i++) {
				2321	//
Alp Toker	8f2d3f0	2014-02-24 10:40:15 +0000	[diff] [blame]	2322	// If this thread is sufficiently close to the leader (within the
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2323	// granularity setting), then set the bit for this os thread in the
				2324	// affinity mask for this group, and go on to the next thread.
				2325	//
				2326	if (leaderAddr->isClose(address2os[i].first,
				2327	__kmp_affinity_gran_levels)) {
				2328	KMP_CPU_SET(address2os[i].second, sum);
				2329	continue;
				2330	}
				2331
				2332	//
				2333	// For every thread in this group, copy the mask to the thread's
				2334	// entry in the osId2Mask table. Mark the first address as a
				2335	// leader.
				2336	//
				2337	for (; j < i; j++) {
				2338	unsigned osId = address2os[j].second;
				2339	KMP_DEBUG_ASSERT(osId <= maxOsId);
				2340	kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
				2341	KMP_CPU_COPY(mask, sum);
				2342	address2os[j].first.leader = (j == leader);
				2343	}
				2344	unique++;
				2345
				2346	//
				2347	// Start a new mask.
				2348	//
				2349	leader = i;
				2350	leaderAddr = &(address2os[i].first);
				2351	KMP_CPU_ZERO(sum);
				2352	KMP_CPU_SET(address2os[i].second, sum);
				2353	}
				2354
				2355	//
				2356	// For every thread in last group, copy the mask to the thread's
				2357	// entry in the osId2Mask table.
				2358	//
				2359	for (; j < i; j++) {
				2360	unsigned osId = address2os[j].second;
				2361	KMP_DEBUG_ASSERT(osId <= maxOsId);
				2362	kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
				2363	KMP_CPU_COPY(mask, sum);
				2364	address2os[j].first.leader = (j == leader);
				2365	}
				2366	unique++;
				2367
				2368	*maxIndex = maxOsId;
				2369	*numUnique = unique;
				2370	return osId2Mask;
				2371	}
				2372
				2373
				2374	//
				2375	// Stuff for the affinity proclist parsers. It's easier to declare these vars
				2376	// as file-static than to try and pass them through the calling sequence of
				2377	// the recursive-descent OMP_PLACES parser.
				2378	//
				2379	static kmp_affin_mask_t *newMasks;
				2380	static int numNewMasks;
				2381	static int nextNewMask;
				2382
				2383	#define ADD_MASK(_mask) \
				2384	{ \
				2385	if (nextNewMask >= numNewMasks) { \
				2386	numNewMasks *= 2; \
				2387	newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
				2388	numNewMasks * __kmp_affin_mask_size); \
				2389	} \
				2390	KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
				2391	nextNewMask++; \
				2392	}
				2393
				2394	#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
				2395	{ \
				2396	if (((_osId) > _maxOsId) \|\| \
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	2397	(! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2398	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings \
				2399	&& (__kmp_affinity_type != affinity_none))) { \
				2400	KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
				2401	} \
				2402	} \
				2403	else { \
				2404	ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
				2405	} \
				2406	}
				2407
				2408
				2409	//
				2410	// Re-parse the proclist (for the explicit affinity type), and form the list
				2411	// of affinity newMasks indexed by gtid.
				2412	//
				2413	static void
				2414	__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
				2415	unsigned int out_numMasks, const char proclist,
				2416	kmp_affin_mask_t *osId2Mask, int maxOsId)
				2417	{
				2418	const char *scan = proclist;
				2419	const char *next = proclist;
				2420
				2421	//
				2422	// We use malloc() for the temporary mask vector,
				2423	// so that we can use realloc() to extend it.
				2424	//
				2425	numNewMasks = 2;
				2426	newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
				2427	* __kmp_affin_mask_size);
				2428	nextNewMask = 0;
				2429	kmp_affin_mask_t sumMask = (kmp_affin_mask_t )__kmp_allocate(
				2430	__kmp_affin_mask_size);
				2431	int setSize = 0;
				2432
				2433	for (;;) {
				2434	int start, end, stride;
				2435
				2436	SKIP_WS(scan);
				2437	next = scan;
				2438	if (*next == '\0') {
				2439	break;
				2440	}
				2441
				2442	if (*next == '{') {
				2443	int num;
				2444	setSize = 0;
				2445	next++; // skip '{'
				2446	SKIP_WS(next);
				2447	scan = next;
				2448
				2449	//
				2450	// Read the first integer in the set.
				2451	//
				2452	KMP_ASSERT2((next >= '0') && (next <= '9'),
				2453	"bad proclist");
				2454	SKIP_DIGITS(next);
				2455	num = __kmp_str_to_int(scan, *next);
				2456	KMP_ASSERT2(num >= 0, "bad explicit proc list");
				2457
				2458	//
				2459	// Copy the mask for that osId to the sum (union) mask.
				2460	//
				2461	if ((num > maxOsId) \|\|
				2462	(! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
				2463	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				2464	&& (__kmp_affinity_type != affinity_none))) {
				2465	KMP_WARNING(AffIgnoreInvalidProcID, num);
				2466	}
				2467	KMP_CPU_ZERO(sumMask);
				2468	}
				2469	else {
				2470	KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
				2471	setSize = 1;
				2472	}
				2473
				2474	for (;;) {
				2475	//
				2476	// Check for end of set.
				2477	//
				2478	SKIP_WS(next);
				2479	if (*next == '}') {
				2480	next++; // skip '}'
				2481	break;
				2482	}
				2483
				2484	//
				2485	// Skip optional comma.
				2486	//
				2487	if (*next == ',') {
				2488	next++;
				2489	}
				2490	SKIP_WS(next);
				2491
				2492	//
				2493	// Read the next integer in the set.
				2494	//
				2495	scan = next;
				2496	KMP_ASSERT2((next >= '0') && (next <= '9'),
				2497	"bad explicit proc list");
				2498
				2499	SKIP_DIGITS(next);
				2500	num = __kmp_str_to_int(scan, *next);
				2501	KMP_ASSERT2(num >= 0, "bad explicit proc list");
				2502
				2503	//
				2504	// Add the mask for that osId to the sum mask.
				2505	//
				2506	if ((num > maxOsId) \|\|
				2507	(! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
				2508	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				2509	&& (__kmp_affinity_type != affinity_none))) {
				2510	KMP_WARNING(AffIgnoreInvalidProcID, num);
				2511	}
				2512	}
				2513	else {
				2514	KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
				2515	setSize++;
				2516	}
				2517	}
				2518	if (setSize > 0) {
				2519	ADD_MASK(sumMask);
				2520	}
				2521
				2522	SKIP_WS(next);
				2523	if (*next == ',') {
				2524	next++;
				2525	}
				2526	scan = next;
				2527	continue;
				2528	}
				2529
				2530	//
				2531	// Read the first integer.
				2532	//
				2533	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list");
				2534	SKIP_DIGITS(next);
				2535	start = __kmp_str_to_int(scan, *next);
				2536	KMP_ASSERT2(start >= 0, "bad explicit proc list");
				2537	SKIP_WS(next);
				2538
				2539	//
				2540	// If this isn't a range, then add a mask to the list and go on.
				2541	//
				2542	if (*next != '-') {
				2543	ADD_MASK_OSID(start, osId2Mask, maxOsId);
				2544
				2545	//
				2546	// Skip optional comma.
				2547	//
				2548	if (*next == ',') {
				2549	next++;
				2550	}
				2551	scan = next;
				2552	continue;
				2553	}
				2554
				2555	//
				2556	// This is a range. Skip over the '-' and read in the 2nd int.
				2557	//
				2558	next++; // skip '-'
				2559	SKIP_WS(next);
				2560	scan = next;
				2561	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list");
				2562	SKIP_DIGITS(next);
				2563	end = __kmp_str_to_int(scan, *next);
				2564	KMP_ASSERT2(end >= 0, "bad explicit proc list");
				2565
				2566	//
				2567	// Check for a stride parameter
				2568	//
				2569	stride = 1;
				2570	SKIP_WS(next);
				2571	if (*next == ':') {
				2572	//
				2573	// A stride is specified. Skip over the ':" and read the 3rd int.
				2574	//
				2575	int sign = +1;
				2576	next++; // skip ':'
				2577	SKIP_WS(next);
				2578	scan = next;
				2579	if (*next == '-') {
				2580	sign = -1;
				2581	next++;
				2582	SKIP_WS(next);
				2583	scan = next;
				2584	}
				2585	KMP_ASSERT2((next >= '0') && (next <= '9'),
				2586	"bad explicit proc list");
				2587	SKIP_DIGITS(next);
				2588	stride = __kmp_str_to_int(scan, *next);
				2589	KMP_ASSERT2(stride >= 0, "bad explicit proc list");
				2590	stride *= sign;
				2591	}
				2592
				2593	//
				2594	// Do some range checks.
				2595	//
				2596	KMP_ASSERT2(stride != 0, "bad explicit proc list");
				2597	if (stride > 0) {
				2598	KMP_ASSERT2(start <= end, "bad explicit proc list");
				2599	}
				2600	else {
				2601	KMP_ASSERT2(start >= end, "bad explicit proc list");
				2602	}
				2603	KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
				2604
				2605	//
				2606	// Add the mask for each OS proc # to the list.
				2607	//
				2608	if (stride > 0) {
				2609	do {
				2610	ADD_MASK_OSID(start, osId2Mask, maxOsId);
				2611	start += stride;
				2612	} while (start <= end);
				2613	}
				2614	else {
				2615	do {
				2616	ADD_MASK_OSID(start, osId2Mask, maxOsId);
				2617	start += stride;
				2618	} while (start >= end);
				2619	}
				2620
				2621	//
				2622	// Skip optional comma.
				2623	//
				2624	SKIP_WS(next);
				2625	if (*next == ',') {
				2626	next++;
				2627	}
				2628	scan = next;
				2629	}
				2630
				2631	*out_numMasks = nextNewMask;
				2632	if (nextNewMask == 0) {
				2633	*out_masks = NULL;
				2634	KMP_INTERNAL_FREE(newMasks);
				2635	return;
				2636	}
				2637	*out_masks
				2638	= (kmp_affin_mask_t )__kmp_allocate(nextNewMask __kmp_affin_mask_size);
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	2639	KMP_MEMCPY(out_masks, newMasks, nextNewMask __kmp_affin_mask_size);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2640	__kmp_free(sumMask);
				2641	KMP_INTERNAL_FREE(newMasks);
				2642	}
				2643
				2644
				2645	# if OMP_40_ENABLED
				2646
				2647	/*-----------------------------------------------------------------------------
				2648
				2649	Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
				2650	places. Again, here is the grammar:
				2651
				2652	place_list := place
				2653	place_list := place , place_list
				2654	place := num
				2655	place := place : num
				2656	place := place : num : signed
				2657	place := { subplace_list }
				2658	place := ! place // (lowest priority)
				2659	subplace_list := subplace
				2660	subplace_list := subplace , subplace_list
				2661	subplace := num
				2662	subplace := num : num
				2663	subplace := num : num : signed
				2664	signed := num
				2665	signed := + signed
				2666	signed := - signed
				2667
				2668	-----------------------------------------------------------------------------*/
				2669
				2670	static void
				2671	__kmp_process_subplace_list(const char *scan, kmp_affin_mask_t osId2Mask,
				2672	int maxOsId, kmp_affin_mask_t tempMask, int setSize)
				2673	{
				2674	const char *next;
				2675
				2676	for (;;) {
				2677	int start, count, stride, i;
				2678
				2679	//
				2680	// Read in the starting proc id
				2681	//
				2682	SKIP_WS(*scan);
				2683	KMP_ASSERT2((scan >= '0') && (scan <= '9'),
				2684	"bad explicit places list");
				2685	next = *scan;
				2686	SKIP_DIGITS(next);
				2687	start = __kmp_str_to_int(scan, next);
				2688	KMP_ASSERT(start >= 0);
				2689	*scan = next;
				2690
				2691	//
				2692	// valid follow sets are ',' ':' and '}'
				2693	//
				2694	SKIP_WS(*scan);
				2695	if (scan == '}' \|\| scan == ',') {
				2696	if ((start > maxOsId) \|\|
				2697	(! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
				2698	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				2699	&& (__kmp_affinity_type != affinity_none))) {
				2700	KMP_WARNING(AffIgnoreInvalidProcID, start);
				2701	}
				2702	}
				2703	else {
				2704	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
				2705	(*setSize)++;
				2706	}
				2707	if (**scan == '}') {
				2708	break;
				2709	}
				2710	(*scan)++; // skip ','
				2711	continue;
				2712	}
				2713	KMP_ASSERT2(**scan == ':', "bad explicit places list");
				2714	(*scan)++; // skip ':'
				2715
				2716	//
				2717	// Read count parameter
				2718	//
				2719	SKIP_WS(*scan);
				2720	KMP_ASSERT2((scan >= '0') && (scan <= '9'),
				2721	"bad explicit places list");
				2722	next = *scan;
				2723	SKIP_DIGITS(next);
				2724	count = __kmp_str_to_int(scan, next);
				2725	KMP_ASSERT(count >= 0);
				2726	*scan = next;
				2727
				2728	//
				2729	// valid follow sets are ',' ':' and '}'
				2730	//
				2731	SKIP_WS(*scan);
				2732	if (scan == '}' \|\| scan == ',') {
				2733	for (i = 0; i < count; i++) {
				2734	if ((start > maxOsId) \|\|
				2735	(! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
				2736	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				2737	&& (__kmp_affinity_type != affinity_none))) {
				2738	KMP_WARNING(AffIgnoreInvalidProcID, start);
				2739	}
				2740	break; // don't proliferate warnings for large count
				2741	}
				2742	else {
				2743	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
				2744	start++;
				2745	(*setSize)++;
				2746	}
				2747	}
				2748	if (**scan == '}') {
				2749	break;
				2750	}
				2751	(*scan)++; // skip ','
				2752	continue;
				2753	}
				2754	KMP_ASSERT2(**scan == ':', "bad explicit places list");
				2755	(*scan)++; // skip ':'
				2756
				2757	//
				2758	// Read stride parameter
				2759	//
				2760	int sign = +1;
				2761	for (;;) {
				2762	SKIP_WS(*scan);
				2763	if (**scan == '+') {
				2764	(*scan)++; // skip '+'
				2765	continue;
				2766	}
				2767	if (**scan == '-') {
				2768	sign *= -1;
				2769	(*scan)++; // skip '-'
				2770	continue;
				2771	}
				2772	break;
				2773	}
				2774	SKIP_WS(*scan);
				2775	KMP_ASSERT2((scan >= '0') && (scan <= '9'),
				2776	"bad explicit places list");
				2777	next = *scan;
				2778	SKIP_DIGITS(next);
				2779	stride = __kmp_str_to_int(scan, next);
				2780	KMP_ASSERT(stride >= 0);
				2781	*scan = next;
				2782	stride *= sign;
				2783
				2784	//
				2785	// valid follow sets are ',' and '}'
				2786	//
				2787	SKIP_WS(*scan);
				2788	if (scan == '}' \|\| scan == ',') {
				2789	for (i = 0; i < count; i++) {
				2790	if ((start > maxOsId) \|\|
				2791	(! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
				2792	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				2793	&& (__kmp_affinity_type != affinity_none))) {
				2794	KMP_WARNING(AffIgnoreInvalidProcID, start);
				2795	}
				2796	break; // don't proliferate warnings for large count
				2797	}
				2798	else {
				2799	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
				2800	start += stride;
				2801	(*setSize)++;
				2802	}
				2803	}
				2804	if (**scan == '}') {
				2805	break;
				2806	}
				2807	(*scan)++; // skip ','
				2808	continue;
				2809	}
				2810
				2811	KMP_ASSERT2(0, "bad explicit places list");
				2812	}
				2813	}
				2814
				2815
				2816	static void
				2817	__kmp_process_place(const char *scan, kmp_affin_mask_t osId2Mask,
				2818	int maxOsId, kmp_affin_mask_t tempMask, int setSize)
				2819	{
				2820	const char *next;
				2821
				2822	//
				2823	// valid follow sets are '{' '!' and num
				2824	//
				2825	SKIP_WS(*scan);
				2826	if (**scan == '{') {
				2827	(*scan)++; // skip '{'
				2828	__kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
				2829	setSize);
				2830	KMP_ASSERT2(**scan == '}', "bad explicit places list");
				2831	(*scan)++; // skip '}'
				2832	}
				2833	else if (**scan == '!') {
				2834	__kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
				2835	KMP_CPU_COMPLEMENT(tempMask);
				2836	(*scan)++; // skip '!'
				2837	}
				2838	else if ((scan >= '0') && (scan <= '9')) {
				2839	next = *scan;
				2840	SKIP_DIGITS(next);
				2841	int num = __kmp_str_to_int(scan, next);
				2842	KMP_ASSERT(num >= 0);
				2843	if ((num > maxOsId) \|\|
				2844	(! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
				2845	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				2846	&& (__kmp_affinity_type != affinity_none))) {
				2847	KMP_WARNING(AffIgnoreInvalidProcID, num);
				2848	}
				2849	}
				2850	else {
				2851	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
				2852	(*setSize)++;
				2853	}
				2854	*scan = next; // skip num
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	2855	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2856	else {
				2857	KMP_ASSERT2(0, "bad explicit places list");
				2858	}
				2859	}
				2860
				2861
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	2862	//static void
				2863	void
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2864	__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
				2865	unsigned int out_numMasks, const char placelist,
				2866	kmp_affin_mask_t *osId2Mask, int maxOsId)
				2867	{
				2868	const char *scan = placelist;
				2869	const char *next = placelist;
				2870
				2871	numNewMasks = 2;
				2872	newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
				2873	* __kmp_affin_mask_size);
				2874	nextNewMask = 0;
				2875
				2876	kmp_affin_mask_t tempMask = (kmp_affin_mask_t )__kmp_allocate(
				2877	__kmp_affin_mask_size);
				2878	KMP_CPU_ZERO(tempMask);
				2879	int setSize = 0;
				2880
				2881	for (;;) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2882	__kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
				2883
				2884	//
				2885	// valid follow sets are ',' ':' and EOL
				2886	//
				2887	SKIP_WS(scan);
				2888	if (scan == '\0' \|\| scan == ',') {
				2889	if (setSize > 0) {
				2890	ADD_MASK(tempMask);
				2891	}
				2892	KMP_CPU_ZERO(tempMask);
				2893	setSize = 0;
				2894	if (*scan == '\0') {
				2895	break;
				2896	}
				2897	scan++; // skip ','
				2898	continue;
				2899	}
				2900
				2901	KMP_ASSERT2(*scan == ':', "bad explicit places list");
				2902	scan++; // skip ':'
				2903
				2904	//
				2905	// Read count parameter
				2906	//
				2907	SKIP_WS(scan);
				2908	KMP_ASSERT2((scan >= '0') && (scan <= '9'),
				2909	"bad explicit places list");
				2910	next = scan;
				2911	SKIP_DIGITS(next);
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	2912	int count = __kmp_str_to_int(scan, *next);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2913	KMP_ASSERT(count >= 0);
				2914	scan = next;
				2915
				2916	//
				2917	// valid follow sets are ',' ':' and EOL
				2918	//
				2919	SKIP_WS(scan);
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	2920	int stride;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2921	if (scan == '\0' \|\| scan == ',') {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	2922	stride = +1;
				2923	}
				2924	else {
				2925	KMP_ASSERT2(*scan == ':', "bad explicit places list");
				2926	scan++; // skip ':'
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2927
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	2928	//
				2929	// Read stride parameter
				2930	//
				2931	int sign = +1;
				2932	for (;;) {
				2933	SKIP_WS(scan);
				2934	if (*scan == '+') {
				2935	scan++; // skip '+'
				2936	continue;
				2937	}
				2938	if (*scan == '-') {
				2939	sign *= -1;
				2940	scan++; // skip '-'
				2941	continue;
				2942	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2943	break;
				2944	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2945	SKIP_WS(scan);
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	2946	KMP_ASSERT2((scan >= '0') && (scan <= '9'),
				2947	"bad explicit places list");
				2948	next = scan;
				2949	SKIP_DIGITS(next);
				2950	stride = __kmp_str_to_int(scan, *next);
				2951	KMP_DEBUG_ASSERT(stride >= 0);
				2952	scan = next;
				2953	stride *= sign;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2954	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2955
				2956	if (stride > 0) {
				2957	int i;
				2958	for (i = 0; i < count; i++) {
				2959	int j;
				2960	if (setSize == 0) {
				2961	break;
				2962	}
				2963	ADD_MASK(tempMask);
				2964	setSize = 0;
				2965	for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	2966	if (! KMP_CPU_ISSET(j - stride, tempMask)) {
				2967	KMP_CPU_CLR(j, tempMask);
				2968	}
				2969	else if ((j > maxOsId) \|\|
				2970	(! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
Andrey Churbanov	16a1432	2015-03-10 09:34:38 +0000	[diff] [blame]	2971	if ((__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				2972	&& (__kmp_affinity_type != affinity_none))) && i < count - 1) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	2973	KMP_WARNING(AffIgnoreInvalidProcID, j);
				2974	}
				2975	KMP_CPU_CLR(j, tempMask);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2976	}
				2977	else {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	2978	KMP_CPU_SET(j, tempMask);
				2979	setSize++;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2980	}
				2981	}
				2982	for (; j >= 0; j--) {
				2983	KMP_CPU_CLR(j, tempMask);
				2984	}
				2985	}
				2986	}
				2987	else {
				2988	int i;
				2989	for (i = 0; i < count; i++) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	2990	int j;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2991	if (setSize == 0) {
				2992	break;
				2993	}
				2994	ADD_MASK(tempMask);
				2995	setSize = 0;
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	2996	for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2997	j++) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	2998	if (! KMP_CPU_ISSET(j - stride, tempMask)) {
				2999	KMP_CPU_CLR(j, tempMask);
				3000	}
				3001	else if ((j > maxOsId) \|\|
				3002	(! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
Andrey Churbanov	16a1432	2015-03-10 09:34:38 +0000	[diff] [blame]	3003	if ((__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				3004	&& (__kmp_affinity_type != affinity_none))) && i < count - 1) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3005	KMP_WARNING(AffIgnoreInvalidProcID, j);
				3006	}
				3007	KMP_CPU_CLR(j, tempMask);
				3008	}
				3009	else {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3010	KMP_CPU_SET(j, tempMask);
				3011	setSize++;
				3012	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3013	}
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3014	for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3015	KMP_CPU_CLR(j, tempMask);
				3016	}
				3017	}
				3018	}
				3019	KMP_CPU_ZERO(tempMask);
				3020	setSize = 0;
				3021
				3022	//
				3023	// valid follow sets are ',' and EOL
				3024	//
				3025	SKIP_WS(scan);
				3026	if (*scan == '\0') {
				3027	break;
				3028	}
				3029	if (*scan == ',') {
				3030	scan++; // skip ','
				3031	continue;
				3032	}
				3033
				3034	KMP_ASSERT2(0, "bad explicit places list");
				3035	}
				3036
				3037	*out_numMasks = nextNewMask;
				3038	if (nextNewMask == 0) {
				3039	*out_masks = NULL;
				3040	KMP_INTERNAL_FREE(newMasks);
				3041	return;
				3042	}
				3043	*out_masks
				3044	= (kmp_affin_mask_t )__kmp_allocate(nextNewMask __kmp_affin_mask_size);
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	3045	KMP_MEMCPY(out_masks, newMasks, nextNewMask __kmp_affin_mask_size);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3046	__kmp_free(tempMask);
				3047	KMP_INTERNAL_FREE(newMasks);
				3048	}
				3049
				3050	# endif /* OMP_40_ENABLED */
				3051
				3052	#undef ADD_MASK
				3053	#undef ADD_MASK_OSID
				3054
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3055	static void
				3056	__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
				3057	{
				3058	if ( __kmp_place_num_cores == 0 ) {
				3059	if ( __kmp_place_num_threads_per_core == 0 ) {
				3060	return; // no cores limiting actions requested, exit
				3061	}
				3062	__kmp_place_num_cores = nCoresPerPkg; // use all available cores
				3063	}
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3064	if ( !__kmp_affinity_uniform_topology() ) {
				3065	KMP_WARNING( AffThrPlaceNonUniform );
				3066	return; // don't support non-uniform topology
				3067	}
				3068	if ( depth != 3 ) {
				3069	KMP_WARNING( AffThrPlaceNonThreeLevel );
				3070	return; // don't support not-3-level topology
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3071	}
				3072	if ( __kmp_place_num_threads_per_core == 0 ) {
				3073	__kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
				3074	}
Andrey Churbanov	1287557	2015-03-10 09:00:36 +0000	[diff] [blame]	3075	if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3076	KMP_WARNING( AffThrPlaceManyCores );
				3077	return;
				3078	}
				3079
				3080	AddrUnsPair newAddr = (AddrUnsPair )__kmp_allocate( sizeof(AddrUnsPair) *
				3081	nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
				3082	int i, j, k, n_old = 0, n_new = 0;
				3083	for ( i = 0; i < nPackages; ++i ) {
				3084	for ( j = 0; j < nCoresPerPkg; ++j ) {
Andrey Churbanov	1287557	2015-03-10 09:00:36 +0000	[diff] [blame]	3085	if ( j < __kmp_place_core_offset \|\| j >= __kmp_place_core_offset + __kmp_place_num_cores ) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3086	n_old += __kmp_nThreadsPerCore; // skip not-requested core
				3087	} else {
				3088	for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) {
Andrey Churbanov	1287557	2015-03-10 09:00:36 +0000	[diff] [blame]	3089	if ( k < __kmp_place_num_threads_per_core ) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3090	newAddr[n_new] = (*pAddr)[n_old]; // copy requested core' data to new location
				3091	n_new++;
				3092	}
				3093	n_old++;
				3094	}
				3095	}
				3096	}
				3097	}
				3098	nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
				3099	__kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
				3100	__kmp_avail_proc = n_new; // correct avail_proc
				3101	__kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
				3102
				3103	__kmp_free( *pAddr );
				3104	*pAddr = newAddr; // replace old topology with new one
				3105	}
				3106
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3107
				// Topology table filled in by the __kmp_affinity_create_*_map() calls in
				// __kmp_aux_affinity_initialize(); NULL until one of them succeeds.
				3108	static AddrUnsPair *address2os = NULL;
				// NOTE(review): the two variables below are not referenced in the nearby
				// code; presumably they are used by later affinity routines - confirm.
				3109	static int * procarr = NULL;
				3110	static int __kmp_aff_depth = 0;
				3111
				3112	static void
				3113	__kmp_aux_affinity_initialize(void)
				3114	{
				3115	if (__kmp_affinity_masks != NULL) {
				3116	KMP_ASSERT(fullMask != NULL);
				3117	return;
				3118	}
				3119
				3120	//
				3121	// Create the "full" mask - this defines all of the processors that we
				3122	// consider to be in the machine model. If respect is set, then it is
				3123	// the initialization thread's affinity mask. Otherwise, it is all
				3124	// processors that we know about on the machine.
				3125	//
				3126	if (fullMask == NULL) {
				3127	fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
				3128	}
				3129	if (KMP_AFFINITY_CAPABLE()) {
				3130	if (__kmp_affinity_respect_mask) {
				3131	__kmp_get_system_affinity(fullMask, TRUE);
				3132
				3133	//
				3134	// Count the number of available processors.
				3135	//
				3136	unsigned i;
				3137	__kmp_avail_proc = 0;
				3138	for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
				3139	if (! KMP_CPU_ISSET(i, fullMask)) {
				3140	continue;
				3141	}
				3142	__kmp_avail_proc++;
				3143	}
				3144	if (__kmp_avail_proc > __kmp_xproc) {
				3145	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				3146	&& (__kmp_affinity_type != affinity_none))) {
				3147	KMP_WARNING(ErrorInitializeAffinity);
				3148	}
				3149	__kmp_affinity_type = affinity_none;
Andrey Churbanov	1f037e4	2015-03-10 09:15:26 +0000	[diff] [blame]	3150	KMP_AFFINITY_DISABLE();
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3151	return;
				3152	}
				3153	}
				3154	else {
				3155	__kmp_affinity_entire_machine_mask(fullMask);
				3156	__kmp_avail_proc = __kmp_xproc;
				3157	}
				3158	}
				3159
				3160	int depth = -1;
				3161	kmp_i18n_id_t msg_id = kmp_i18n_null;
				3162
				3163	//
Alp Toker	8f2d3f0	2014-02-24 10:40:15 +0000	[diff] [blame]	3164	// For backward compatibility, setting KMP_CPUINFO_FILE =>
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3165	// KMP_TOPOLOGY_METHOD=cpuinfo
				3166	//
				3167	if ((__kmp_cpuinfo_file != NULL) &&
				3168	(__kmp_affinity_top_method == affinity_top_method_all)) {
				3169	__kmp_affinity_top_method = affinity_top_method_cpuinfo;
				3170	}
				3171
				3172	if (__kmp_affinity_top_method == affinity_top_method_all) {
				3173	//
				3174	// In the default code path, errors are not fatal - we just try using
				3175	// another method. We only emit a warning message if affinity is on,
				3176	// or the verbose flag is set, and the nowarnings flag was not set.
				3177	//
				3178	const char *file_name = NULL;
				3179	int line = 0;
				3180
				3181	# if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
				3182
				3183	if (__kmp_affinity_verbose) {
				3184	KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
				3185	}
				3186
				3187	file_name = NULL;
				3188	depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
				3189	if (depth == 0) {
				3190	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				3191	KMP_ASSERT(address2os == NULL);
				3192	return;
				3193	}
				3194
				3195	if (depth < 0) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3196	if (__kmp_affinity_verbose) {
				3197	if (msg_id != kmp_i18n_null) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3198	KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
				3199	KMP_I18N_STR(DecodingLegacyAPIC));
				3200	}
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3201	else {
				3202	KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
				3203	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3204	}
				3205
				3206	file_name = NULL;
				3207	depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
				3208	if (depth == 0) {
				3209	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				3210	KMP_ASSERT(address2os == NULL);
				3211	return;
				3212	}
				3213	}
				3214
				3215	# endif /* KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 */
				3216
				3217	# if KMP_OS_LINUX
				3218
				3219	if (depth < 0) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3220	if (__kmp_affinity_verbose) {
				3221	if (msg_id != kmp_i18n_null) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3222	KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
				3223	}
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3224	else {
				3225	KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
				3226	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3227	}
				3228
				3229	FILE *f = fopen("/proc/cpuinfo", "r");
				3230	if (f == NULL) {
				3231	msg_id = kmp_i18n_str_CantOpenCpuinfo;
				3232	}
				3233	else {
				3234	file_name = "/proc/cpuinfo";
				3235	depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
				3236	fclose(f);
				3237	if (depth == 0) {
				3238	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				3239	KMP_ASSERT(address2os == NULL);
				3240	return;
				3241	}
				3242	}
				3243	}
				3244
				3245	# endif /* KMP_OS_LINUX */
				3246
Andrey Churbanov	7daf980	2015-01-27 16:52:57 +0000	[diff] [blame]	3247	# if KMP_GROUP_AFFINITY
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3248
				3249	if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
				3250	if (__kmp_affinity_verbose) {
				3251	KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
				3252	}
				3253
				3254	depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
				3255	KMP_ASSERT(depth != 0);
				3256	}
				3257
Andrey Churbanov	7daf980	2015-01-27 16:52:57 +0000	[diff] [blame]	3258	# endif /* KMP_GROUP_AFFINITY */
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3259
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3260	if (depth < 0) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3261	if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3262	if (file_name == NULL) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3263	KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3264	}
				3265	else if (line == 0) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3266	KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3267	}
				3268	else {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3269	KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3270	}
				3271	}
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3272	// FIXME - print msg if msg_id = kmp_i18n_null ???
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3273
				3274	file_name = "";
				3275	depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
				3276	if (depth == 0) {
				3277	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				3278	KMP_ASSERT(address2os == NULL);
				3279	return;
				3280	}
				3281	KMP_ASSERT(depth > 0);
				3282	KMP_ASSERT(address2os != NULL);
				3283	}
				3284	}
				3285
				3286	//
				3287	// If the user has specified that a particular topology discovery method
				3288	// is to be used, then we abort if that method fails. The exception is
				3289	// group affinity, which might have been implicitly set.
				3290	//
				3291
				3292	# if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
				3293
				3294	else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
				3295	if (__kmp_affinity_verbose) {
				3296	KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
				3297	KMP_I18N_STR(Decodingx2APIC));
				3298	}
				3299
				3300	depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
				3301	if (depth == 0) {
				3302	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				3303	KMP_ASSERT(address2os == NULL);
				3304	return;
				3305	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3306	if (depth < 0) {
				3307	KMP_ASSERT(msg_id != kmp_i18n_null);
				3308	KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
				3309	}
				3310	}
				3311	else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
				3312	if (__kmp_affinity_verbose) {
				3313	KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
				3314	KMP_I18N_STR(DecodingLegacyAPIC));
				3315	}
				3316
				3317	depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
				3318	if (depth == 0) {
				3319	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				3320	KMP_ASSERT(address2os == NULL);
				3321	return;
				3322	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3323	if (depth < 0) {
				3324	KMP_ASSERT(msg_id != kmp_i18n_null);
				3325	KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
				3326	}
				3327	}
				3328
				3329	# endif /* KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 */
				3330
				3331	else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
				3332	const char *filename;
				3333	if (__kmp_cpuinfo_file != NULL) {
				3334	filename = __kmp_cpuinfo_file;
				3335	}
				3336	else {
				3337	filename = "/proc/cpuinfo";
				3338	}
				3339
				3340	if (__kmp_affinity_verbose) {
				3341	KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
				3342	}
				3343
				3344	FILE *f = fopen(filename, "r");
				3345	if (f == NULL) {
				3346	int code = errno;
				3347	if (__kmp_cpuinfo_file != NULL) {
				3348	__kmp_msg(
				3349	kmp_ms_fatal,
				3350	KMP_MSG(CantOpenFileForReading, filename),
				3351	KMP_ERR(code),
				3352	KMP_HNT(NameComesFrom_CPUINFO_FILE),
				3353	__kmp_msg_null
				3354	);
				3355	}
				3356	else {
				3357	__kmp_msg(
				3358	kmp_ms_fatal,
				3359	KMP_MSG(CantOpenFileForReading, filename),
				3360	KMP_ERR(code),
				3361	__kmp_msg_null
				3362	);
				3363	}
				3364	}
				3365	int line = 0;
				3366	depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
				3367	fclose(f);
				3368	if (depth < 0) {
				3369	KMP_ASSERT(msg_id != kmp_i18n_null);
				3370	if (line > 0) {
				3371	KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
				3372	}
				3373	else {
				3374	KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
				3375	}
				3376	}
				3377	if (__kmp_affinity_type == affinity_none) {
				3378	KMP_ASSERT(depth == 0);
				3379	KMP_ASSERT(address2os == NULL);
				3380	return;
				3381	}
				3382	}
				3383
Andrey Churbanov	7daf980	2015-01-27 16:52:57 +0000	[diff] [blame]	3384	# if KMP_GROUP_AFFINITY
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3385
				3386	else if (__kmp_affinity_top_method == affinity_top_method_group) {
				3387	if (__kmp_affinity_verbose) {
				3388	KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
				3389	}
				3390
				3391	depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
				3392	KMP_ASSERT(depth != 0);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3393	if (depth < 0) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3394	KMP_ASSERT(msg_id != kmp_i18n_null);
				3395	KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3396	}
				3397	}
				3398
Andrey Churbanov	7daf980	2015-01-27 16:52:57 +0000	[diff] [blame]	3399	# endif /* KMP_GROUP_AFFINITY */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3400
				3401	else if (__kmp_affinity_top_method == affinity_top_method_flat) {
				3402	if (__kmp_affinity_verbose) {
				3403	KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
				3404	}
				3405
				3406	depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
				3407	if (depth == 0) {
				3408	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				3409	KMP_ASSERT(address2os == NULL);
				3410	return;
				3411	}
				3412	// should not fail
				3413	KMP_ASSERT(depth > 0);
				3414	KMP_ASSERT(address2os != NULL);
				3415	}
				3416
				3417	if (address2os == NULL) {
				3418	if (KMP_AFFINITY_CAPABLE()
				3419	&& (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				3420	&& (__kmp_affinity_type != affinity_none)))) {
				3421	KMP_WARNING(ErrorInitializeAffinity);
				3422	}
				3423	__kmp_affinity_type = affinity_none;
Andrey Churbanov	1f037e4	2015-03-10 09:15:26 +0000	[diff] [blame]	3424	KMP_AFFINITY_DISABLE();
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3425	return;
				3426	}
				3427
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3428	__kmp_apply_thread_places(&address2os, depth);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3429
				3430	//
				3431	// Create the table of masks, indexed by thread Id.
				3432	//
				3433	unsigned maxIndex;
				3434	unsigned numUnique;
				3435	kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
				3436	address2os, __kmp_avail_proc);
				3437	if (__kmp_affinity_gran_levels == 0) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3438	KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3439	}
				3440
				3441	//
				3442	// Set the childNums vector in all Address objects. This must be done
				3443	// before we can sort using __kmp_affinity_cmp_Address_child_num(),
				3444	// which takes into account the setting of __kmp_affinity_compact.
				3445	//
				3446	__kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
				3447
				3448	switch (__kmp_affinity_type) {
				3449
				3450	case affinity_explicit:
				3451	KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
				3452	# if OMP_40_ENABLED
				3453	if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
				3454	# endif
				3455	{
				3456	__kmp_affinity_process_proclist(&__kmp_affinity_masks,
				3457	&__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
				3458	maxIndex);
				3459	}
				3460	# if OMP_40_ENABLED
				3461	else {
				3462	__kmp_affinity_process_placelist(&__kmp_affinity_masks,
				3463	&__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
				3464	maxIndex);
				3465	}
				3466	# endif
				3467	if (__kmp_affinity_num_masks == 0) {
				3468	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				3469	&& (__kmp_affinity_type != affinity_none))) {
				3470	KMP_WARNING(AffNoValidProcID);
				3471	}
				3472	__kmp_affinity_type = affinity_none;
				3473	return;
				3474	}
				3475	break;
				3476
				3477	//
				3478	// The other affinity types rely on sorting the Addresses according
				3479	// to some permutation of the machine topology tree. Set
				3480	// __kmp_affinity_compact and __kmp_affinity_offset appropriately,
				3481	// then jump to a common code fragment to do the sort and create
				3482	// the array of affinity masks.
				3483	//
				3484
				3485	case affinity_logical:
				3486	__kmp_affinity_compact = 0;
				3487	if (__kmp_affinity_offset) {
				3488	__kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
				3489	% __kmp_avail_proc;
				3490	}
				3491	goto sortAddresses;
				3492
				3493	case affinity_physical:
				3494	if (__kmp_nThreadsPerCore > 1) {
				3495	__kmp_affinity_compact = 1;
				3496	if (__kmp_affinity_compact >= depth) {
				3497	__kmp_affinity_compact = 0;
				3498	}
				3499	} else {
				3500	__kmp_affinity_compact = 0;
				3501	}
				3502	if (__kmp_affinity_offset) {
				3503	__kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
				3504	% __kmp_avail_proc;
				3505	}
				3506	goto sortAddresses;
				3507
				3508	case affinity_scatter:
				3509	if (__kmp_affinity_compact >= depth) {
				3510	__kmp_affinity_compact = 0;
				3511	}
				3512	else {
				3513	__kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
				3514	}
				3515	goto sortAddresses;
				3516
				3517	case affinity_compact:
				3518	if (__kmp_affinity_compact >= depth) {
				3519	__kmp_affinity_compact = depth - 1;
				3520	}
				3521	goto sortAddresses;
				3522
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3523	case affinity_balanced:
Jonathan Peyton	caf09fe	2015-05-27 23:27:33 +0000	[diff] [blame]	3524	// Balanced works only for the case of a single package
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3525	if( nPackages > 1 ) {
				3526	if( __kmp_affinity_verbose \|\| __kmp_affinity_warnings ) {
				3527	KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
				3528	}
				3529	__kmp_affinity_type = affinity_none;
				3530	return;
				3531	} else if( __kmp_affinity_uniform_topology() ) {
				3532	break;
				3533	} else { // Non-uniform topology
				3534
				3535	// Save the depth for further usage
				3536	__kmp_aff_depth = depth;
				3537
				3538	// Number of hyper threads per core in HT machine
				3539	int nth_per_core = __kmp_nThreadsPerCore;
				3540
				3541	int core_level;
				3542	if( nth_per_core > 1 ) {
				3543	core_level = depth - 2;
				3544	} else {
				3545	core_level = depth - 1;
				3546	}
				3547	int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
				3548	int nproc = nth_per_core * ncores;
				3549
				3550	procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
				3551	for( int i = 0; i < nproc; i++ ) {
				3552	procarr[ i ] = -1;
				3553	}
				3554
				3555	for( int i = 0; i < __kmp_avail_proc; i++ ) {
				3556	int proc = address2os[ i ].second;
				3557	// If depth == 3 then level=0 - package, level=1 - core, level=2 - thread.
				3558	// If there is only one thread per core then depth == 2: level 0 - package,
				3559	// level 1 - core.
				3560	int level = depth - 1;
				3561
				3562	// __kmp_nth_per_core == 1
				3563	int thread = 0;
				3564	int core = address2os[ i ].first.labels[ level ];
				3565	// If the thread level exists, that is we have more than one thread context per core
				3566	if( nth_per_core > 1 ) {
				3567	thread = address2os[ i ].first.labels[ level ] % nth_per_core;
				3568	core = address2os[ i ].first.labels[ level - 1 ];
				3569	}
				3570	procarr[ core * nth_per_core + thread ] = proc;
				3571	}
				3572
				3573	break;
				3574	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3575
				3576	sortAddresses:
				3577	//
				3578	// Allocate the gtid->affinity mask table.
				3579	//
				3580	if (__kmp_affinity_dups) {
				3581	__kmp_affinity_num_masks = __kmp_avail_proc;
				3582	}
				3583	else {
				3584	__kmp_affinity_num_masks = numUnique;
				3585	}
				3586
				3587	# if OMP_40_ENABLED
				3588	if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
				3589	&& ( __kmp_affinity_num_places > 0 )
				3590	&& ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
				3591	__kmp_affinity_num_masks = __kmp_affinity_num_places;
				3592	}
				3593	# endif
				3594
				3595	__kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
				3596	__kmp_affinity_num_masks * __kmp_affin_mask_size);
				3597
				3598	//
				3599	// Sort the address2os table according to the current setting of
				3600	// __kmp_affinity_compact, then fill out __kmp_affinity_masks.
				3601	//
				3602	qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
				3603	__kmp_affinity_cmp_Address_child_num);
				3604	{
				3605	int i;
				3606	unsigned j;
				3607	for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
				3608	if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
				3609	continue;
				3610	}
				3611	unsigned osId = address2os[i].second;
				3612	kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
				3613	kmp_affin_mask_t *dest
				3614	= KMP_CPU_INDEX(__kmp_affinity_masks, j);
				3615	KMP_ASSERT(KMP_CPU_ISSET(osId, src));
				3616	KMP_CPU_COPY(dest, src);
				3617	if (++j >= __kmp_affinity_num_masks) {
				3618	break;
				3619	}
				3620	}
				3621	KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
				3622	}
				3623	break;
				3624
				3625	default:
				3626	KMP_ASSERT2(0, "Unexpected affinity setting");
				3627	}
				3628
				3629	__kmp_free(osId2Mask);
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3630	machine_hierarchy.init(address2os, __kmp_avail_proc);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3631	}
				3632
				3633
				3634	void
				3635	__kmp_affinity_initialize(void)
				3636	{
				3637	//
				3638	// Much of the code above was written assumming that if a machine was not
				3639	// affinity capable, then __kmp_affinity_type == affinity_none. We now
				3640	// explicitly represent this as __kmp_affinity_type == affinity_disabled.
				3641	//
				3642	// There are too many checks for __kmp_affinity_type == affinity_none
				3643	// in this code. Instead of trying to change them all, check if
				3644	// __kmp_affinity_type == affinity_disabled, and if so, slam it with
				3645	// affinity_none, call the real initialization routine, then restore
				3646	// __kmp_affinity_type to affinity_disabled.
				3647	//
				3648	int disabled = (__kmp_affinity_type == affinity_disabled);
				3649	if (! KMP_AFFINITY_CAPABLE()) {
				3650	KMP_ASSERT(disabled);
				3651	}
				3652	if (disabled) {
				3653	__kmp_affinity_type = affinity_none;
				3654	}
				3655	__kmp_aux_affinity_initialize();
				3656	if (disabled) {
				3657	__kmp_affinity_type = affinity_disabled;
				3658	}
				3659	}
				3660
				3661
				3662	void
				3663	__kmp_affinity_uninitialize(void)
				3664	{
				3665	if (__kmp_affinity_masks != NULL) {
				3666	__kmp_free(__kmp_affinity_masks);
				3667	__kmp_affinity_masks = NULL;
				3668	}
				3669	if (fullMask != NULL) {
				3670	KMP_CPU_FREE(fullMask);
				3671	fullMask = NULL;
				3672	}
				3673	__kmp_affinity_num_masks = 0;
				3674	# if OMP_40_ENABLED
				3675	__kmp_affinity_num_places = 0;
				3676	# endif
				3677	if (__kmp_affinity_proclist != NULL) {
				3678	__kmp_free(__kmp_affinity_proclist);
				3679	__kmp_affinity_proclist = NULL;
				3680	}
				3681	if( address2os != NULL ) {
				3682	__kmp_free( address2os );
				3683	address2os = NULL;
				3684	}
				3685	if( procarr != NULL ) {
				3686	__kmp_free( procarr );
				3687	procarr = NULL;
				3688	}
				3689	}
				3690
				3691
				3692	void
				3693	__kmp_affinity_set_init_mask(int gtid, int isa_root)
				3694	{
				3695	if (! KMP_AFFINITY_CAPABLE()) {
				3696	return;
				3697	}
				3698
				3699	kmp_info_t th = (kmp_info_t )TCR_SYNC_PTR(__kmp_threads[gtid]);
				3700	if (th->th.th_affin_mask == NULL) {
				3701	KMP_CPU_ALLOC(th->th.th_affin_mask);
				3702	}
				3703	else {
				3704	KMP_CPU_ZERO(th->th.th_affin_mask);
				3705	}
				3706
				3707	//
				3708	// Copy the thread mask to the kmp_info_t strucuture.
				3709	// If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
				3710	// that has all of the OS proc ids set, or if __kmp_affinity_respect_mask
				3711	// is set, then the full mask is the same as the mask of the initialization
				3712	// thread.
				3713	//
				3714	kmp_affin_mask_t *mask;
				3715	int i;
				3716
				3717	# if OMP_40_ENABLED
				3718	if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
				3719	# endif
				3720	{
Andrey Churbanov	f28f613	2015-01-13 14:54:00 +0000	[diff] [blame]	3721	if ((__kmp_affinity_type == affinity_none) \|\| (__kmp_affinity_type == affinity_balanced)
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3722	) {
Andrey Churbanov	7daf980	2015-01-27 16:52:57 +0000	[diff] [blame]	3723	# if KMP_GROUP_AFFINITY
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3724	if (__kmp_num_proc_groups > 1) {
				3725	return;
				3726	}
				3727	# endif
				3728	KMP_ASSERT(fullMask != NULL);
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3729	i = KMP_PLACE_ALL;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3730	mask = fullMask;
				3731	}
				3732	else {
				3733	KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
				3734	i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
				3735	mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
				3736	}
				3737	}
				3738	# if OMP_40_ENABLED
				3739	else {
				3740	if ((! isa_root)
				3741	\|\| (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
Andrey Churbanov	7daf980	2015-01-27 16:52:57 +0000	[diff] [blame]	3742	# if KMP_GROUP_AFFINITY
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3743	if (__kmp_num_proc_groups > 1) {
				3744	return;
				3745	}
				3746	# endif
				3747	KMP_ASSERT(fullMask != NULL);
				3748	i = KMP_PLACE_ALL;
				3749	mask = fullMask;
				3750	}
				3751	else {
				3752	//
				3753	// int i = some hash function or just a counter that doesn't
				3754	// always start at 0. Use gtid for now.
				3755	//
				3756	KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
				3757	i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
				3758	mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
				3759	}
				3760	}
				3761	# endif
				3762
				3763	# if OMP_40_ENABLED
				3764	th->th.th_current_place = i;
				3765	if (isa_root) {
				3766	th->th.th_new_place = i;
				3767	th->th.th_first_place = 0;
				3768	th->th.th_last_place = __kmp_affinity_num_masks - 1;
				3769	}
				3770
				3771	if (i == KMP_PLACE_ALL) {
				3772	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
				3773	gtid));
				3774	}
				3775	else {
				3776	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
				3777	gtid, i));
				3778	}
				3779	# else
				3780	if (i == -1) {
				3781	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
				3782	gtid));
				3783	}
				3784	else {
				3785	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
				3786	gtid, i));
				3787	}
				3788	# endif /* OMP_40_ENABLED */
				3789
				3790	KMP_CPU_COPY(th->th.th_affin_mask, mask);
				3791
				3792	if (__kmp_affinity_verbose) {
				3793	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				3794	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				3795	th->th.th_affin_mask);
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3796	KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
				3797	buf);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3798	}
				3799
				3800	# if KMP_OS_WINDOWS
				3801	//
				3802	// On Windows* OS, the process affinity mask might have changed.
				3803	// If the user didn't request affinity and this call fails,
				3804	// just continue silently. See CQ171393.
				3805	//
				3806	if ( __kmp_affinity_type == affinity_none ) {
				3807	__kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
				3808	}
				3809	else
				3810	# endif
				3811	__kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
				3812	}
				3813
				3814
				3815	# if OMP_40_ENABLED
				3816
				3817	void
				3818	__kmp_affinity_set_place(int gtid)
				3819	{
				3820	int retval;
				3821
				3822	if (! KMP_AFFINITY_CAPABLE()) {
				3823	return;
				3824	}
				3825
				3826	kmp_info_t th = (kmp_info_t )TCR_SYNC_PTR(__kmp_threads[gtid]);
				3827
				3828	KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
				3829	gtid, th->th.th_new_place, th->th.th_current_place));
				3830
				3831	//
Alp Toker	8f2d3f0	2014-02-24 10:40:15 +0000	[diff] [blame]	3832	// Check that the new place is within this thread's partition.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3833	//
				3834	KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3835	KMP_ASSERT(th->th.th_new_place >= 0);
				3836	KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3837	if (th->th.th_first_place <= th->th.th_last_place) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3838	KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3839	&& (th->th.th_new_place <= th->th.th_last_place));
				3840	}
				3841	else {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3842	KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3843	\|\| (th->th.th_new_place >= th->th.th_last_place));
				3844	}
				3845
				3846	//
				3847	// Copy the thread mask to the kmp_info_t strucuture,
				3848	// and set this thread's affinity.
				3849	//
				3850	kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
				3851	th->th.th_new_place);
				3852	KMP_CPU_COPY(th->th.th_affin_mask, mask);
				3853	th->th.th_current_place = th->th.th_new_place;
				3854
				3855	if (__kmp_affinity_verbose) {
				3856	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				3857	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				3858	th->th.th_affin_mask);
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3859	KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
				3860	gtid, buf);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3861	}
				3862	__kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
				3863	}
				3864
				3865	# endif /* OMP_40_ENABLED */
				3866
				3867
				3868	int
				3869	__kmp_aux_set_affinity(void **mask)
				3870	{
				3871	int gtid;
				3872	kmp_info_t *th;
				3873	int retval;
				3874
				3875	if (! KMP_AFFINITY_CAPABLE()) {
				3876	return -1;
				3877	}
				3878
				3879	gtid = __kmp_entry_gtid();
				3880	KA_TRACE(1000, ;{
				3881	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				3882	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				3883	(kmp_affin_mask_t )(mask));
				3884	__kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
				3885	gtid, buf);
				3886	});
				3887
				3888	if (__kmp_env_consistency_check) {
				3889	if ((mask == NULL) \|\| (*mask == NULL)) {
				3890	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
				3891	}
				3892	else {
				3893	unsigned proc;
				3894	int num_procs = 0;
				3895
				3896	for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
				3897	if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t )(mask))) {
				3898	continue;
				3899	}
				3900	num_procs++;
				3901	if (! KMP_CPU_ISSET(proc, fullMask)) {
				3902	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
				3903	break;
				3904	}
				3905	}
				3906	if (num_procs == 0) {
				3907	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
				3908	}
				3909
Andrey Churbanov	7daf980	2015-01-27 16:52:57 +0000	[diff] [blame]	3910	# if KMP_GROUP_AFFINITY
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3911	if (__kmp_get_proc_group((kmp_affin_mask_t )(mask)) < 0) {
				3912	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
				3913	}
Andrey Churbanov	7daf980	2015-01-27 16:52:57 +0000	[diff] [blame]	3914	# endif /* KMP_GROUP_AFFINITY */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3915
				3916	}
				3917	}
				3918
				3919	th = __kmp_threads[gtid];
				3920	KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
				3921	retval = __kmp_set_system_affinity((kmp_affin_mask_t )(mask), FALSE);
				3922	if (retval == 0) {
				3923	KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t )(mask));
				3924	}
				3925
				3926	# if OMP_40_ENABLED
				3927	th->th.th_current_place = KMP_PLACE_UNDEFINED;
				3928	th->th.th_new_place = KMP_PLACE_UNDEFINED;
				3929	th->th.th_first_place = 0;
				3930	th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3931
				3932	//
				3933	// Turn off 4.0 affinity for the current tread at this parallel level.
				3934	//
				3935	th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3936	# endif
				3937
				3938	return retval;
				3939	}
				3940
				3941
				3942	int
				3943	__kmp_aux_get_affinity(void **mask)
				3944	{
				3945	int gtid;
				3946	int retval;
				3947	kmp_info_t *th;
				3948
				3949	if (! KMP_AFFINITY_CAPABLE()) {
				3950	return -1;
				3951	}
				3952
				3953	gtid = __kmp_entry_gtid();
				3954	th = __kmp_threads[gtid];
				3955	KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
				3956
				3957	KA_TRACE(1000, ;{
				3958	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				3959	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				3960	th->th.th_affin_mask);
				3961	__kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
				3962	});
				3963
				3964	if (__kmp_env_consistency_check) {
				3965	if ((mask == NULL) \|\| (*mask == NULL)) {
				3966	KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
				3967	}
				3968	}
				3969
				3970	# if !KMP_OS_WINDOWS
				3971
				3972	retval = __kmp_get_system_affinity((kmp_affin_mask_t )(mask), FALSE);
				3973	KA_TRACE(1000, ;{
				3974	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				3975	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				3976	(kmp_affin_mask_t )(mask));
				3977	__kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
				3978	});
				3979	return retval;
				3980
				3981	# else
				3982
				3983	KMP_CPU_COPY((kmp_affin_mask_t )(mask), th->th.th_affin_mask);
				3984	return 0;
				3985
				3986	# endif /* KMP_OS_WINDOWS */
				3987
				3988	}
				3989
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3990	int
				3991	__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
				3992	{
				3993	int retval;
				3994
				3995	if (! KMP_AFFINITY_CAPABLE()) {
				3996	return -1;
				3997	}
				3998
				3999	KA_TRACE(1000, ;{
				4000	int gtid = __kmp_entry_gtid();
				4001	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4002	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4003	(kmp_affin_mask_t )(mask));
				4004	__kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
				4005	proc, gtid, buf);
				4006	});
				4007
				4008	if (__kmp_env_consistency_check) {
				4009	if ((mask == NULL) \|\| (*mask == NULL)) {
				4010	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
				4011	}
				4012	}
				4013
				4014	if ((proc < 0) \|\| ((unsigned)proc >= KMP_CPU_SETSIZE)) {
				4015	return -1;
				4016	}
				4017	if (! KMP_CPU_ISSET(proc, fullMask)) {
				4018	return -2;
				4019	}
				4020
				4021	KMP_CPU_SET(proc, (kmp_affin_mask_t )(mask));
				4022	return 0;
				4023	}
				4024
				4025
				4026	int
				4027	__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
				4028	{
				4029	int retval;
				4030
				4031	if (! KMP_AFFINITY_CAPABLE()) {
				4032	return -1;
				4033	}
				4034
				4035	KA_TRACE(1000, ;{
				4036	int gtid = __kmp_entry_gtid();
				4037	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4038	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4039	(kmp_affin_mask_t )(mask));
				4040	__kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
				4041	proc, gtid, buf);
				4042	});
				4043
				4044	if (__kmp_env_consistency_check) {
				4045	if ((mask == NULL) \|\| (*mask == NULL)) {
				4046	KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
				4047	}
				4048	}
				4049
				4050	if ((proc < 0) \|\| ((unsigned)proc >= KMP_CPU_SETSIZE)) {
				4051	return -1;
				4052	}
				4053	if (! KMP_CPU_ISSET(proc, fullMask)) {
				4054	return -2;
				4055	}
				4056
				4057	KMP_CPU_CLR(proc, (kmp_affin_mask_t )(mask));
				4058	return 0;
				4059	}
				4060
				4061
				4062	int
				4063	__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
				4064	{
				4065	int retval;
				4066
				4067	if (! KMP_AFFINITY_CAPABLE()) {
				4068	return -1;
				4069	}
				4070
				4071	KA_TRACE(1000, ;{
				4072	int gtid = __kmp_entry_gtid();
				4073	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4074	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4075	(kmp_affin_mask_t )(mask));
				4076	__kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
				4077	proc, gtid, buf);
				4078	});
				4079
				4080	if (__kmp_env_consistency_check) {
				4081	if ((mask == NULL) \|\| (*mask == NULL)) {
Andrey Churbanov	4b2f17a	2015-01-29 15:49:22 +0000	[diff] [blame]	4082	KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4083	}
				4084	}
				4085
				4086	if ((proc < 0) \|\| ((unsigned)proc >= KMP_CPU_SETSIZE)) {
				4087	return 0;
				4088	}
				4089	if (! KMP_CPU_ISSET(proc, fullMask)) {
				4090	return 0;
				4091	}
				4092
				4093	return KMP_CPU_ISSET(proc, (kmp_affin_mask_t )(mask));
				4094	}
				4095
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4096
				4097	// Dynamic affinity settings - Affinity balanced
				4098	void __kmp_balanced_affinity( int tid, int nthreads )
				4099	{
				4100	if( __kmp_affinity_uniform_topology() ) {
				4101	int coreID;
				4102	int threadID;
				4103	// Number of hyper threads per core in HT machine
				4104	int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
				4105	// Number of cores
				4106	int ncores = __kmp_ncores;
				4107	// How many threads will be bound to each core
				4108	int chunk = nthreads / ncores;
				4109	// How many cores will have an additional thread bound to it - "big cores"
				4110	int big_cores = nthreads % ncores;
				4111	// Number of threads on the big cores
				4112	int big_nth = ( chunk + 1 ) * big_cores;
				4113	if( tid < big_nth ) {
				4114	coreID = tid / (chunk + 1 );
				4115	threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
				4116	} else { //tid >= big_nth
				4117	coreID = ( tid - big_cores ) / chunk;
				4118	threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
				4119	}
				4120
				4121	KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
				4122	"Illegal set affinity operation when not capable");
				4123
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	4124	kmp_affin_mask_t mask = (kmp_affin_mask_t )KMP_ALLOCA(__kmp_affin_mask_size);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4125	KMP_CPU_ZERO(mask);
				4126
				4127	// Granularity == thread
				4128	if( __kmp_affinity_gran == affinity_gran_fine \|\| __kmp_affinity_gran == affinity_gran_thread) {
				4129	int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
				4130	KMP_CPU_SET( osID, mask);
				4131	} else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
				4132	for( int i = 0; i < __kmp_nth_per_core; i++ ) {
				4133	int osID;
				4134	osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
				4135	KMP_CPU_SET( osID, mask);
				4136	}
				4137	}
				4138	if (__kmp_affinity_verbose) {
				4139	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4140	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	4141	KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
				4142	tid, buf);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4143	}
				4144	__kmp_set_system_affinity( mask, TRUE );
				4145	} else { // Non-uniform topology
				4146
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	4147	kmp_affin_mask_t mask = (kmp_affin_mask_t )KMP_ALLOCA(__kmp_affin_mask_size);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4148	KMP_CPU_ZERO(mask);
				4149
				4150	// Number of hyper threads per core in HT machine
				4151	int nth_per_core = __kmp_nThreadsPerCore;
				4152	int core_level;
				4153	if( nth_per_core > 1 ) {
				4154	core_level = __kmp_aff_depth - 2;
				4155	} else {
				4156	core_level = __kmp_aff_depth - 1;
				4157	}
				4158
				4159	// Number of cores - maximum value; it does not count trail cores with 0 processors
				4160	int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
				4161
				4162	// For performance gain consider the special case nthreads == __kmp_avail_proc
				4163	if( nthreads == __kmp_avail_proc ) {
				4164	if( __kmp_affinity_gran == affinity_gran_fine \|\| __kmp_affinity_gran == affinity_gran_thread) {
				4165	int osID = address2os[ tid ].second;
				4166	KMP_CPU_SET( osID, mask);
				4167	} else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
				4168	int coreID = address2os[ tid ].first.labels[ core_level ];
				4169	// We'll count found osIDs for the current core; they can be not more than nth_per_core;
				4170	// since the address2os is sortied we can break when cnt==nth_per_core
				4171	int cnt = 0;
				4172	for( int i = 0; i < __kmp_avail_proc; i++ ) {
				4173	int osID = address2os[ i ].second;
				4174	int core = address2os[ i ].first.labels[ core_level ];
				4175	if( core == coreID ) {
				4176	KMP_CPU_SET( osID, mask);
				4177	cnt++;
				4178	if( cnt == nth_per_core ) {
				4179	break;
				4180	}
				4181	}
				4182	}
				4183	}
				4184	} else if( nthreads <= __kmp_ncores ) {
				4185
				4186	int core = 0;
				4187	for( int i = 0; i < ncores; i++ ) {
				4188	// Check if this core from procarr[] is in the mask
				4189	int in_mask = 0;
				4190	for( int j = 0; j < nth_per_core; j++ ) {
				4191	if( procarr[ i * nth_per_core + j ] != - 1 ) {
				4192	in_mask = 1;
				4193	break;
				4194	}
				4195	}
				4196	if( in_mask ) {
				4197	if( tid == core ) {
				4198	for( int j = 0; j < nth_per_core; j++ ) {
				4199	int osID = procarr[ i * nth_per_core + j ];
				4200	if( osID != -1 ) {
				4201	KMP_CPU_SET( osID, mask );
				4202	// For granularity=thread it is enough to set the first available osID for this core
				4203	if( __kmp_affinity_gran == affinity_gran_fine \|\| __kmp_affinity_gran == affinity_gran_thread) {
				4204	break;
				4205	}
				4206	}
				4207	}
				4208	break;
				4209	} else {
				4210	core++;
				4211	}
				4212	}
				4213	}
				4214
				4215	} else { // nthreads > __kmp_ncores
				4216
				4217	// Array to save the number of processors at each core
Jonathan Peyton	7be07533	2015-06-22 15:53:50 +0000	[diff] [blame]	4218	int* nproc_at_core = (int)KMP_ALLOCA(sizeof(int)ncores);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4219	// Array to save the number of cores with "x" available processors;
Jonathan Peyton	7be07533	2015-06-22 15:53:50 +0000	[diff] [blame]	4220	int* ncores_with_x_procs = (int)KMP_ALLOCA(sizeof(int)(nth_per_core+1));
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4221	// Array to save the number of cores with # procs from x to nth_per_core
Jonathan Peyton	7be07533	2015-06-22 15:53:50 +0000	[diff] [blame]	4222	int* ncores_with_x_to_max_procs = (int)KMP_ALLOCA(sizeof(int)(nth_per_core+1));
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4223
				4224	for( int i = 0; i <= nth_per_core; i++ ) {
				4225	ncores_with_x_procs[ i ] = 0;
				4226	ncores_with_x_to_max_procs[ i ] = 0;
				4227	}
				4228
				4229	for( int i = 0; i < ncores; i++ ) {
				4230	int cnt = 0;
				4231	for( int j = 0; j < nth_per_core; j++ ) {
				4232	if( procarr[ i * nth_per_core + j ] != -1 ) {
				4233	cnt++;
				4234	}
				4235	}
				4236	nproc_at_core[ i ] = cnt;
				4237	ncores_with_x_procs[ cnt ]++;
				4238	}
				4239
				4240	for( int i = 0; i <= nth_per_core; i++ ) {
				4241	for( int j = i; j <= nth_per_core; j++ ) {
				4242	ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
				4243	}
				4244	}
				4245
				4246	// Max number of processors
				4247	int nproc = nth_per_core * ncores;
				4248	// An array to keep number of threads per each context
				4249	int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
				4250	for( int i = 0; i < nproc; i++ ) {
				4251	newarr[ i ] = 0;
				4252	}
				4253
				4254	int nth = nthreads;
				4255	int flag = 0;
				4256	while( nth > 0 ) {
				4257	for( int j = 1; j <= nth_per_core; j++ ) {
				4258	int cnt = ncores_with_x_to_max_procs[ j ];
				4259	for( int i = 0; i < ncores; i++ ) {
				4260	// Skip the core with 0 processors
				4261	if( nproc_at_core[ i ] == 0 ) {
				4262	continue;
				4263	}
				4264	for( int k = 0; k < nth_per_core; k++ ) {
				4265	if( procarr[ i * nth_per_core + k ] != -1 ) {
				4266	if( newarr[ i * nth_per_core + k ] == 0 ) {
				4267	newarr[ i * nth_per_core + k ] = 1;
				4268	cnt--;
				4269	nth--;
				4270	break;
				4271	} else {
				4272	if( flag != 0 ) {
				4273	newarr[ i * nth_per_core + k ] ++;
				4274	cnt--;
				4275	nth--;
				4276	break;
				4277	}
				4278	}
				4279	}
				4280	}
				4281	if( cnt == 0 \|\| nth == 0 ) {
				4282	break;
				4283	}
				4284	}
				4285	if( nth == 0 ) {
				4286	break;
				4287	}
				4288	}
				4289	flag = 1;
				4290	}
				4291	int sum = 0;
				4292	for( int i = 0; i < nproc; i++ ) {
				4293	sum += newarr[ i ];
				4294	if( sum > tid ) {
				4295	// Granularity == thread
				4296	if( __kmp_affinity_gran == affinity_gran_fine \|\| __kmp_affinity_gran == affinity_gran_thread) {
				4297	int osID = procarr[ i ];
				4298	KMP_CPU_SET( osID, mask);
				4299	} else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
				4300	int coreID = i / nth_per_core;
				4301	for( int ii = 0; ii < nth_per_core; ii++ ) {
				4302	int osID = procarr[ coreID * nth_per_core + ii ];
				4303	if( osID != -1 ) {
				4304	KMP_CPU_SET( osID, mask);
				4305	}
				4306	}
				4307	}
				4308	break;
				4309	}
				4310	}
				4311	__kmp_free( newarr );
				4312	}
				4313
				4314	if (__kmp_affinity_verbose) {
				4315	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4316	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	4317	KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
				4318	tid, buf);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4319	}
				4320	__kmp_set_system_affinity( mask, TRUE );
				4321	}
				4322	}
				4323
Alp Toker	763b939	2014-02-28 09:42:41 +0000	[diff] [blame]	4324	#endif // KMP_AFFINITY_SUPPORTED