/*
 * kmp_affinity.cpp -- affinity management
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_str.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_affinity.h"

// Store the real or imagined machine hierarchy here
static hierarchy_info machine_hierarchy;

void __kmp_cleanup_hierarchy() {
    machine_hierarchy.fini();
}

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
    kmp_uint32 depth;
    // The test below is true if affinity is available, but set to "none".
    // Need to init on first use of hierarchical barrier.
    if (TCR_1(machine_hierarchy.uninitialized))
        machine_hierarchy.init(NULL, nproc);

    // Adjust the hierarchy in case num threads exceeds original
    if (nproc > machine_hierarchy.base_num_threads)
        machine_hierarchy.resize(nproc);

    depth = machine_hierarchy.depth;
    KMP_DEBUG_ASSERT(depth > 0);

    thr_bar->depth = depth;
    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1;
    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}
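
//
// Illustrative sketch (added note, not from the original source): assuming
// numPerLevel = {4, 2} describes 4 threads per leaf node and 2 leaf nodes,
// a thread's barrier state would come out as
//
//     thr_bar->depth          = 2;
//     thr_bar->base_leaf_kids = 4 - 1;        // 3 other kids per leaf parent
//     thr_bar->skip_per_level = skipPerLevel; // assumed to hold {1, 4}, i.e.
//                                             // the stride between siblings
//                                             // at each level
//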

#if KMP_AFFINITY_SUPPORTED

//
// Print the affinity mask to the character array in a pretty format.
//
char *
__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
{
    KMP_ASSERT(buf_len >= 40);
    char *scan = buf;
    char *end = buf + buf_len - 1;

    //
    // Find first element / check for empty set.
    //
    size_t i;
    for (i = 0; i < KMP_CPU_SETSIZE; i++) {
        if (KMP_CPU_ISSET(i, mask)) {
            break;
        }
    }
    if (i == KMP_CPU_SETSIZE) {
        KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}");
        while (*scan != '\0') scan++;
        KMP_ASSERT(scan <= end);
        return buf;
    }

    KMP_SNPRINTF(scan, end - scan + 1, "{%ld", (long)i);
    while (*scan != '\0') scan++;
    i++;
    for (; i < KMP_CPU_SETSIZE; i++) {
        if (! KMP_CPU_ISSET(i, mask)) {
            continue;
        }

        //
        // Check for buffer overflow. A string of the form ",<n>" will have
        // at most 10 characters, plus we want to leave room to print ",...}"
        // if the set is too large to print, for a total of 15 characters.
        // We already left room for '\0' in setting end.
        //
        if (end - scan < 15) {
            break;
        }
        KMP_SNPRINTF(scan, end - scan + 1, ",%-ld", (long)i);
        while (*scan != '\0') scan++;
    }
    if (i < KMP_CPU_SETSIZE) {
        KMP_SNPRINTF(scan, end - scan + 1, ",...");
        while (*scan != '\0') scan++;
    }
    KMP_SNPRINTF(scan, end - scan + 1, "}");
    while (*scan != '\0') scan++;
    KMP_ASSERT(scan <= end);
    return buf;
}
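
//
// Usage sketch (illustrative, not from the original source):
//
//     char buf[KMP_AFFIN_MASK_PRINT_LEN];
//     __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
//     // buf now holds e.g. "{0,1,2,3}", "{<empty>}" for an empty set, or
//     // "{0,1,2,...}" when the set does not fit in the buffer.
//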


void
__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
{
    KMP_CPU_ZERO(mask);

# if KMP_GROUP_AFFINITY

    if (__kmp_num_proc_groups > 1) {
        int group;
        KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
        for (group = 0; group < __kmp_num_proc_groups; group++) {
            int i;
            int num = __kmp_GetActiveProcessorCount(group);
            for (i = 0; i < num; i++) {
                KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
            }
        }
    }
    else

# endif /* KMP_GROUP_AFFINITY */

    {
        int proc;
        for (proc = 0; proc < __kmp_xproc; proc++) {
            KMP_CPU_SET(proc, mask);
        }
    }
}
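
//
// Example (illustrative): with Windows* OS processor groups on a 64-bit
// system, CHAR_BIT * sizeof(DWORD_PTR) == 64, so proc p of group g maps to
// bit p + g * 64 - e.g. proc 3 of group 1 sets bit 67 in the mask.
//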

//
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the child_num
// vector of the address object. This is done in case the labels used for
// the children at one node of the hierarchy differ from those used for
// another node at the same level. Example: suppose the machine has 2 nodes
// with 2 packages each. The first node contains packages 601 and 602, and
// the second node contains packages 603 and 604. If we try to sort the table
// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
// because we are paying attention to the labels themselves, not the ordinal
// child numbers. By using the child numbers in the sort, the result is
// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
//
static void
__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
    int numAddrs)
{
    KMP_DEBUG_ASSERT(numAddrs > 0);
    int depth = address2os->first.depth;
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
        * sizeof(unsigned));
    int labCt;
    for (labCt = 0; labCt < depth; labCt++) {
        address2os[0].first.childNums[labCt] = counts[labCt] = 0;
        lastLabel[labCt] = address2os[0].first.labels[labCt];
    }
    int i;
    for (i = 1; i < numAddrs; i++) {
        for (labCt = 0; labCt < depth; labCt++) {
            if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
                int labCt2;
                for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
                    counts[labCt2] = 0;
                    lastLabel[labCt2] = address2os[i].first.labels[labCt2];
                }
                counts[labCt]++;
                lastLabel[labCt] = address2os[i].first.labels[labCt];
                break;
            }
        }
        for (labCt = 0; labCt < depth; labCt++) {
            address2os[i].first.childNums[labCt] = counts[labCt];
        }
        for (; labCt < (int)Address::maxDepth; labCt++) {
            address2os[i].first.childNums[labCt] = 0;
        }
    }
}
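
//
// Trace of the example above (illustrative): for the sorted labels
// {0,601} {0,602} {1,603} {1,604}, the loop assigns child numbers
// {0,0} {0,1} {1,0} {1,1} - the package counter restarts at 0 whenever the
// node label changes, which is what lets a sort by child numbers interleave
// the two nodes.
//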


//
// All of the __kmp_affinity_create_*_map() routines should set
// __kmp_affinity_masks to a vector of affinity mask objects of length
// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
// return the number of levels in the machine topology tree (zero if
// __kmp_affinity_type == affinity_none).
//
// All of the __kmp_affinity_create_*_map() routines should set *fullMask
// to the affinity mask for the initialization thread. They need to save and
// restore the mask, and it could be needed later, so saving it is just an
// optimization to avoid calling __kmp_get_system_affinity() again.
//
static kmp_affin_mask_t *fullMask = NULL;

kmp_affin_mask_t *
__kmp_affinity_get_fullMask() { return fullMask; }


static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif

//
// __kmp_affinity_uniform_topology() doesn't work when called from
// places which support arbitrarily many levels in the machine topology
// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
// and __kmp_affinity_create_x2apicid_map().
//
inline static bool
__kmp_affinity_uniform_topology()
{
    return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}
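
//
// Example (illustrative): 2 packages x 4 cores x 2 threads with all 16 OS
// procs available is uniform. If one package instead has 2 cores, then
// nCoresPerPkg == 4 (the max), so 2 * 4 * 2 == 16 != 12 available procs,
// and the topology is reported as non-uniform.
//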


//
// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc.
//
static void
__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
    int pkgLevel, int coreLevel, int threadLevel)
{
    int proc;

    KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
    for (proc = 0; proc < len; proc++) {
        int level;
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);
        for (level = 0; level < depth; level++) {
            if (level == threadLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
            }
            else if (level == coreLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
            }
            else if (level == pkgLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
            }
            else if (level > pkgLevel) {
                __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                    level - pkgLevel - 1);
            }
            else {
                __kmp_str_buf_print(&buf, "L%d ", level);
            }
            __kmp_str_buf_print(&buf, "%d ",
                address2os[proc].first.labels[level]);
        }
        KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
            buf.str);
        __kmp_str_buf_free(&buf);
    }
}


//
// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread id's <-> processor id's.
//
static int
__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
    kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Even if __kmp_affinity_type == affinity_none, this routine might still
    // be called to set __kmp_ncores, as well as
    // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(__kmp_affinity_type == affinity_none);
        __kmp_ncores = nPackages = __kmp_xproc;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages. Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nPackages = __kmp_avail_proc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);

        KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
            __kmp_nThreadsPerCore, __kmp_ncores);
    }
    if (__kmp_affinity_type == affinity_none) {
        return 0;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
        __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    unsigned int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(1);
        addr.labels[0] = i;
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
    }
    if (__kmp_affinity_verbose) {
        KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Only the package level is modeled in the machine topology map,
        // so the #levels of granularity is either 0 or 1.
        //
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels = 1;
        }
        else {
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 1;
}


# if KMP_GROUP_AFFINITY

//
// If multiple Windows* OS processor groups exist, we can create a 2-level
// topology map with the groups at level 0 and the individual procs at
// level 1.
//
// This facilitates letting the threads float among all procs in a group,
// if granularity=group (the default when there are multiple groups).
//
static int
__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
    kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // If we don't have multiple processor groups, return now.
    // The flat mapping will be used.
    //
    if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
        // FIXME set *msg_id
        return -1;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
        __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(2);
        addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
        addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
                addr.labels[1]);
        }
    }

    if (__kmp_affinity_gran_levels < 0) {
        if (__kmp_affinity_gran == affinity_gran_group) {
            __kmp_affinity_gran_levels = 1;
        }
        else if ((__kmp_affinity_gran == affinity_gran_fine)
          || (__kmp_affinity_gran == affinity_gran_thread)) {
            __kmp_affinity_gran_levels = 0;
        }
        else {
            const char *gran_str = NULL;
            if (__kmp_affinity_gran == affinity_gran_core) {
                gran_str = "core";
            }
            else if (__kmp_affinity_gran == affinity_gran_package) {
                gran_str = "package";
            }
            else if (__kmp_affinity_gran == affinity_gran_node) {
                gran_str = "node";
            }
            else {
                KMP_ASSERT(0);
            }

            // Warning: can't use affinity granularity \"gran\" with group topology method, using "thread"
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 2;
}

# endif /* KMP_GROUP_AFFINITY */

# if KMP_ARCH_X86 || KMP_ARCH_X86_64

static int
__kmp_cpuid_mask_width(int count) {
    int r = 0;

    while((1<<r) < count)
        ++r;
    return r;
}
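
//
// Examples (illustrative): __kmp_cpuid_mask_width(1) == 0,
// __kmp_cpuid_mask_width(4) == 2, and __kmp_cpuid_mask_width(6) == 3 -
// i.e. the number of bits needed to encode "count" distinct values.
//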


class apicThreadInfo {
public:
    unsigned osId;              // param to __kmp_affinity_bind_thread
    unsigned apicId;            // from cpuid after binding
    unsigned maxCoresPerPkg;    // ""
    unsigned maxThreadsPerPkg;  // ""
    unsigned pkgId;             // inferred from above values
    unsigned coreId;            // ""
    unsigned threadId;          // ""
};


static int
__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->osId < bb->osId) return -1;
    if (aa->osId > bb->osId) return 1;
    return 0;
}


static int
__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->pkgId < bb->pkgId) return -1;
    if (aa->pkgId > bb->pkgId) return 1;
    if (aa->coreId < bb->coreId) return -1;
    if (aa->coreId > bb->coreId) return 1;
    if (aa->threadId < bb->threadId) return -1;
    if (aa->threadId > bb->threadId) return 1;
    return 0;
}


//
// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
// an algorithm which cycles through the available os threads, setting
// the current thread's affinity mask to that thread, and then retrieves
// the Apic Id for each thread context using the cpuid instruction.
//
static int
__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
    kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;
    int rc;
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check if cpuid leaf 4 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 4) {
        *msg_id = kmp_i18n_str_NoLeaf4Support;
        return -1;
    }

    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
    // the defaults that we calculated from issuing cpuid without binding
    // to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        //
        // Get an upper bound on the number of threads per package using
        // cpuid(1).
        //
        // On some OS/chip combinations where HT is supported by the chip
        // but is disabled, this value will be 2 on a single core chip.
        // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
        //
        __kmp_x86_cpuid(1, 0, &buf);
        int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (maxThreadsPerPkg == 0) {
            maxThreadsPerPkg = 1;
        }

        //
        // The num cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // The author of cpu_count.cpp treated this only as an upper bound
        // on the number of cores, but I haven't seen any cases where it
        // was greater than the actual number of cores, so we will treat
        // it as exact in this block of code.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            nCoresPerPkg = 1;
        }

        //
        // There is no way to reliably tell if HT is enabled without issuing
        // the cpuid instruction from every thread, and correlating the cpuid
        // info, so if the machine is not affinity capable, we assume that HT
        // is off. We have seen quite a few machines where maxThreadsPerPkg
        // is 2, yet the machine does not support HT.
        //
        // - Older OSes are usually found on machines with older chips, which
        //   do not support HT.
        //
        // - The performance penalty for mistakenly identifying a machine as
        //   HT when it isn't (which results in blocktime being incorrectly
        //   set to 0) is greater than the penalty for mistakenly identifying
        //   a machine as being 1 thread/core when it is really HT enabled
        //   (which results in blocktime being incorrectly set to a positive
        //   value).
        //
        __kmp_ncores = __kmp_xproc;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        __kmp_nThreadsPerCore = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    KMP_ASSERT(oldMask != NULL);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    // The relevant information is:
    //
    // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
    //    has a unique Apic Id, which is of the form pkg# : core# : thread#.
    //
    // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The
    //    value of this field determines the width of the core# + thread#
    //    fields in the Apic Id. It is also an upper bound on the number
    //    of threads per package, but it has been verified that situations
    //    happen where it is not exact. In particular, on certain OS/chip
    //    combinations where Intel(R) Hyper-Threading Technology is supported
    //    by the chip but has been disabled, the value of this field will be
    //    2 (for a single core chip). On other OS/chip combinations supporting
    //    Intel(R) Hyper-Threading Technology, the value of this field will be
    //    1 when Intel(R) Hyper-Threading Technology is disabled and 2 when it
    //    is enabled.
    //
    // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The
    //    value of this field (+1) determines the width of the core# field in
    //    the Apic Id. The comments in "cpucount.cpp" say that this value is
    //    an upper bound, but the IA-32 architecture manual says that it is
    //    exactly the number of cores per package, and I haven't seen any
    //    case where it wasn't.
    //
    // From this information, deduce the package Id, core Id, and thread Id,
    // and set the corresponding fields in the apicThreadInfo struct.
    //
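    //
    // Worked example (illustrative): maxThreadsPerPkg = 8 gives widthCT = 3
    // and maxCoresPerPkg = 4 gives widthC = 2, so widthT = 1. For an Apic Id
    // of 0b101101:
    //     pkgId    =  0b101101 >> 3         = 5
    //     coreId   = (0b101101 >> 1) & 0b11 = 2
    //     threadId =  0b101101 & 0b1        = 1
    //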
    unsigned i;
    apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
        __kmp_avail_proc * sizeof(apicThreadInfo));
    unsigned nApics = 0;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(i);
        threadInfo[nApics].osId = i;

        //
        // The apic id and max threads per pkg come from cpuid(1).
        //
        __kmp_x86_cpuid(1, 0, &buf);
        // cpuid(1) edx bit 9 is the on-chip APIC feature flag.
        if (((buf.edx >> 9) & 1) == 0) {
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_ApicNotPresent;
            return -1;
        }
        threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
        threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (threadInfo[nApics].maxThreadsPerPkg == 0) {
            threadInfo[nApics].maxThreadsPerPkg = 1;
        }

        //
        // Max cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            threadInfo[nApics].maxCoresPerPkg = 1;
        }

        //
        // Infer the pkgId / coreId / threadId using only the info
        // obtained locally.
        //
        int widthCT = __kmp_cpuid_mask_width(
            threadInfo[nApics].maxThreadsPerPkg);
        threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

        int widthC = __kmp_cpuid_mask_width(
            threadInfo[nApics].maxCoresPerPkg);
        int widthT = widthCT - widthC;
        if (widthT < 0) {
            //
            // I've never seen this one happen, but I suppose it could, if
            // the cpuid instruction on a chip was really screwed up.
            // Make sure to restore the affinity mask before the tail call.
            //
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }

        int maskC = (1 << widthC) - 1;
        threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
            & maskC;

        int maskT = (1 << widthT) - 1;
        threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

        nApics++;
    }

    //
    // We've collected all the info we need.
    // Restore the old affinity mask for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, form an Address object
    // with depth 1 and return immediately (or, if affinity is off, set
    // address2os to NULL and return).
    //
    // If it is configured to omit the package level when there is only a
    // single package, the logic at the end of this routine won't work if
    // there is only a single thread - it would try to form an Address
    // object with depth 0.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
        Address addr(1);
        addr.labels[0] = threadInfo[0].pkgId;
        (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
        }

        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the threadInfo table by physical Id.
    //
    qsort(threadInfo, nApics, sizeof(*threadInfo),
        __kmp_affinity_cmp_apicThreadInfo_phys_id);

    //
    // The table is now sorted by pkgId / coreId / threadId, but we really
    // don't know the radix of any of the fields. pkgId's may be sparsely
    // assigned among the chips on a system. Although coreId's are usually
    // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
    // [0..threadsPerCore-1], we don't want to make any such assumptions.
    //
    // For that matter, we don't know what coresPerPkg and threadsPerCore
    // (or the total # packages) are at this point - we want to determine
    // that now. We only have an upper bound on the first two figures.
    //
    // We also perform a consistency check at this point: the values returned
    // by the cpuid instruction for any thread bound to a given package had
    // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
    //
    nPackages = 1;
    nCoresPerPkg = 1;
    __kmp_nThreadsPerCore = 1;
    unsigned nCores = 1;

    unsigned pkgCt = 1; // to determine radii
    unsigned lastPkgId = threadInfo[0].pkgId;
    unsigned coreCt = 1;
    unsigned lastCoreId = threadInfo[0].coreId;
    unsigned threadCt = 1;
    unsigned lastThreadId = threadInfo[0].threadId;

    // intra-pkg consistency checks
    unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
    unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

    for (i = 1; i < nApics; i++) {
        if (threadInfo[i].pkgId != lastPkgId) {
            nCores++;
            pkgCt++;
            lastPkgId = threadInfo[i].pkgId;
            if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
            coreCt = 1;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;

            //
            // This is a different package, so go on to the next iteration
            // without doing any consistency checks. Reset the consistency
            // check vars, though.
            //
            prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
            prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
            continue;
        }

        if (threadInfo[i].coreId != lastCoreId) {
            nCores++;
            coreCt++;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;
        }
        else if (threadInfo[i].threadId != lastThreadId) {
            threadCt++;
            lastThreadId = threadInfo[i].threadId;
        }
        else {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
            return -1;
        }

        //
        // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
        // fields agree between all the threads bound to a given package.
        //
        if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
          || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }
    }
    nPackages = pkgCt;
    if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
    if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages. Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nCores;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (__kmp_affinity_uniform_topology()) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
            __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Now that we've determined the number of packages, the number of cores
    // per package, and the number of threads per core, we can construct the
    // data structure that is to be returned.
    //
    int pkgLevel = 0;
    int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
    int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
    unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

    KMP_ASSERT(depth > 0);
    *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

    for (i = 0; i < nApics; ++i) {
        Address addr(depth);
        unsigned os = threadInfo[i].osId;
        int d = 0;

        if (pkgLevel >= 0) {
            addr.labels[d++] = threadInfo[i].pkgId;
        }
        if (coreLevel >= 0) {
            addr.labels[d++] = threadInfo[i].coreId;
        }
        if (threadLevel >= 0) {
            addr.labels[d++] = threadInfo[i].threadId;
        }
        (*address2os)[i] = AddrUnsPair(addr, os);
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0)
          && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
            coreLevel, threadLevel);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return depth;
}


//
// Intel(R) microarchitecture code name Nehalem, Dunnington and later
// architectures support a newer interface for specifying the x2APIC Ids,
// based on cpuid leaf 11.
//
static int
__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
    kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;

    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check to see if cpuid leaf 11 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 11) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }
    __kmp_x86_cpuid(11, 0, &buf);
    if (buf.ebx == 0) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }

    //
    // Find the number of levels in the machine topology. While we're at it,
    // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We
    // will try to get more accurate values later by explicitly counting
    // them, but get reasonable defaults now, in case we return early.
    //
    int level;
    int threadLevel = -1;
    int coreLevel = -1;
    int pkgLevel = -1;
    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

    for (level = 0;; level++) {
        if (level > 31) {
            //
            // FIXME: Hack for DPD200163180
            //
            // If level is big then something went wrong -> exiting
            //
            // There could actually be 32 valid levels in the machine topology,
            // but so far, the only machine we have seen which does not exit
            // this loop before iteration 32 has fubar x2APIC settings.
            //
            // For now, just reject this case based upon loop trip count.
            //
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }
        __kmp_x86_cpuid(11, level, &buf);
        if (buf.ebx == 0) {
            if (pkgLevel < 0) {
                //
                // Will infer nPackages from __kmp_xproc
                //
                pkgLevel = level;
                level++;
            }
            break;
        }
        int kind = (buf.ecx >> 8) & 0xff;
        if (kind == 1) {
            //
            // SMT level
            //
            threadLevel = level;
            coreLevel = -1;
            pkgLevel = -1;
            __kmp_nThreadsPerCore = buf.ebx & 0xff;
            if (__kmp_nThreadsPerCore == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else if (kind == 2) {
            //
            // core level
            //
            coreLevel = level;
            pkgLevel = -1;
            nCoresPerPkg = buf.ebx & 0xff;
            if (nCoresPerPkg == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else {
            if (level <= 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
            if (pkgLevel >= 0) {
                continue;
            }
            pkgLevel = level;
            nPackages = buf.ebx & 0xff;
            if (nPackages == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
    }
    int depth = level;

    //
    // In the above loop, "level" was counted from the finest level (usually
    // thread) to the coarsest. The caller expects that we will place the
    // labels in (*address2os)[].first.labels[] in the inverse order, so
    // we need to invert the vars saying which level means what.
    //
    if (threadLevel >= 0) {
        threadLevel = depth - threadLevel - 1;
    }
    if (coreLevel >= 0) {
        coreLevel = depth - coreLevel - 1;
    }
    KMP_DEBUG_ASSERT(pkgLevel >= 0);
    pkgLevel = depth - pkgLevel - 1;
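
    //
    // Example (illustrative): with depth == 3 and cpuid reporting
    // thread = 0, core = 1, package = 2, the inversion yields pkgLevel = 0,
    // coreLevel = 1, threadLevel = 2 - the coarsest-to-finest order used by
    // labels[] everywhere else.
    //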

    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
    // the defaults that we calculated from issuing cpuid without binding
    // to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE())
    {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Allocate the data structure to be returned.
    //
    AddrUnsPair *retval = (AddrUnsPair *)
        __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    unsigned int proc;
    int nApics = 0;
    for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(proc, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(proc);

        //
        // Extract the labels for each level in the machine topology map
        // from the Apic ID.
        //
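        //
        // Worked example (illustrative): assuming depth == 3, an SMT shift
        // of 1 and a core shift of 5, x2APIC id 0x2d yields
        //     labels[2] (thread) =  0x2d & 0x1        = 1
        //     labels[1] (core)   = (0x2d & 0x1f) >> 1 = 6
        //     labels[0] (pkg)    =  0x2d >> 5         = 1
        //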
        Address addr(depth);
        int prev_shift = 0;

        for (level = 0; level < depth; level++) {
            __kmp_x86_cpuid(11, level, &buf);
            unsigned apicId = buf.edx;
            if (buf.ebx == 0) {
                if (level != depth - 1) {
                    KMP_CPU_FREE(oldMask);
                    *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
                    return -1;
                }
                addr.labels[depth - level - 1] = apicId >> prev_shift;
                level++;
                break;
            }
            int shift = buf.eax & 0x1f;
            int mask = (1 << shift) - 1;
            addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
            prev_shift = shift;
        }
        if (level != depth) {
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }

        retval[nApics] = AddrUnsPair(addr, proc);
        nApics++;
    }

    //
    // We've collected all the info we need.
    // Restore the old affinity mask for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, return now.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(retval);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        //
        // Form an Address object which only includes the package level.
        //
        Address addr(1);
        addr.labels[0] = retval[0].first.labels[pkgLevel];
        retval[0].first = addr;

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
        }

        *address2os = retval;
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the table by physical Id.
    //
    qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

    //
    // Find the radix at each of the levels.
    //
    unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    for (level = 0; level < depth; level++) {
        totals[level] = 1;
        maxCt[level] = 1;
        counts[level] = 1;
        last[level] = retval[0].first.labels[level];
    }
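
    //
    // Descriptive note (added): counts[level] tracks the number of distinct
    // labels seen at that level under the current parent, maxCt[level] the
    // maximum of counts[level] over all parents, totals[level] the total
    // number of distinct objects at that level, and last[level] the previous
    // label, used to detect when a new object starts.
    //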

    //
    // From here on, the iteration variable "level" runs from the finest
    // level to the coarsest, i.e. we iterate forward through
    // (*address2os)[].first.labels[] - in the previous loops, we iterated
    // backwards.
    //
    for (proc = 1; (int)proc < nApics; proc++) {
        int level;
        for (level = 0; level < depth; level++) {
            if (retval[proc].first.labels[level] != last[level]) {
                int j;
                for (j = level + 1; j < depth; j++) {
                    totals[j]++;
                    counts[j] = 1;
                    // The line below causes incorrect topology information to
                    // be printed when the max value for some level
                    // (maxCt[level]) is encountered before a smaller value
                    // while going through the array. For example, if pkg0
                    // has 4 cores and pkg1 has 2 cores, then maxCt[1] == 2
                    // whereas it must be 4.
                    // TODO!!! Check if it can be commented safely
                    //maxCt[j] = 1;
                    last[j] = retval[proc].first.labels[j];
                }
                totals[level]++;
                counts[level]++;
                if (counts[level] > maxCt[level]) {
                    maxCt[level] = counts[level];
                }
                last[level] = retval[proc].first.labels[level];
                break;
            }
            else if (level == depth - 1) {
                __kmp_free(last);
                __kmp_free(maxCt);
                __kmp_free(counts);
                __kmp_free(totals);
                __kmp_free(retval);
                KMP_CPU_FREE(oldMask);
                *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
                return -1;
            }
        }
    }

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages. Make sure all these vars are set
    // correctly, and return if affinity is not enabled.
    //
    if (threadLevel >= 0) {
        __kmp_nThreadsPerCore = maxCt[threadLevel];
    }
    else {
        __kmp_nThreadsPerCore = 1;
    }
    nPackages = totals[pkgLevel];

    if (coreLevel >= 0) {
        __kmp_ncores = totals[coreLevel];
        nCoresPerPkg = maxCt[coreLevel];
    }
    else {
        __kmp_ncores = nPackages;
        nCoresPerPkg = 1;
    }

    //
    // Check to see if the machine topology is uniform
    //
    unsigned prod = maxCt[0];
    for (level = 1; level < depth; level++) {
        prod *= maxCt[level];
    }
    bool uniform = (prod == totals[level - 1]);
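
    //
    // Example (illustrative): maxCt = {2, 4, 2} gives prod = 16, so the
    // topology is uniform only if the machine really has 2 * 4 * 2 == 16
    // leaf objects, i.e. totals[depth - 1] == 16.
    //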

    //
    // Print the machine topology summary.
    //
    if (__kmp_affinity_verbose) {
        char mask[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (uniform) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }

        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);

        __kmp_str_buf_print(&buf, "%d", totals[0]);
        for (level = 1; level <= pkgLevel; level++) {
            __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
        }
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
            __kmp_nThreadsPerCore, __kmp_ncores);

        __kmp_str_buf_free(&buf);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(last);
        __kmp_free(maxCt);
        __kmp_free(counts);
        __kmp_free(totals);
        __kmp_free(retval);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Find any levels with radix 1, and remove them from the map
    // (except for the package level).
    //
    int new_depth = 0;
    for (level = 0; level < depth; level++) {
        if ((maxCt[level] == 1) && (level != pkgLevel)) {
            continue;
        }
        new_depth++;
    }

    //
    // If we are removing any levels, allocate a new vector to return,
    // and copy the relevant information to it.
    //
    if (new_depth != depth) {
        AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
            sizeof(AddrUnsPair) * nApics);
        for (proc = 0; (int)proc < nApics; proc++) {
            Address addr(new_depth);
            new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
        }
        int new_level = 0;
        int newPkgLevel = -1;
        int newCoreLevel = -1;
        int newThreadLevel = -1;
        int i;
        for (level = 0; level < depth; level++) {
            if ((maxCt[level] == 1)
              && (level != pkgLevel)) {
                //
                // Remove this level. Never remove the package level
                //
                continue;
            }
            if (level == pkgLevel) {
                newPkgLevel = level;
            }
            if (level == coreLevel) {
                newCoreLevel = level;
            }
            if (level == threadLevel) {
                newThreadLevel = level;
            }
            for (proc = 0; (int)proc < nApics; proc++) {
                new_retval[proc].first.labels[new_level]
                    = retval[proc].first.labels[level];
            }
            new_level++;
        }

        __kmp_free(retval);
        retval = new_retval;
        depth = new_depth;
        pkgLevel = newPkgLevel;
        coreLevel = newCoreLevel;
        threadLevel = newThreadLevel;
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
            coreLevel, threadLevel);
    }

    __kmp_free(last);
    __kmp_free(maxCt);
    __kmp_free(counts);
    __kmp_free(totals);
    KMP_CPU_FREE(oldMask);
    *address2os = retval;
    return depth;
}


# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#define osIdIndex       0
#define threadIdIndex   1
#define coreIdIndex     2
#define pkgIdIndex      3
#define nodeIdIndex     4

typedef unsigned *ProcCpuInfo;
static unsigned maxIndex = pkgIdIndex;


static int
__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
{
    const unsigned *aa = (const unsigned *)a;
    const unsigned *bb = (const unsigned *)b;
    if (aa[osIdIndex] < bb[osIdIndex]) return -1;
    if (aa[osIdIndex] > bb[osIdIndex]) return 1;
    return 0;
}


static int
__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
{
    unsigned i;
    const unsigned *aa = *((const unsigned **)a);
    const unsigned *bb = *((const unsigned **)b);
    for (i = maxIndex; ; i--) {
        if (aa[i] < bb[i]) return -1;
        if (aa[i] > bb[i]) return 1;
        if (i == osIdIndex) break;
    }
    return 0;
}


//
// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
// affinity map.
//
static int
__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
    kmp_i18n_id_t *const msg_id, FILE *f)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Scan the file once: count the number of "processor" (osId) fields,
    // and find the highest value of <n> for a node_<n> field.
    //
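    //
    // For reference (illustrative), a typical /proc/cpuinfo record looks like
    //
    //     processor       : 0
    //     physical id     : 0
    //     core id         : 0
    //     ...
    //
    // with records separated by blank lines.
    //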
    char buf[256];
    unsigned num_records = 0;
    while (! feof(f)) {
        buf[sizeof(buf) - 1] = 1;
        if (! fgets(buf, sizeof(buf), f)) {
            //
            // Read errors presumably because of EOF
            //
            break;
        }

        char s1[] = "processor";
        if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
            num_records++;
            continue;
        }

        //
        // FIXME - this will match "node_<n> <garbage>"
        //
        unsigned level;
        if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
            if (nodeIdIndex + level >= maxIndex) {
                maxIndex = nodeIdIndex + level;
            }
            continue;
        }
    }

    //
    // Check for empty file / no valid processor records, or too many.
    // The number of records can't exceed the number of valid bits in the
    // affinity mask.
    //
    if (num_records == 0) {
        *line = 0;
        *msg_id = kmp_i18n_str_NoProcRecords;
        return -1;
    }
    if (num_records > (unsigned)__kmp_xproc) {
        *line = 0;
        *msg_id = kmp_i18n_str_TooManyProcRecords;
        return -1;
    }

    //
    // Set the file pointer back to the beginning, so that we can scan the
    // file again, this time performing a full parse of the data.
    // Allocate a vector of ProcCpuInfo objects, where we will place the data.
    // Adding an extra element at the end allows us to remove a lot of extra
    // checks for termination conditions.
    //
    if (fseek(f, 0, SEEK_SET) != 0) {
        *line = 0;
        *msg_id = kmp_i18n_str_CantRewindCpuinfo;
        return -1;
    }

    //
    // Allocate the array of records to store the proc info in. The dummy
    // element at the end makes the logic in filling them out easier to code.
    //
    unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
        * sizeof(unsigned *));
    unsigned i;
    for (i = 0; i <= num_records; i++) {
        threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
            * sizeof(unsigned));
    }

#define CLEANUP_THREAD_INFO \
    for (i = 0; i <= num_records; i++) { \
        __kmp_free(threadInfo[i]); \
    } \
    __kmp_free(threadInfo);

    //
    // A value of UINT_MAX means that we didn't find the field
    //
    unsigned __index;

#define INIT_PROC_INFO(p) \
    for (__index = 0; __index <= maxIndex; __index++) { \
        (p)[__index] = UINT_MAX; \
    }

    for (i = 0; i <= num_records; i++) {
        INIT_PROC_INFO(threadInfo[i]);
    }

    unsigned num_avail = 0;
    *line = 0;
    while (! feof(f)) {
        //
        // Create an inner scoping level, so that all the goto targets at the
        // end of the loop appear in an outer scoping level. This avoids
        // warnings about jumping past an initialization to a target in the
        // same block.
        //
        {
            buf[sizeof(buf) - 1] = 1;
            bool long_line = false;
            if (! fgets(buf, sizeof(buf), f)) {
                //
                // Read errors presumably because of EOF
                //
                // If there is valid data in threadInfo[num_avail], then fake
                // a blank line to ensure that the last address gets parsed.
                //
                bool valid = false;
                for (i = 0; i <= maxIndex; i++) {
                    if (threadInfo[num_avail][i] != UINT_MAX) {
                        valid = true;
                    }
                }
                if (! valid) {
                    break;
                }
                buf[0] = 0;
            } else if (!buf[sizeof(buf) - 1]) {
                //
                // The line is longer than the buffer. Set a flag and don't
                // emit an error if we were going to ignore the line, anyway.
                //
                long_line = true;

#define CHECK_LINE \
    if (long_line) { \
        CLEANUP_THREAD_INFO; \
        *msg_id = kmp_i18n_str_LongLineCpuinfo; \
        return -1; \
    }
            }
            (*line)++;

            char s1[] = "processor";
            if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s1) - 1, ':');
                unsigned val;
                if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][osIdIndex] = val;
#if KMP_OS_LINUX && USE_SYSFS_INFO
                char path[256];
                KMP_SNPRINTF(path, sizeof(path),
                    "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
                    threadInfo[num_avail][osIdIndex]);
                __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);

                KMP_SNPRINTF(path, sizeof(path),
                    "/sys/devices/system/cpu/cpu%u/topology/core_id",
                    threadInfo[num_avail][osIdIndex]);
                __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
                continue;
#else
            }
            char s2[] = "physical id";
            if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s2) - 1, ':');
                unsigned val;
                if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][pkgIdIndex] = val;
                continue;
            }
            char s3[] = "core id";
            if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s3) - 1, ':');
                unsigned val;
                if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][coreIdIndex] = val;
                continue;
#endif // KMP_OS_LINUX && USE_SYSFS_INFO
            }
1733 char s4[] = "thread id";
1734 if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
1735 CHECK_LINE;
1736 char *p = strchr(buf + sizeof(s4) - 1, ':');
1737 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001738 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001739 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
1740 threadInfo[num_avail][threadIdIndex] = val;
1741 continue;
1742 }
1743 unsigned level;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001744 if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001745 CHECK_LINE;
1746 char *p = strchr(buf + sizeof(s4) - 1, ':');
1747 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001748 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001749 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
1750 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
1751 threadInfo[num_avail][nodeIdIndex + level] = val;
1752 continue;
1753 }
1754
1755 //
1756 // We didn't recognize the leading token on the line.
1757 // There are lots of leading tokens that we don't recognize -
1758 // if the line isn't empty, go on to the next line.
1759 //
1760 if ((*buf != 0) && (*buf != '\n')) {
1761 //
1762 // If the line is longer than the buffer, read characters
1763 // until we find a newline.
1764 //
1765 if (long_line) {
1766 int ch;
1767 while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
1768 }
1769 continue;
1770 }
1771
1772 //
1773 // A newline has signalled the end of the processor record.
1774 // Check that there aren't too many procs specified.
1775 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001776 if ((int)num_avail == __kmp_xproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001777 CLEANUP_THREAD_INFO;
1778 *msg_id = kmp_i18n_str_TooManyEntries;
1779 return -1;
1780 }
1781
1782 //
1783 // Check for missing fields. The osId field must be there, and we
1784 // currently require that the physical id field is specified, also.
1785 //
1786 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
1787 CLEANUP_THREAD_INFO;
1788 *msg_id = kmp_i18n_str_MissingProcField;
1789 return -1;
1790 }
1791 if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
1792 CLEANUP_THREAD_INFO;
1793 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
1794 return -1;
1795 }
1796
1797 //
1798 // Skip this proc if it is not included in the machine model.
1799 //
1800 if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
1801 INIT_PROC_INFO(threadInfo[num_avail]);
1802 continue;
1803 }
1804
1805 //
1806 // We have a successful parse of this proc's info.
1807 // Increment the counter, and prepare for the next proc.
1808 //
1809 num_avail++;
1810 KMP_ASSERT(num_avail <= num_records);
1811 INIT_PROC_INFO(threadInfo[num_avail]);
1812 }
1813 continue;
1814
1815 no_val:
1816 CLEANUP_THREAD_INFO;
1817 *msg_id = kmp_i18n_str_MissingValCpuinfo;
1818 return -1;
1819
1820 dup_field:
1821 CLEANUP_THREAD_INFO;
1822 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
1823 return -1;
1824 }
1825 *line = 0;
1826
1827# if KMP_MIC && REDUCE_TEAM_SIZE
1828 unsigned teamSize = 0;
1829# endif // KMP_MIC && REDUCE_TEAM_SIZE
1830
1831 // check for num_records == __kmp_xproc ???
1832
1833 //
1834 // If there's only one thread context to bind to, form an Address object
1835 // with depth 1 and return immediately (or, if affinity is off, set
1836 // address2os to NULL and return).
1837 //
1838 // If it is configured to omit the package level when there is only a
1839 // single package, the logic at the end of this routine won't work if
1840 // there is only a single thread - it would try to form an Address
1841 // object with depth 0.
1842 //
1843 KMP_ASSERT(num_avail > 0);
1844 KMP_ASSERT(num_avail <= num_records);
1845 if (num_avail == 1) {
1846 __kmp_ncores = 1;
1847 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001848 if (__kmp_affinity_verbose) {
1849 if (! KMP_AFFINITY_CAPABLE()) {
1850 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
1851 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1852 KMP_INFORM(Uniform, "KMP_AFFINITY");
1853 }
1854 else {
1855 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1856 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
1857 fullMask);
1858 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
1859 if (__kmp_affinity_respect_mask) {
1860 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1861 } else {
1862 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1863 }
1864 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1865 KMP_INFORM(Uniform, "KMP_AFFINITY");
1866 }
1867 int index;
1868 kmp_str_buf_t buf;
1869 __kmp_str_buf_init(&buf);
1870 __kmp_str_buf_print(&buf, "1");
1871 for (index = maxIndex - 1; index > pkgIdIndex; index--) {
1872 __kmp_str_buf_print(&buf, " x 1");
1873 }
1874 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
1875 __kmp_str_buf_free(&buf);
1876 }
1877
1878 if (__kmp_affinity_type == affinity_none) {
1879 CLEANUP_THREAD_INFO;
1880 return 0;
1881 }
1882
1883 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
1884 Address addr(1);
1885 addr.labels[0] = threadInfo[0][pkgIdIndex];
1886 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
1887
1888 if (__kmp_affinity_gran_levels < 0) {
1889 __kmp_affinity_gran_levels = 0;
1890 }
1891
1892 if (__kmp_affinity_verbose) {
1893 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
1894 }
1895
1896 CLEANUP_THREAD_INFO;
1897 return 1;
1898 }
1899
1900 //
1901 // Sort the threadInfo table by physical Id.
1902 //
1903 qsort(threadInfo, num_avail, sizeof(*threadInfo),
1904 __kmp_affinity_cmp_ProcCpuInfo_phys_id);
1905
1906 //
1907 // The table is now sorted by pkgId / coreId / threadId, but we really
1908 // don't know the radix of any of the fields. pkgId's may be sparsely
1909 // assigned among the chips on a system. Although coreId's are usually
1910 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
1911 // [0..threadsPerCore-1], we don't want to make any such assumptions.
1912 //
1913 // For that matter, we don't know what coresPerPkg and threadsPerCore
1914 // (or the total # packages) are at this point - we want to determine
1915 // that now. We only have an upper bound on the first two figures.
1916 //
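//
// Worked example (hypothetical, uniform machine): for 2 packages x 2
// cores x 2 threads, the scan below (together with the maxCt fixup that
// follows it) ends with totals[pkgIdIndex] == 2, totals[coreIdIndex] == 4,
// totals[threadIdIndex] == 8, and maxCt[coreIdIndex] ==
// maxCt[threadIdIndex] == 2: counts/maxCt track the radix within one
// parent, while totals counts nodes machine-wide.
//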
1917 unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
1918 * sizeof(unsigned));
1919 unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
1920 * sizeof(unsigned));
1921 unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
1922 * sizeof(unsigned));
1923 unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
1924 * sizeof(unsigned));
1925
1926 bool assign_thread_ids = false;
1927 unsigned threadIdCt;
1928 unsigned index;
1929
1930 restart_radix_check:
1931 threadIdCt = 0;
1932
1933 //
1934 // Initialize the counter arrays with data from threadInfo[0].
1935 //
1936 if (assign_thread_ids) {
1937 if (threadInfo[0][threadIdIndex] == UINT_MAX) {
1938 threadInfo[0][threadIdIndex] = threadIdCt++;
1939 }
1940 else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
1941 threadIdCt = threadInfo[0][threadIdIndex] + 1;
1942 }
1943 }
1944 for (index = 0; index <= maxIndex; index++) {
1945 counts[index] = 1;
1946 maxCt[index] = 1;
1947 totals[index] = 1;
1948 lastId[index] = threadInfo[0][index];
1949 }
1950
1951 //
1952 // Run through the rest of the OS procs.
1953 //
1954 for (i = 1; i < num_avail; i++) {
1955 //
1956 // Find the most significant index whose id differs
1957 // from the id for the previous OS proc.
1958 //
1959 for (index = maxIndex; index >= threadIdIndex; index--) {
1960 if (assign_thread_ids && (index == threadIdIndex)) {
1961 //
1962 // Auto-assign the thread id field if it wasn't specified.
1963 //
1964 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
1965 threadInfo[i][threadIdIndex] = threadIdCt++;
1966 }
1967
1968 //
1969 // Apparently the thread id field was specified for some
1970 // entries and not others. Start the thread id counter
1971 // off at the next higher thread id.
1972 //
1973 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
1974 threadIdCt = threadInfo[i][threadIdIndex] + 1;
1975 }
1976 }
1977 if (threadInfo[i][index] != lastId[index]) {
1978 //
1979 // Run through all indices which are less significant,
1980 // and reset the counts to 1.
1981 //
1982 // At all levels up to and including index, we need to
1983 // increment the totals and record the last id.
1984 //
1985 unsigned index2;
1986 for (index2 = threadIdIndex; index2 < index; index2++) {
1987 totals[index2]++;
1988 if (counts[index2] > maxCt[index2]) {
1989 maxCt[index2] = counts[index2];
1990 }
1991 counts[index2] = 1;
1992 lastId[index2] = threadInfo[i][index2];
1993 }
1994 counts[index]++;
1995 totals[index]++;
1996 lastId[index] = threadInfo[i][index];
1997
1998 if (assign_thread_ids && (index > threadIdIndex)) {
1999
2000# if KMP_MIC && REDUCE_TEAM_SIZE
2001 //
2002 // The default team size is the total #threads in the machine
2003 // minus 1 thread for every core that has 3 or more threads.
2004 //
2005 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2006# endif // KMP_MIC && REDUCE_TEAM_SIZE
2007
2008 //
2009 // Restart the thread counter, as we are on a new core.
2010 //
2011 threadIdCt = 0;
2012
2013 //
2014 // Auto-assign the thread id field if it wasn't specified.
2015 //
2016 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2017 threadInfo[i][threadIdIndex] = threadIdCt++;
2018 }
2019
2020 //
2021 // Apparently the thread id field was specified for some
2022 // entries and not others. Start the thread id counter
2023 // off at the next higher thread id.
2024 //
2025 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2026 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2027 }
2028 }
2029 break;
2030 }
2031 }
2032 if (index < threadIdIndex) {
2033 //
2034 // If thread ids were specified, it is an error if they are not
2035 // unique. Also, check that we haven't already restarted the
2036 // loop (to be safe - shouldn't need to).
2037 //
2038 if ((threadInfo[i][threadIdIndex] != UINT_MAX)
2039 || assign_thread_ids) {
2040 __kmp_free(lastId);
2041 __kmp_free(totals);
2042 __kmp_free(maxCt);
2043 __kmp_free(counts);
2044 CLEANUP_THREAD_INFO;
2045 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2046 return -1;
2047 }
2048
2049 //
2050 // If the thread ids were not specified and we see entries
2051 // that are duplicates, start the loop over and
2052 // assign the thread ids manually.
2053 //
2054 assign_thread_ids = true;
2055 goto restart_radix_check;
2056 }
2057 }
2058
2059# if KMP_MIC && REDUCE_TEAM_SIZE
2060 //
2061 // The default team size is the total #threads in the machine
2062 // minus 1 thread for every core that has 3 or more threads.
2063 //
2064 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
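//
// For example (hypothetical): on a coprocessor with 60 cores and 4
// thread contexts per core, each core contributes 4 - 1 = 3 threads,
// so the default team size computed here is 60 * 3 = 180.
//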
2065# endif // KMP_MIC && REDUCE_TEAM_SIZE
2066
2067 for (index = threadIdIndex; index <= maxIndex; index++) {
2068 if (counts[index] > maxCt[index]) {
2069 maxCt[index] = counts[index];
2070 }
2071 }
2072
2073 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2074 nCoresPerPkg = maxCt[coreIdIndex];
2075 nPackages = totals[pkgIdIndex];
2076
2077 //
2078 // Check to see if the machine topology is uniform
2079 //
2080 unsigned prod = totals[maxIndex];
2081 for (index = threadIdIndex; index < maxIndex; index++) {
2082 prod *= maxCt[index];
2083 }
2084 bool uniform = (prod == totals[threadIdIndex]);
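//
// Worked example (hypothetical, assuming the package level is the
// topmost, i.e. maxIndex == pkgIdIndex): for the uniform 2 x 2 x 2
// machine, prod = totals[pkgIdIndex] * maxCt[coreIdIndex] *
// maxCt[threadIdIndex] = 2 * 2 * 2 = 8 == totals[threadIdIndex], so
// uniform is true. If one core had only a single thread context,
// totals[threadIdIndex] would be 7 and the topology would be classified
// as non-uniform.
//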
2085
2086 //
2087 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00002088 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002089 // nCoresPerPkg, & nPackages. Make sure all these vars are set
2090 // correctly, and return now if affinity is not enabled.
2091 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00002092 __kmp_ncores = totals[coreIdIndex];
2093
2094 if (__kmp_affinity_verbose) {
2095 if (! KMP_AFFINITY_CAPABLE()) {
2096 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2097 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2098 if (uniform) {
2099 KMP_INFORM(Uniform, "KMP_AFFINITY");
2100 } else {
2101 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2102 }
2103 }
2104 else {
2105 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2106 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
2107 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2108 if (__kmp_affinity_respect_mask) {
2109 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2110 } else {
2111 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2112 }
2113 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2114 if (uniform) {
2115 KMP_INFORM(Uniform, "KMP_AFFINITY");
2116 } else {
2117 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2118 }
2119 }
2120 kmp_str_buf_t buf;
2121 __kmp_str_buf_init(&buf);
2122
2123 __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
2124 for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2125 __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
2126 }
2127 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
2128 maxCt[threadIdIndex], __kmp_ncores);
2129
2130 __kmp_str_buf_free(&buf);
2131 }
2132
2133# if KMP_MIC && REDUCE_TEAM_SIZE
2134 //
2135 // Set the default team size.
2136 //
2137 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2138 __kmp_dflt_team_nth = teamSize;
2139 KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
2140 __kmp_dflt_team_nth));
2141 }
2142# endif // KMP_MIC && REDUCE_TEAM_SIZE
2143
2144 if (__kmp_affinity_type == affinity_none) {
2145 __kmp_free(lastId);
2146 __kmp_free(totals);
2147 __kmp_free(maxCt);
2148 __kmp_free(counts);
2149 CLEANUP_THREAD_INFO;
2150 return 0;
2151 }
2152
2153 //
2154 // Count the number of levels which have more nodes at that level than
2155 // at the parent's level (with there being an implicit root node of
2156 // the top level). This is equivalent to saying that there is at least
2157 // one node at this level which has a sibling. These levels are in the
2158 // map, and the package level is always in the map.
2159 //
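//
// For example (hypothetical): if every core has exactly one thread
// context, totals[threadIdIndex] == totals[coreIdIndex], so no node at
// the thread level has a sibling, inMap[threadIdIndex] is false, and
// the thread level is omitted from the map.
//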
2160 bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
2161 int level = 0;
2162 for (index = threadIdIndex; index < maxIndex; index++) {
2163 KMP_ASSERT(totals[index] >= totals[index + 1]);
2164 inMap[index] = (totals[index] > totals[index + 1]);
2165 }
2166 inMap[maxIndex] = (totals[maxIndex] > 1);
2167 inMap[pkgIdIndex] = true;
2168
2169 int depth = 0;
2170 for (index = threadIdIndex; index <= maxIndex; index++) {
2171 if (inMap[index]) {
2172 depth++;
2173 }
2174 }
2175 KMP_ASSERT(depth > 0);
2176
2177 //
2178 // Construct the data structure that is to be returned.
2179 //
2180 *address2os = (AddrUnsPair*)
2181 __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
2182 int pkgLevel = -1;
2183 int coreLevel = -1;
2184 int threadLevel = -1;
2185
2186 for (i = 0; i < num_avail; ++i) {
2187 Address addr(depth);
2188 unsigned os = threadInfo[i][osIdIndex];
2189 int src_index;
2190 int dst_index = 0;
2191
2192 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
2193 if (! inMap[src_index]) {
2194 continue;
2195 }
2196 addr.labels[dst_index] = threadInfo[i][src_index];
2197 if (src_index == pkgIdIndex) {
2198 pkgLevel = dst_index;
2199 }
2200 else if (src_index == coreIdIndex) {
2201 coreLevel = dst_index;
2202 }
2203 else if (src_index == threadIdIndex) {
2204 threadLevel = dst_index;
2205 }
2206 dst_index++;
2207 }
2208 (*address2os)[i] = AddrUnsPair(addr, os);
2209 }
2210
2211 if (__kmp_affinity_gran_levels < 0) {
2212 //
2213 // Set the granularity level based on what levels are modeled
2214 // in the machine topology map.
2215 //
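//
// For example (hypothetical): with granularity=core on a map that
// models a thread level, only the thread level lies below the
// granularity, so __kmp_affinity_gran_levels becomes 1 and the thread
// label is ignored when the masks are formed.
//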
2216 unsigned src_index;
2217 __kmp_affinity_gran_levels = 0;
2218 for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2219 if (! inMap[src_index]) {
2220 continue;
2221 }
2222 switch (src_index) {
2223 case threadIdIndex:
2224 if (__kmp_affinity_gran > affinity_gran_thread) {
2225 __kmp_affinity_gran_levels++;
2226 }
2227
2228 break;
2229 case coreIdIndex:
2230 if (__kmp_affinity_gran > affinity_gran_core) {
2231 __kmp_affinity_gran_levels++;
2232 }
2233 break;
2234
2235 case pkgIdIndex:
2236 if (__kmp_affinity_gran > affinity_gran_package) {
2237 __kmp_affinity_gran_levels++;
2238 }
2239 break;
2240 }
2241 }
2242 }
2243
2244 if (__kmp_affinity_verbose) {
2245 __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2246 coreLevel, threadLevel);
2247 }
2248
2249 __kmp_free(inMap);
2250 __kmp_free(lastId);
2251 __kmp_free(totals);
2252 __kmp_free(maxCt);
2253 __kmp_free(counts);
2254 CLEANUP_THREAD_INFO;
2255 return depth;
2256}
2257
2258
2259//
2260// Create and return a table of affinity masks, indexed by OS thread ID.
2261// This routine handles OR'ing together all the affinity masks of threads
2262// that are sufficiently close, if granularity > fine.
2263//
2264static kmp_affin_mask_t *
2265__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
2266 AddrUnsPair *address2os, unsigned numAddrs)
2267{
2268 //
2269 // First form a table of affinity masks in order of OS thread id.
2270 //
2271 unsigned depth;
2272 unsigned maxOsId;
2273 unsigned i;
2274
2275 KMP_ASSERT(numAddrs > 0);
2276 depth = address2os[0].first.depth;
2277
2278 maxOsId = 0;
2279 for (i = 0; i < numAddrs; i++) {
2280 unsigned osId = address2os[i].second;
2281 if (osId > maxOsId) {
2282 maxOsId = osId;
2283 }
2284 }
2285 kmp_affin_mask_t *osId2Mask = (kmp_affin_mask_t *)__kmp_allocate(
2286 (maxOsId + 1) * __kmp_affin_mask_size);
2287
2288 //
2289 // Sort the address2os table according to physical order. Doing so
2290 // will put all threads on the same core/package/node in consecutive
2291 // locations.
2292 //
2293 qsort(address2os, numAddrs, sizeof(*address2os),
2294 __kmp_affinity_cmp_Address_labels);
2295
2296 KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2297 if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
2298 KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
2299 }
2300 if (__kmp_affinity_gran_levels >= (int)depth) {
2301 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2302 && (__kmp_affinity_type != affinity_none))) {
2303 KMP_WARNING(AffThreadsMayMigrate);
2304 }
2305 }
2306
2307 //
2308 // Run through the table, forming the masks for all threads on each
2309 // core. Threads on the same core will have identical "Address"
2310 // objects, not considering the last level, which must be the thread
2311 // id. All threads on a core will appear consecutively.
2312 //
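//
// Illustrative example (hypothetical): with __kmp_affinity_gran_levels
// == 1 and 2 thread contexts per core, the two hyperthreads of a core
// compare as "close" (their labels differ only in the ignored thread
// level), so both OS procs end up sharing one mask with both bits set.
//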
2313 unsigned unique = 0;
2314 unsigned j = 0; // index of 1st thread on core
2315 unsigned leader = 0;
2316 Address *leaderAddr = &(address2os[0].first);
2317 kmp_affin_mask_t *sum
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002318 = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002319 KMP_CPU_ZERO(sum);
2320 KMP_CPU_SET(address2os[0].second, sum);
2321 for (i = 1; i < numAddrs; i++) {
2322 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002323 // If this thread is sufficiently close to the leader (within the
Jim Cownie5e8470a2013-09-27 10:38:44 +00002324 // granularity setting), then set the bit for this os thread in the
2325 // affinity mask for this group, and go on to the next thread.
2326 //
2327 if (leaderAddr->isClose(address2os[i].first,
2328 __kmp_affinity_gran_levels)) {
2329 KMP_CPU_SET(address2os[i].second, sum);
2330 continue;
2331 }
2332
2333 //
2334 // For every thread in this group, copy the mask to the thread's
2335 // entry in the osId2Mask table. Mark the first address as a
2336 // leader.
2337 //
2338 for (; j < i; j++) {
2339 unsigned osId = address2os[j].second;
2340 KMP_DEBUG_ASSERT(osId <= maxOsId);
2341 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2342 KMP_CPU_COPY(mask, sum);
2343 address2os[j].first.leader = (j == leader);
2344 }
2345 unique++;
2346
2347 //
2348 // Start a new mask.
2349 //
2350 leader = i;
2351 leaderAddr = &(address2os[i].first);
2352 KMP_CPU_ZERO(sum);
2353 KMP_CPU_SET(address2os[i].second, sum);
2354 }
2355
2356 //
2357 // For every thread in last group, copy the mask to the thread's
2358 // entry in the osId2Mask table.
2359 //
2360 for (; j < i; j++) {
2361 unsigned osId = address2os[j].second;
2362 KMP_DEBUG_ASSERT(osId <= maxOsId);
2363 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2364 KMP_CPU_COPY(mask, sum);
2365 address2os[j].first.leader = (j == leader);
2366 }
2367 unique++;
2368
2369 *maxIndex = maxOsId;
2370 *numUnique = unique;
2371 return osId2Mask;
2372}
2373
2374
2375//
2376// Stuff for the affinity proclist parsers. It's easier to declare these vars
2377// as file-static than to try and pass them through the calling sequence of
2378// the recursive-descent OMP_PLACES parser.
2379//
2380static kmp_affin_mask_t *newMasks;
2381static int numNewMasks;
2382static int nextNewMask;
2383
2384#define ADD_MASK(_mask) \
2385 { \
2386 if (nextNewMask >= numNewMasks) { \
2387 numNewMasks *= 2; \
2388 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
2389 numNewMasks * __kmp_affin_mask_size); \
2390 } \
2391 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
2392 nextNewMask++; \
2393 }
2394
2395#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
2396 { \
2397 if (((_osId) > _maxOsId) || \
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002398 (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002399 if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
2400 && (__kmp_affinity_type != affinity_none))) { \
2401 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
2402 } \
2403 } \
2404 else { \
2405 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
2406 } \
2407 }
2408
2409
2410//
2411// Re-parse the proclist (for the explicit affinity type), and form the list
2412// of affinity newMasks indexed by gtid.
2413//
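//
// For example (hypothetical setting): a proclist of "0,3-5,{7,8}" yields
// the masks {0}, {3}, {4}, {5}, and {7,8} - a braced set becomes a single
// union mask, while a bare number or a range contributes one mask per proc.
//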
2414static void
2415__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
2416 unsigned int *out_numMasks, const char *proclist,
2417 kmp_affin_mask_t *osId2Mask, int maxOsId)
2418{
2419 const char *scan = proclist;
2420 const char *next = proclist;
2421
2422 //
2423 // We use malloc() for the temporary mask vector,
2424 // so that we can use realloc() to extend it.
2425 //
2426 numNewMasks = 2;
2427 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
2428 * __kmp_affin_mask_size);
2429 nextNewMask = 0;
2430 kmp_affin_mask_t *sumMask = (kmp_affin_mask_t *)__kmp_allocate(
2431 __kmp_affin_mask_size);
2432 int setSize = 0;
2433
2434 for (;;) {
2435 int start, end, stride;
2436
2437 SKIP_WS(scan);
2438 next = scan;
2439 if (*next == '\0') {
2440 break;
2441 }
2442
2443 if (*next == '{') {
2444 int num;
2445 setSize = 0;
2446 next++; // skip '{'
2447 SKIP_WS(next);
2448 scan = next;
2449
2450 //
2451 // Read the first integer in the set.
2452 //
2453 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2454 "bad proclist");
2455 SKIP_DIGITS(next);
2456 num = __kmp_str_to_int(scan, *next);
2457 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2458
2459 //
2460 // Copy the mask for that osId to the sum (union) mask.
2461 //
2462 if ((num > maxOsId) ||
2463 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2464 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2465 && (__kmp_affinity_type != affinity_none))) {
2466 KMP_WARNING(AffIgnoreInvalidProcID, num);
2467 }
2468 KMP_CPU_ZERO(sumMask);
2469 }
2470 else {
2471 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2472 setSize = 1;
2473 }
2474
2475 for (;;) {
2476 //
2477 // Check for end of set.
2478 //
2479 SKIP_WS(next);
2480 if (*next == '}') {
2481 next++; // skip '}'
2482 break;
2483 }
2484
2485 //
2486 // Skip optional comma.
2487 //
2488 if (*next == ',') {
2489 next++;
2490 }
2491 SKIP_WS(next);
2492
2493 //
2494 // Read the next integer in the set.
2495 //
2496 scan = next;
2497 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2498 "bad explicit proc list");
2499
2500 SKIP_DIGITS(next);
2501 num = __kmp_str_to_int(scan, *next);
2502 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2503
2504 //
2505 // Add the mask for that osId to the sum mask.
2506 //
2507 if ((num > maxOsId) ||
2508 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2509 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2510 && (__kmp_affinity_type != affinity_none))) {
2511 KMP_WARNING(AffIgnoreInvalidProcID, num);
2512 }
2513 }
2514 else {
2515 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2516 setSize++;
2517 }
2518 }
2519 if (setSize > 0) {
2520 ADD_MASK(sumMask);
2521 }
2522
2523 SKIP_WS(next);
2524 if (*next == ',') {
2525 next++;
2526 }
2527 scan = next;
2528 continue;
2529 }
2530
2531 //
2532 // Read the first integer.
2533 //
2534 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2535 SKIP_DIGITS(next);
2536 start = __kmp_str_to_int(scan, *next);
2537 KMP_ASSERT2(start >= 0, "bad explicit proc list");
2538 SKIP_WS(next);
2539
2540 //
2541 // If this isn't a range, then add a mask to the list and go on.
2542 //
2543 if (*next != '-') {
2544 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2545
2546 //
2547 // Skip optional comma.
2548 //
2549 if (*next == ',') {
2550 next++;
2551 }
2552 scan = next;
2553 continue;
2554 }
2555
2556 //
2557 // This is a range. Skip over the '-' and read in the 2nd int.
2558 //
2559 next++; // skip '-'
2560 SKIP_WS(next);
2561 scan = next;
2562 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2563 SKIP_DIGITS(next);
2564 end = __kmp_str_to_int(scan, *next);
2565 KMP_ASSERT2(end >= 0, "bad explicit proc list");
2566
2567 //
2568 // Check for a stride parameter
2569 //
2570 stride = 1;
2571 SKIP_WS(next);
2572 if (*next == ':') {
2573 //
2574 // A stride is specified. Skip over the ':' and read the 3rd int.
2575 //
2576 int sign = +1;
2577 next++; // skip ':'
2578 SKIP_WS(next);
2579 scan = next;
2580 if (*next == '-') {
2581 sign = -1;
2582 next++;
2583 SKIP_WS(next);
2584 scan = next;
2585 }
2586 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2587 "bad explicit proc list");
2588 SKIP_DIGITS(next);
2589 stride = __kmp_str_to_int(scan, *next);
2590 KMP_ASSERT2(stride >= 0, "bad explicit proc list");
2591 stride *= sign;
2592 }
2593
2594 //
2595 // Do some range checks.
2596 //
2597 KMP_ASSERT2(stride != 0, "bad explicit proc list");
2598 if (stride > 0) {
2599 KMP_ASSERT2(start <= end, "bad explicit proc list");
2600 }
2601 else {
2602 KMP_ASSERT2(start >= end, "bad explicit proc list");
2603 }
2604 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
2605
2606 //
2607 // Add the mask for each OS proc # to the list.
2608 //
2609 if (stride > 0) {
2610 do {
2611 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2612 start += stride;
2613 } while (start <= end);
2614 }
2615 else {
2616 do {
2617 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2618 start += stride;
2619 } while (start >= end);
2620 }
2621
2622 //
2623 // Skip optional comma.
2624 //
2625 SKIP_WS(next);
2626 if (*next == ',') {
2627 next++;
2628 }
2629 scan = next;
2630 }
2631
2632 *out_numMasks = nextNewMask;
2633 if (nextNewMask == 0) {
2634 *out_masks = NULL;
2635 KMP_INTERNAL_FREE(newMasks);
2636 return;
2637 }
2638 *out_masks
2639 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002640 KMP_MEMCPY(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002641 __kmp_free(sumMask);
2642 KMP_INTERNAL_FREE(newMasks);
2643}
2644
2645
2646# if OMP_40_ENABLED
2647
2648/*-----------------------------------------------------------------------------
2649
2650Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
2651places. Again, Here is the grammar:
2652
2653place_list := place
2654place_list := place , place_list
2655place := num
2656place := place : num
2657place := place : num : signed
2658place := { subplace_list }
2659place := ! place // (lowest priority)
2660subplace_list := subplace
2661subplace_list := subplace , subplace_list
2662subplace := num
2663subplace := num : num
2664subplace := num : num : signed
2665signed := num
2666signed := + signed
2667signed := - signed
2668
2669-----------------------------------------------------------------------------*/
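//
// For example (hypothetical setting): OMP_PLACES="{0:2}:4:2" parses as the
// subplace {0:2} (procs 0 and 1), replicated 4 times with a stride of 2,
// producing the places {0,1}, {2,3}, {4,5}, and {6,7}.
//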
2670
2671static void
2672__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
2673 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
2674{
2675 const char *next;
2676
2677 for (;;) {
2678 int start, count, stride, i;
2679
2680 //
2681 // Read in the starting proc id
2682 //
2683 SKIP_WS(*scan);
2684 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2685 "bad explicit places list");
2686 next = *scan;
2687 SKIP_DIGITS(next);
2688 start = __kmp_str_to_int(*scan, *next);
2689 KMP_ASSERT(start >= 0);
2690 *scan = next;
2691
2692 //
2693 // valid follow sets are ',' ':' and '}'
2694 //
2695 SKIP_WS(*scan);
2696 if (**scan == '}' || **scan == ',') {
2697 if ((start > maxOsId) ||
2698 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
2699 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2700 && (__kmp_affinity_type != affinity_none))) {
2701 KMP_WARNING(AffIgnoreInvalidProcID, start);
2702 }
2703 }
2704 else {
2705 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
2706 (*setSize)++;
2707 }
2708 if (**scan == '}') {
2709 break;
2710 }
2711 (*scan)++; // skip ','
2712 continue;
2713 }
2714 KMP_ASSERT2(**scan == ':', "bad explicit places list");
2715 (*scan)++; // skip ':'
2716
2717 //
2718 // Read count parameter
2719 //
2720 SKIP_WS(*scan);
2721 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2722 "bad explicit places list");
2723 next = *scan;
2724 SKIP_DIGITS(next);
2725 count = __kmp_str_to_int(*scan, *next);
2726 KMP_ASSERT(count >= 0);
2727 *scan = next;
2728
2729 //
2730 // valid follow sets are ',' ':' and '}'
2731 //
2732 SKIP_WS(*scan);
2733 if (**scan == '}' || **scan == ',') {
2734 for (i = 0; i < count; i++) {
2735 if ((start > maxOsId) ||
2736 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
2737 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2738 && (__kmp_affinity_type != affinity_none))) {
2739 KMP_WARNING(AffIgnoreInvalidProcID, start);
2740 }
2741 break; // don't proliferate warnings for large count
2742 }
2743 else {
2744 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
2745 start++;
2746 (*setSize)++;
2747 }
2748 }
2749 if (**scan == '}') {
2750 break;
2751 }
2752 (*scan)++; // skip ','
2753 continue;
2754 }
2755 KMP_ASSERT2(**scan == ':', "bad explicit places list");
2756 (*scan)++; // skip ':'
2757
2758 //
2759 // Read stride parameter
2760 //
2761 int sign = +1;
2762 for (;;) {
2763 SKIP_WS(*scan);
2764 if (**scan == '+') {
2765 (*scan)++; // skip '+'
2766 continue;
2767 }
2768 if (**scan == '-') {
2769 sign *= -1;
2770 (*scan)++; // skip '-'
2771 continue;
2772 }
2773 break;
2774 }
2775 SKIP_WS(*scan);
2776 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2777 "bad explicit places list");
2778 next = *scan;
2779 SKIP_DIGITS(next);
2780 stride = __kmp_str_to_int(*scan, *next);
2781 KMP_ASSERT(stride >= 0);
2782 *scan = next;
2783 stride *= sign;
2784
2785 //
2786 // valid follow sets are ',' and '}'
2787 //
2788 SKIP_WS(*scan);
2789 if (**scan == '}' || **scan == ',') {
2790 for (i = 0; i < count; i++) {
2791 if ((start > maxOsId) ||
2792 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
2793 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2794 && (__kmp_affinity_type != affinity_none))) {
2795 KMP_WARNING(AffIgnoreInvalidProcID, start);
2796 }
2797 break; // don't proliferate warnings for large count
2798 }
2799 else {
2800 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
2801 start += stride;
2802 (*setSize)++;
2803 }
2804 }
2805 if (**scan == '}') {
2806 break;
2807 }
2808 (*scan)++; // skip ','
2809 continue;
2810 }
2811
2812 KMP_ASSERT2(0, "bad explicit places list");
2813 }
2814}
2815
2816
2817static void
2818__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
2819 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
2820{
2821 const char *next;
2822
2823 //
2824 // valid follow sets are '{' '!' and num
2825 //
2826 SKIP_WS(*scan);
2827 if (**scan == '{') {
2828 (*scan)++; // skip '{'
2829 __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
2830 setSize);
2831 KMP_ASSERT2(**scan == '}', "bad explicit places list");
2832 (*scan)++; // skip '}'
2833 }
2834 else if (**scan == '!') {
Jonathan Peyton6778c732015-10-19 19:43:01 +00002835 (*scan)++; // skip '!'
Jim Cownie5e8470a2013-09-27 10:38:44 +00002836 __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
2837 KMP_CPU_COMPLEMENT(tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002838 }
2839 else if ((**scan >= '0') && (**scan <= '9')) {
2840 next = *scan;
2841 SKIP_DIGITS(next);
2842 int num = __kmp_str_to_int(*scan, *next);
2843 KMP_ASSERT(num >= 0);
2844 if ((num > maxOsId) ||
2845 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2846 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2847 && (__kmp_affinity_type != affinity_none))) {
2848 KMP_WARNING(AffIgnoreInvalidProcID, num);
2849 }
2850 }
2851 else {
2852 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
2853 (*setSize)++;
2854 }
2855 *scan = next; // skip num
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002856 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002857 else {
2858 KMP_ASSERT2(0, "bad explicit places list");
2859 }
2860}
2861
2862
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002863//static void
2864void
Jim Cownie5e8470a2013-09-27 10:38:44 +00002865__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
2866 unsigned int *out_numMasks, const char *placelist,
2867 kmp_affin_mask_t *osId2Mask, int maxOsId)
2868{
2869 const char *scan = placelist;
2870 const char *next = placelist;
2871
2872 numNewMasks = 2;
2873 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
2874 * __kmp_affin_mask_size);
2875 nextNewMask = 0;
2876
2877 kmp_affin_mask_t *tempMask = (kmp_affin_mask_t *)__kmp_allocate(
2878 __kmp_affin_mask_size);
2879 KMP_CPU_ZERO(tempMask);
2880 int setSize = 0;
2881
2882 for (;;) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002883 __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
2884
2885 //
2886 // valid follow sets are ',' ':' and EOL
2887 //
2888 SKIP_WS(scan);
2889 if (*scan == '\0' || *scan == ',') {
2890 if (setSize > 0) {
2891 ADD_MASK(tempMask);
2892 }
2893 KMP_CPU_ZERO(tempMask);
2894 setSize = 0;
2895 if (*scan == '\0') {
2896 break;
2897 }
2898 scan++; // skip ','
2899 continue;
2900 }
2901
2902 KMP_ASSERT2(*scan == ':', "bad explicit places list");
2903 scan++; // skip ':'
2904
2905 //
2906 // Read count parameter
2907 //
2908 SKIP_WS(scan);
2909 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
2910 "bad explicit places list");
2911 next = scan;
2912 SKIP_DIGITS(next);
Jim Cownie181b4bb2013-12-23 17:28:57 +00002913 int count = __kmp_str_to_int(scan, *next);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002914 KMP_ASSERT(count >= 0);
2915 scan = next;
2916
2917 //
2918 // valid follow sets are ',' ':' and EOL
2919 //
2920 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002921 int stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002922 if (*scan == '\0' || *scan == ',') {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002923 stride = +1;
2924 }
2925 else {
2926 KMP_ASSERT2(*scan == ':', "bad explicit places list");
2927 scan++; // skip ':'
Jim Cownie5e8470a2013-09-27 10:38:44 +00002928
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002929 //
2930 // Read stride parameter
2931 //
2932 int sign = +1;
2933 for (;;) {
2934 SKIP_WS(scan);
2935 if (*scan == '+') {
2936 scan++; // skip '+'
2937 continue;
2938 }
2939 if (*scan == '-') {
2940 sign *= -1;
2941 scan++; // skip '-'
2942 continue;
2943 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002944 break;
2945 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002946 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002947 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
2948 "bad explicit places list");
2949 next = scan;
2950 SKIP_DIGITS(next);
2951 stride = __kmp_str_to_int(scan, *next);
2952 KMP_DEBUG_ASSERT(stride >= 0);
2953 scan = next;
2954 stride *= sign;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002955 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002956
2957 if (stride > 0) {
2958 int i;
2959 for (i = 0; i < count; i++) {
2960 int j;
2961 if (setSize == 0) {
2962 break;
2963 }
2964 ADD_MASK(tempMask);
2965 setSize = 0;
2966 for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002967 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
2968 KMP_CPU_CLR(j, tempMask);
2969 }
2970 else if ((j > maxOsId) ||
2971 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
Andrey Churbanov16a14322015-03-10 09:34:38 +00002972 if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
2973 && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002974 KMP_WARNING(AffIgnoreInvalidProcID, j);
2975 }
2976 KMP_CPU_CLR(j, tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002977 }
2978 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002979 KMP_CPU_SET(j, tempMask);
2980 setSize++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002981 }
2982 }
2983 for (; j >= 0; j--) {
2984 KMP_CPU_CLR(j, tempMask);
2985 }
2986 }
2987 }
2988 else {
2989 int i;
2990 for (i = 0; i < count; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002991 int j;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002992 if (setSize == 0) {
2993 break;
2994 }
2995 ADD_MASK(tempMask);
2996 setSize = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002997 for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002998 j++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002999 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3000 KMP_CPU_CLR(j, tempMask);
3001 }
3002 else if ((j > maxOsId) ||
3003 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
Andrey Churbanov16a14322015-03-10 09:34:38 +00003004 if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
3005 && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003006 KMP_WARNING(AffIgnoreInvalidProcID, j);
3007 }
3008 KMP_CPU_CLR(j, tempMask);
3009 }
3010 else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003011 KMP_CPU_SET(j, tempMask);
3012 setSize++;
3013 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003014 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003015 for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003016 KMP_CPU_CLR(j, tempMask);
3017 }
3018 }
3019 }
3020 KMP_CPU_ZERO(tempMask);
3021 setSize = 0;
3022
3023 //
3024 // valid follow sets are ',' and EOL
3025 //
3026 SKIP_WS(scan);
3027 if (*scan == '\0') {
3028 break;
3029 }
3030 if (*scan == ',') {
3031 scan++; // skip ','
3032 continue;
3033 }
3034
3035 KMP_ASSERT2(0, "bad explicit places list");
3036 }
3037
3038 *out_numMasks = nextNewMask;
3039 if (nextNewMask == 0) {
3040 *out_masks = NULL;
3041 KMP_INTERNAL_FREE(newMasks);
3042 return;
3043 }
3044 *out_masks
3045 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003046 KMP_MEMCPY(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003047 __kmp_free(tempMask);
3048 KMP_INTERNAL_FREE(newMasks);
3049}
3050
3051# endif /* OMP_40_ENABLED */
3052
3053#undef ADD_MASK
3054#undef ADD_MASK_OSID
3055
Jim Cownie5e8470a2013-09-27 10:38:44 +00003056static void
3057__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
3058{
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003059 if (__kmp_place_num_sockets == 0 &&
3060 __kmp_place_num_cores == 0 &&
3061 __kmp_place_num_threads_per_core == 0 )
3062 return; // no topology limiting actions requested, exit
3063 if (__kmp_place_num_sockets == 0)
3064 __kmp_place_num_sockets = nPackages; // use all available sockets
3065 if (__kmp_place_num_cores == 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003066 __kmp_place_num_cores = nCoresPerPkg; // use all available cores
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003067 if (__kmp_place_num_threads_per_core == 0 ||
3068 __kmp_place_num_threads_per_core > __kmp_nThreadsPerCore)
3069 __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
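//
// Illustrative request (hypothetical): on a 4-socket machine with 12
// cores per socket and 2 thread contexts per core, limiting the model to
// 2 sockets, 6 cores, and 1 thread per core (e.g. via the
// KMP_PLACE_THREADS-style controls that populate __kmp_place_num_*)
// trims the map from 96 entries down to 2 * 6 * 1 == 12.
//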
3070
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003071 if ( !__kmp_affinity_uniform_topology() ) {
3072 KMP_WARNING( AffThrPlaceNonUniform );
3073 return; // don't support non-uniform topology
3074 }
3075 if ( depth != 3 ) {
3076 KMP_WARNING( AffThrPlaceNonThreeLevel );
3077 return; // don't support not-3-level topology
Jim Cownie5e8470a2013-09-27 10:38:44 +00003078 }
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003079 if (__kmp_place_socket_offset + __kmp_place_num_sockets > nPackages) {
3080 KMP_WARNING(AffThrPlaceManySockets);
3081 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003082 }
Andrey Churbanov12875572015-03-10 09:00:36 +00003083 if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003084 KMP_WARNING( AffThrPlaceManyCores );
3085 return;
3086 }
3087
3088 AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003089 __kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
3090
Jim Cownie5e8470a2013-09-27 10:38:44 +00003091 int i, j, k, n_old = 0, n_new = 0;
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003092 for (i = 0; i < nPackages; ++i)
3093 if (i < __kmp_place_socket_offset ||
3094 i >= __kmp_place_socket_offset + __kmp_place_num_sockets)
3095 n_old += nCoresPerPkg * __kmp_nThreadsPerCore; // skip not-requested socket
3096 else
3097 for (j = 0; j < nCoresPerPkg; ++j) // walk through requested socket
3098 if (j < __kmp_place_core_offset ||
3099 j >= __kmp_place_core_offset + __kmp_place_num_cores)
3100 n_old += __kmp_nThreadsPerCore; // skip not-requested core
3101 else
3102 for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through requested core
3103 if (k < __kmp_place_num_threads_per_core) {
3104 newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
3105 n_new++;
3106 }
3107 n_old++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003108 }
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003109 KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
3110 KMP_DEBUG_ASSERT(n_new == __kmp_place_num_sockets * __kmp_place_num_cores *
3111 __kmp_place_num_threads_per_core);
3112
3113 nPackages = __kmp_place_num_sockets; // correct nPackages
Jim Cownie5e8470a2013-09-27 10:38:44 +00003114 nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
3115 __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
3116 __kmp_avail_proc = n_new; // correct avail_proc
3117 __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
3118
3119 __kmp_free( *pAddr );
3120 *pAddr = newAddr; // replace old topology with new one
3121}
3122
Jim Cownie5e8470a2013-09-27 10:38:44 +00003123
3124static AddrUnsPair *address2os = NULL;
3125static int * procarr = NULL;
3126static int __kmp_aff_depth = 0;
3127
3128static void
3129__kmp_aux_affinity_initialize(void)
3130{
3131 if (__kmp_affinity_masks != NULL) {
3132 KMP_ASSERT(fullMask != NULL);
3133 return;
3134 }
3135
3136 //
3137 // Create the "full" mask - this defines all of the processors that we
3138 // consider to be in the machine model. If respect is set, then it is
3139 // the initialization thread's affinity mask. Otherwise, it is all
3140 // processors that we know about on the machine.
3141 //
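//
// For example (hypothetical): if the process was launched under
// "taskset -c 0-3" and respect is set, fullMask contains only procs 0-3
// and __kmp_avail_proc becomes 4, even on a larger machine.
//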
3142 if (fullMask == NULL) {
3143 fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
3144 }
3145 if (KMP_AFFINITY_CAPABLE()) {
3146 if (__kmp_affinity_respect_mask) {
3147 __kmp_get_system_affinity(fullMask, TRUE);
3148
3149 //
3150 // Count the number of available processors.
3151 //
3152 unsigned i;
3153 __kmp_avail_proc = 0;
3154 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
3155 if (! KMP_CPU_ISSET(i, fullMask)) {
3156 continue;
3157 }
3158 __kmp_avail_proc++;
3159 }
3160 if (__kmp_avail_proc > __kmp_xproc) {
3161 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3162 && (__kmp_affinity_type != affinity_none))) {
3163 KMP_WARNING(ErrorInitializeAffinity);
3164 }
3165 __kmp_affinity_type = affinity_none;
Andrey Churbanov1f037e42015-03-10 09:15:26 +00003166 KMP_AFFINITY_DISABLE();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003167 return;
3168 }
3169 }
3170 else {
3171 __kmp_affinity_entire_machine_mask(fullMask);
3172 __kmp_avail_proc = __kmp_xproc;
3173 }
3174 }
3175
3176 int depth = -1;
3177 kmp_i18n_id_t msg_id = kmp_i18n_null;
3178
3179 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00003180 // For backward compatibility, setting KMP_CPUINFO_FILE =>
Jim Cownie5e8470a2013-09-27 10:38:44 +00003181 // KMP_TOPOLOGY_METHOD=cpuinfo
3182 //
3183 if ((__kmp_cpuinfo_file != NULL) &&
3184 (__kmp_affinity_top_method == affinity_top_method_all)) {
3185 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
3186 }
3187
3188 if (__kmp_affinity_top_method == affinity_top_method_all) {
3189 //
3190 // In the default code path, errors are not fatal - we just try using
3191 // another method. We only emit a warning message if affinity is on,
3192 // or the verbose flag is set, and the nowarnings flag was not set.
3193 //
3194 const char *file_name = NULL;
3195 int line = 0;
3196
3197# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3198
3199 if (__kmp_affinity_verbose) {
3200 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
3201 }
3202
3203 file_name = NULL;
3204 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3205 if (depth == 0) {
3206 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3207 KMP_ASSERT(address2os == NULL);
3208 return;
3209 }
3210
3211 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003212 if (__kmp_affinity_verbose) {
3213 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003214 KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
3215 KMP_I18N_STR(DecodingLegacyAPIC));
3216 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003217 else {
3218 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
3219 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003220 }
3221
3222 file_name = NULL;
3223 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3224 if (depth == 0) {
3225 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3226 KMP_ASSERT(address2os == NULL);
3227 return;
3228 }
3229 }
3230
3231# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3232
3233# if KMP_OS_LINUX
3234
3235 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003236 if (__kmp_affinity_verbose) {
3237 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003238 KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
3239 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003240 else {
3241 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
3242 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003243 }
3244
3245 FILE *f = fopen("/proc/cpuinfo", "r");
3246 if (f == NULL) {
3247 msg_id = kmp_i18n_str_CantOpenCpuinfo;
3248 }
3249 else {
3250 file_name = "/proc/cpuinfo";
3251 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3252 fclose(f);
3253 if (depth == 0) {
3254 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3255 KMP_ASSERT(address2os == NULL);
3256 return;
3257 }
3258 }
3259 }
3260
3261# endif /* KMP_OS_LINUX */
3262
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003263# if KMP_GROUP_AFFINITY
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003264
3265 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
3266 if (__kmp_affinity_verbose) {
3267 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3268 }
3269
3270 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3271 KMP_ASSERT(depth != 0);
3272 }
3273
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003274# endif /* KMP_GROUP_AFFINITY */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003275
Jim Cownie5e8470a2013-09-27 10:38:44 +00003276 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003277 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003278 if (file_name == NULL) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003279 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003280 }
3281 else if (line == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003282 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003283 }
3284 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003285 KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003286 }
3287 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003288 // FIXME - print msg if msg_id = kmp_i18n_null ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00003289
3290 file_name = "";
3291 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3292 if (depth == 0) {
3293 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3294 KMP_ASSERT(address2os == NULL);
3295 return;
3296 }
3297 KMP_ASSERT(depth > 0);
3298 KMP_ASSERT(address2os != NULL);
3299 }
3300 }
3301
3302 //
3303 // If the user has specified that a particular topology discovery method
3304 // is to be used, then we abort if that method fails. The exception is
3305 // group affinity, which might have been implicitly set.
3306 //
3307
3308# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3309
3310 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
3311 if (__kmp_affinity_verbose) {
3312 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3313 KMP_I18N_STR(Decodingx2APIC));
3314 }
3315
3316 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3317 if (depth == 0) {
3318 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3319 KMP_ASSERT(address2os == NULL);
3320 return;
3321 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003322 if (depth < 0) {
3323 KMP_ASSERT(msg_id != kmp_i18n_null);
3324 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3325 }
3326 }
3327 else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
3328 if (__kmp_affinity_verbose) {
3329 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3330 KMP_I18N_STR(DecodingLegacyAPIC));
3331 }
3332
3333 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3334 if (depth == 0) {
3335 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3336 KMP_ASSERT(address2os == NULL);
3337 return;
3338 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003339 if (depth < 0) {
3340 KMP_ASSERT(msg_id != kmp_i18n_null);
3341 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3342 }
3343 }
3344
3345# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3346
3347 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
3348 const char *filename;
3349 if (__kmp_cpuinfo_file != NULL) {
3350 filename = __kmp_cpuinfo_file;
3351 }
3352 else {
3353 filename = "/proc/cpuinfo";
3354 }
3355
3356 if (__kmp_affinity_verbose) {
3357 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
3358 }
3359
3360 FILE *f = fopen(filename, "r");
3361 if (f == NULL) {
3362 int code = errno;
3363 if (__kmp_cpuinfo_file != NULL) {
3364 __kmp_msg(
3365 kmp_ms_fatal,
3366 KMP_MSG(CantOpenFileForReading, filename),
3367 KMP_ERR(code),
3368 KMP_HNT(NameComesFrom_CPUINFO_FILE),
3369 __kmp_msg_null
3370 );
3371 }
3372 else {
3373 __kmp_msg(
3374 kmp_ms_fatal,
3375 KMP_MSG(CantOpenFileForReading, filename),
3376 KMP_ERR(code),
3377 __kmp_msg_null
3378 );
3379 }
3380 }
3381 int line = 0;
3382 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3383 fclose(f);
3384 if (depth < 0) {
3385 KMP_ASSERT(msg_id != kmp_i18n_null);
3386 if (line > 0) {
3387 KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
3388 }
3389 else {
3390 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
3391 }
3392 }
3393 if (__kmp_affinity_type == affinity_none) {
3394 KMP_ASSERT(depth == 0);
3395 KMP_ASSERT(address2os == NULL);
3396 return;
3397 }
3398 }
3399
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003400# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00003401
3402 else if (__kmp_affinity_top_method == affinity_top_method_group) {
3403 if (__kmp_affinity_verbose) {
3404 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3405 }
3406
3407 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3408 KMP_ASSERT(depth != 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003409 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003410 KMP_ASSERT(msg_id != kmp_i18n_null);
3411 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003412 }
3413 }
3414
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003415# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003416
3417 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
3418 if (__kmp_affinity_verbose) {
3419 KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
3420 }
3421
3422 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3423 if (depth == 0) {
3424 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3425 KMP_ASSERT(address2os == NULL);
3426 return;
3427 }
3428 // should not fail
3429 KMP_ASSERT(depth > 0);
3430 KMP_ASSERT(address2os != NULL);
3431 }
3432
3433 if (address2os == NULL) {
3434 if (KMP_AFFINITY_CAPABLE()
3435 && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3436 && (__kmp_affinity_type != affinity_none)))) {
3437 KMP_WARNING(ErrorInitializeAffinity);
3438 }
3439 __kmp_affinity_type = affinity_none;
Andrey Churbanov1f037e42015-03-10 09:15:26 +00003440 KMP_AFFINITY_DISABLE();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003441 return;
3442 }
3443
Jim Cownie5e8470a2013-09-27 10:38:44 +00003444 __kmp_apply_thread_places(&address2os, depth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003445
3446 //
3447 // Create the table of masks, indexed by thread Id.
3448 //
3449 unsigned maxIndex;
3450 unsigned numUnique;
3451 kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
3452 address2os, __kmp_avail_proc);
3453 if (__kmp_affinity_gran_levels == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003454 KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003455 }
3456
3457 //
3458 // Set the childNums vector in all Address objects. This must be done
3459 // before we can sort using __kmp_affinity_cmp_Address_child_num(),
3460 // which takes into account the setting of __kmp_affinity_compact.
3461 //
3462 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
3463
3464 switch (__kmp_affinity_type) {
3465
3466 case affinity_explicit:
3467 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
3468# if OMP_40_ENABLED
3469 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3470# endif
3471 {
3472 __kmp_affinity_process_proclist(&__kmp_affinity_masks,
3473 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3474 maxIndex);
3475 }
3476# if OMP_40_ENABLED
3477 else {
3478 __kmp_affinity_process_placelist(&__kmp_affinity_masks,
3479 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3480 maxIndex);
3481 }
3482# endif
3483 if (__kmp_affinity_num_masks == 0) {
3484 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3485 && (__kmp_affinity_type != affinity_none))) {
3486 KMP_WARNING(AffNoValidProcID);
3487 }
3488 __kmp_affinity_type = affinity_none;
3489 return;
3490 }
3491 break;
3492
3493 //
3494 // The other affinity types rely on sorting the Addresses according
3495 // to some permutation of the machine topology tree. Set
3496 // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
3497 // then jump to a common code fragment to do the sort and create
3498 // the array of affinity masks.
3499 //
3500
3501 case affinity_logical:
3502 __kmp_affinity_compact = 0;
3503 if (__kmp_affinity_offset) {
3504 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3505 % __kmp_avail_proc;
3506 }
3507 goto sortAddresses;
3508
3509 case affinity_physical:
3510 if (__kmp_nThreadsPerCore > 1) {
3511 __kmp_affinity_compact = 1;
3512 if (__kmp_affinity_compact >= depth) {
3513 __kmp_affinity_compact = 0;
3514 }
3515 } else {
3516 __kmp_affinity_compact = 0;
3517 }
3518 if (__kmp_affinity_offset) {
3519 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3520 % __kmp_avail_proc;
3521 }
3522 goto sortAddresses;
3523
3524 case affinity_scatter:
3525 if (__kmp_affinity_compact >= depth) {
3526 __kmp_affinity_compact = 0;
3527 }
3528 else {
3529 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
3530 }
3531 goto sortAddresses;
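        // scatter is in effect the mirror image of compact: inverting the
        // level index (depth - 1 - compact) makes the common sort below
        // spread consecutive thread ids across the outermost levels of the
        // topology first, instead of packing them at the innermost levels.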
3532
3533 case affinity_compact:
3534 if (__kmp_affinity_compact >= depth) {
3535 __kmp_affinity_compact = depth - 1;
3536 }
3537 goto sortAddresses;
3538
Jim Cownie5e8470a2013-09-27 10:38:44 +00003539 case affinity_balanced:
Jonathan Peytoncaf09fe2015-05-27 23:27:33 +00003540 // The balanced affinity type currently works only on single-package machines
Jim Cownie5e8470a2013-09-27 10:38:44 +00003541 if( nPackages > 1 ) {
3542 if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
3543 KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
3544 }
3545 __kmp_affinity_type = affinity_none;
3546 return;
3547 } else if( __kmp_affinity_uniform_topology() ) {
3548 break;
3549 } else { // Non-uniform topology
3550
3551 // Save the depth for later use by __kmp_balanced_affinity()
3552 __kmp_aff_depth = depth;
3553
3554 // Number of hyperthreads per core on an HT machine
3555 int nth_per_core = __kmp_nThreadsPerCore;
3556
3557 int core_level;
3558 if( nth_per_core > 1 ) {
3559 core_level = depth - 2;
3560 } else {
3561 core_level = depth - 1;
3562 }
3563 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
3564 int nproc = nth_per_core * ncores;
3565
3566 procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
3567 for( int i = 0; i < nproc; i++ ) {
3568 procarr[ i ] = -1;
3569 }
3570
3571 for( int i = 0; i < __kmp_avail_proc; i++ ) {
3572 int proc = address2os[ i ].second;
3573 // If depth == 3 then level=0 - package, level=1 - core, level=2 - thread.
3574 // If there is only one thread per core then depth == 2: level 0 - package,
3575 // level 1 - core.
3576 int level = depth - 1;
3577
3578 // Defaults for the one-thread-per-core case (__kmp_nth_per_core == 1)
3579 int thread = 0;
3580 int core = address2os[ i ].first.labels[ level ];
3581 // If the thread level exists, that is, there is more than one thread context per core
3582 if( nth_per_core > 1 ) {
3583 thread = address2os[ i ].first.labels[ level ] % nth_per_core;
3584 core = address2os[ i ].first.labels[ level - 1 ];
3585 }
3586 procarr[ core * nth_per_core + thread ] = proc;
3587 }
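            // procarr is a dense, core-major table of size ncores * nth_per_core:
            // entry [core * nth_per_core + thread] holds the OS proc id of that
            // hardware context, or -1 for a context that does not exist on this
            // non-uniform machine. __kmp_balanced_affinity() consumes it later.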
3588
3589 break;
3590 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003591
3592 sortAddresses:
3593 //
3594 // Allocate the gtid->affinity mask table.
3595 //
3596 if (__kmp_affinity_dups) {
3597 __kmp_affinity_num_masks = __kmp_avail_proc;
3598 }
3599 else {
3600 __kmp_affinity_num_masks = numUnique;
3601 }
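    // When duplicate places are allowed (__kmp_affinity_dups), every
    // available proc gets its own entry even if several procs share a mask
    // at the chosen granularity; otherwise the table collapses to the
    // numUnique leader masks found by __kmp_create_masks().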
3602
3603# if OMP_40_ENABLED
3604 if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
3605 && ( __kmp_affinity_num_places > 0 )
3606 && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
3607 __kmp_affinity_num_masks = __kmp_affinity_num_places;
3608 }
3609# endif
3610
3611 __kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
3612 __kmp_affinity_num_masks * __kmp_affin_mask_size);
3613
3614 //
3615 // Sort the address2os table according to the current setting of
3616 // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
3617 //
3618 qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
3619 __kmp_affinity_cmp_Address_child_num);
3620 {
3621 int i;
3622 unsigned j;
3623 for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
3624 if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
3625 continue;
3626 }
3627 unsigned osId = address2os[i].second;
3628 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
3629 kmp_affin_mask_t *dest
3630 = KMP_CPU_INDEX(__kmp_affinity_masks, j);
3631 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
3632 KMP_CPU_COPY(dest, src);
3633 if (++j >= __kmp_affinity_num_masks) {
3634 break;
3635 }
3636 }
3637 KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
3638 }
3639 break;
3640
3641 default:
3642 KMP_ASSERT2(0, "Unexpected affinity setting");
3643 }
3644
3645 __kmp_free(osId2Mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003646 machine_hierarchy.init(address2os, __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003647}
3648
3649
3650void
3651__kmp_affinity_initialize(void)
3652{
3653 //
3654 // Much of the code above was written assuming that if a machine was not
3655 // affinity capable, then __kmp_affinity_type == affinity_none. We now
3656 // explicitly represent this as __kmp_affinity_type == affinity_disabled.
3657 //
3658 // There are too many checks for __kmp_affinity_type == affinity_none
3659 // in this code. Instead of trying to change them all, check if
3660 // __kmp_affinity_type == affinity_disabled, and if so, slam it with
3661 // affinity_none, call the real initialization routine, then restore
3662 // __kmp_affinity_type to affinity_disabled.
3663 //
3664 int disabled = (__kmp_affinity_type == affinity_disabled);
3665 if (! KMP_AFFINITY_CAPABLE()) {
3666 KMP_ASSERT(disabled);
3667 }
3668 if (disabled) {
3669 __kmp_affinity_type = affinity_none;
3670 }
3671 __kmp_aux_affinity_initialize();
3672 if (disabled) {
3673 __kmp_affinity_type = affinity_disabled;
3674 }
3675}
3676
3677
3678void
3679__kmp_affinity_uninitialize(void)
3680{
3681 if (__kmp_affinity_masks != NULL) {
3682 __kmp_free(__kmp_affinity_masks);
3683 __kmp_affinity_masks = NULL;
3684 }
3685 if (fullMask != NULL) {
3686 KMP_CPU_FREE(fullMask);
3687 fullMask = NULL;
3688 }
3689 __kmp_affinity_num_masks = 0;
3690# if OMP_40_ENABLED
3691 __kmp_affinity_num_places = 0;
3692# endif
3693 if (__kmp_affinity_proclist != NULL) {
3694 __kmp_free(__kmp_affinity_proclist);
3695 __kmp_affinity_proclist = NULL;
3696 }
3697 if( address2os != NULL ) {
3698 __kmp_free( address2os );
3699 address2os = NULL;
3700 }
3701 if( procarr != NULL ) {
3702 __kmp_free( procarr );
3703 procarr = NULL;
3704 }
3705}
3706
3707
3708void
3709__kmp_affinity_set_init_mask(int gtid, int isa_root)
3710{
3711 if (! KMP_AFFINITY_CAPABLE()) {
3712 return;
3713 }
3714
3715 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
3716 if (th->th.th_affin_mask == NULL) {
3717 KMP_CPU_ALLOC(th->th.th_affin_mask);
3718 }
3719 else {
3720 KMP_CPU_ZERO(th->th.th_affin_mask);
3721 }
3722
3723 //
3724 // Copy the thread mask to the kmp_info_t structure.
3725 // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
3726 // that has all of the OS proc ids set. If __kmp_affinity_respect_mask
3727 // is set, the full mask is instead the mask of the initialization
3728 // thread.
3729 //
3730 kmp_affin_mask_t *mask;
3731 int i;
3732
3733# if OMP_40_ENABLED
3734 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3735# endif
3736 {
Andrey Churbanovf28f6132015-01-13 14:54:00 +00003737 if ((__kmp_affinity_type == affinity_none) || (__kmp_affinity_type == affinity_balanced)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003738 ) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003739# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00003740 if (__kmp_num_proc_groups > 1) {
3741 return;
3742 }
3743# endif
3744 KMP_ASSERT(fullMask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003745 i = KMP_PLACE_ALL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003746 mask = fullMask;
3747 }
3748 else {
3749 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
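            // Round-robin gtids over the affinity mask table; e.g. with
            // __kmp_affinity_num_masks == 4 and offset 1, gtids 0,1,2,3,4
            // map to masks 1,2,3,0,1.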
3750 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
3751 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
3752 }
3753 }
3754# if OMP_40_ENABLED
3755 else {
3756 if ((! isa_root)
3757 || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003758# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00003759 if (__kmp_num_proc_groups > 1) {
3760 return;
3761 }
3762# endif
3763 KMP_ASSERT(fullMask != NULL);
3764 i = KMP_PLACE_ALL;
3765 mask = fullMask;
3766 }
3767 else {
3768 //
3769 // Choose the place index: ideally some hash function or a counter
3770 // that doesn't always start at 0; just use gtid for now.
3771 //
3772 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
3773 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
3774 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
3775 }
3776 }
3777# endif
3778
3779# if OMP_40_ENABLED
3780 th->th.th_current_place = i;
3781 if (isa_root) {
3782 th->th.th_new_place = i;
3783 th->th.th_first_place = 0;
3784 th->th.th_last_place = __kmp_affinity_num_masks - 1;
3785 }
3786
3787 if (i == KMP_PLACE_ALL) {
3788 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
3789 gtid));
3790 }
3791 else {
3792 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
3793 gtid, i));
3794 }
3795# else
3796 if (i == -1) {
3797 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
3798 gtid));
3799 }
3800 else {
3801 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
3802 gtid, i));
3803 }
3804# endif /* OMP_40_ENABLED */
3805
3806 KMP_CPU_COPY(th->th.th_affin_mask, mask);
3807
3808 if (__kmp_affinity_verbose) {
3809 char buf[KMP_AFFIN_MASK_PRINT_LEN];
3810 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
3811 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003812 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
3813 buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003814 }
3815
3816# if KMP_OS_WINDOWS
3817 //
3818 // On Windows* OS, the process affinity mask might have changed.
3819 // If the user didn't request affinity and this call fails,
3820 // just continue silently. See CQ171393.
3821 //
3822 if ( __kmp_affinity_type == affinity_none ) {
3823 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
3824 }
3825 else
3826# endif
3827 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
3828}
3829
3830
3831# if OMP_40_ENABLED
3832
3833void
3834__kmp_affinity_set_place(int gtid)
3835{
3838 if (! KMP_AFFINITY_CAPABLE()) {
3839 return;
3840 }
3841
3842 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
3843
3844 KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
3845 gtid, th->th.th_new_place, th->th.th_current_place));
3846
3847 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00003848 // Check that the new place is within this thread's partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003849 //
3850 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003851 KMP_ASSERT(th->th.th_new_place >= 0);
3852 KMP_ASSERT((unsigned)th->th.th_new_place < __kmp_affinity_num_masks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003853 if (th->th.th_first_place <= th->th.th_last_place) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003854 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003855 && (th->th.th_new_place <= th->th.th_last_place));
3856 }
3857 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003858 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003859 || (th->th.th_new_place >= th->th.th_last_place));
3860 }
3861
3862 //
3863 // Copy the thread mask to the kmp_info_t structure,
3864 // and set this thread's affinity.
3865 //
3866 kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
3867 th->th.th_new_place);
3868 KMP_CPU_COPY(th->th.th_affin_mask, mask);
3869 th->th.th_current_place = th->th.th_new_place;
3870
3871 if (__kmp_affinity_verbose) {
3872 char buf[KMP_AFFIN_MASK_PRINT_LEN];
3873 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
3874 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003875 KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
3876 gtid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003877 }
3878 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
3879}
3880
3881# endif /* OMP_40_ENABLED */
3882
3883
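//
// Backing implementation for the user-level kmp_set_affinity() entry point
// declared in omp.h. A typical call sequence (a sketch, assuming the
// standard kmp_affinity_mask_t wrappers) looks roughly like:
//
//     kmp_affinity_mask_t m;
//     kmp_create_affinity_mask(&m);
//     kmp_set_affinity_mask_proc(3, &m);  // run on OS proc 3 only
//     if (kmp_set_affinity(&m) != 0) { /* not capable or invalid mask */ }
//     kmp_destroy_affinity_mask(&m);
//
// Returns -1 if the runtime is not affinity capable, otherwise the result
// of the underlying system call (0 on success).
//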
3884int
3885__kmp_aux_set_affinity(void **mask)
3886{
3887 int gtid;
3888 kmp_info_t *th;
3889 int retval;
3890
3891 if (! KMP_AFFINITY_CAPABLE()) {
3892 return -1;
3893 }
3894
3895 gtid = __kmp_entry_gtid();
3896 KA_TRACE(1000, ;{
3897 char buf[KMP_AFFIN_MASK_PRINT_LEN];
3898 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
3899 (kmp_affin_mask_t *)(*mask));
3900 __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
3901 gtid, buf);
3902 });
3903
3904 if (__kmp_env_consistency_check) {
3905 if ((mask == NULL) || (*mask == NULL)) {
3906 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
3907 }
3908 else {
3909 unsigned proc;
3910 int num_procs = 0;
3911
3912 for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
3913 if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
3914 continue;
3915 }
3916 num_procs++;
3917 if (! KMP_CPU_ISSET(proc, fullMask)) {
3918 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
3919 break;
3920 }
3921 }
3922 if (num_procs == 0) {
3923 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
3924 }
3925
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003926# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00003927 if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
3928 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
3929 }
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003930# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003931
3932 }
3933 }
3934
3935 th = __kmp_threads[gtid];
3936 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
3937 retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
3938 if (retval == 0) {
3939 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
3940 }
3941
3942# if OMP_40_ENABLED
3943 th->th.th_current_place = KMP_PLACE_UNDEFINED;
3944 th->th.th_new_place = KMP_PLACE_UNDEFINED;
3945 th->th.th_first_place = 0;
3946 th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003947
3948 //
3949 // Turn off 4.0 affinity for the current thread at this parallel level.
3950 //
3951 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003952# endif
3953
3954 return retval;
3955}
3956
3957
3958int
3959__kmp_aux_get_affinity(void **mask)
3960{
3961 int gtid;
3962 int retval;
3963 kmp_info_t *th;
3964
3965 if (! KMP_AFFINITY_CAPABLE()) {
3966 return -1;
3967 }
3968
3969 gtid = __kmp_entry_gtid();
3970 th = __kmp_threads[gtid];
3971 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
3972
3973 KA_TRACE(1000, ;{
3974 char buf[KMP_AFFIN_MASK_PRINT_LEN];
3975 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
3976 th->th.th_affin_mask);
3977 __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
3978 });
3979
3980 if (__kmp_env_consistency_check) {
3981 if ((mask == NULL) || (*mask == NULL)) {
3982 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
3983 }
3984 }
3985
3986# if !KMP_OS_WINDOWS
3987
3988 retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
3989 KA_TRACE(1000, ;{
3990 char buf[KMP_AFFIN_MASK_PRINT_LEN];
3991 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
3992 (kmp_affin_mask_t *)(*mask));
3993 __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
3994 });
3995 return retval;
3996
3997# else
3998
3999 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
4000 return 0;
4001
4002# endif /* KMP_OS_WINDOWS */
4003
4004}
4005
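//
// The three helpers below back the kmp_{set,unset,get}_affinity_mask_proc()
// entry points. All of them return -1 when the runtime is not affinity
// capable. The set/unset variants also return -1 for an out-of-range proc
// and -2 for a proc missing from the process' fullMask; the get variant
// simply reports 0 in those cases, else the membership bit for proc.
//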
Jim Cownie5e8470a2013-09-27 10:38:44 +00004006int
4007__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
4008{
4011 if (! KMP_AFFINITY_CAPABLE()) {
4012 return -1;
4013 }
4014
4015 KA_TRACE(1000, ;{
4016 int gtid = __kmp_entry_gtid();
4017 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4018 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4019 (kmp_affin_mask_t *)(*mask));
4020 __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
4021 proc, gtid, buf);
4022 });
4023
4024 if (__kmp_env_consistency_check) {
4025 if ((mask == NULL) || (*mask == NULL)) {
4026 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
4027 }
4028 }
4029
4030 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4031 return -1;
4032 }
4033 if (! KMP_CPU_ISSET(proc, fullMask)) {
4034 return -2;
4035 }
4036
4037 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
4038 return 0;
4039}
4040
4041
4042int
4043__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
4044{
4047 if (! KMP_AFFINITY_CAPABLE()) {
4048 return -1;
4049 }
4050
4051 KA_TRACE(1000, ;{
4052 int gtid = __kmp_entry_gtid();
4053 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4054 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4055 (kmp_affin_mask_t *)(*mask));
4056 __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
4057 proc, gtid, buf);
4058 });
4059
4060 if (__kmp_env_consistency_check) {
4061 if ((mask == NULL) || (*mask == NULL)) {
4062 KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
4063 }
4064 }
4065
4066 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4067 return -1;
4068 }
4069 if (! KMP_CPU_ISSET(proc, fullMask)) {
4070 return -2;
4071 }
4072
4073 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
4074 return 0;
4075}
4076
4077
4078int
4079__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
4080{
4083 if (! KMP_AFFINITY_CAPABLE()) {
4084 return -1;
4085 }
4086
4087 KA_TRACE(1000, ;{
4088 int gtid = __kmp_entry_gtid();
4089 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4090 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4091 (kmp_affin_mask_t *)(*mask));
4092 __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
4093 proc, gtid, buf);
4094 });
4095
4096 if (__kmp_env_consistency_check) {
4097 if ((mask == NULL) || (*mask == NULL)) {
Andrey Churbanov4b2f17a2015-01-29 15:49:22 +00004098 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
Jim Cownie5e8470a2013-09-27 10:38:44 +00004099 }
4100 }
4101
4102 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4103 return 0;
4104 }
4105 if (! KMP_CPU_ISSET(proc, fullMask)) {
4106 return 0;
4107 }
4108
4109 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
4110}
4111
Jim Cownie5e8470a2013-09-27 10:38:44 +00004112
4113// Dynamic affinity settings - Affinity balanced
4114void __kmp_balanced_affinity( int tid, int nthreads )
4115{
4116 if( __kmp_affinity_uniform_topology() ) {
4117 int coreID;
4118 int threadID;
4119 // Number of hyperthreads per core on an HT machine
4120 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
4121 // Number of cores
4122 int ncores = __kmp_ncores;
4123 // How many threads will be bound to each core
4124 int chunk = nthreads / ncores;
4125 // How many cores will have an additional thread bound to them - the "big cores"
4126 int big_cores = nthreads % ncores;
4127 // Number of threads on the big cores
4128 int big_nth = ( chunk + 1 ) * big_cores;
4129 if( tid < big_nth ) {
4130 coreID = tid / (chunk + 1 );
4131 threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
4132 } else { //tid >= big_nth
4133 coreID = ( tid - big_cores ) / chunk;
4134 threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
4135 }
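        // Worked example (a sketch): 4 cores with 2 contexts each and
        // nthreads == 6 gives chunk = 1, big_cores = 2, big_nth = 4, so
        // tids 0,1 share core 0, tids 2,3 share core 1 (the "big" cores),
        // and tids 4,5 land alone on cores 2 and 3.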
4136
4137 KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
4138 "Illegal set affinity operation when not capable");
4139
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00004140 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004141 KMP_CPU_ZERO(mask);
4142
4143 // Granularity == thread
4144 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4145 int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
4146 KMP_CPU_SET( osID, mask);
4147 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4148 for( int i = 0; i < __kmp_nth_per_core; i++ ) {
4149 int osID;
4150 osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
4151 KMP_CPU_SET( osID, mask);
4152 }
4153 }
4154 if (__kmp_affinity_verbose) {
4155 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4156 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004157 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4158 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004159 }
4160 __kmp_set_system_affinity( mask, TRUE );
4161 } else { // Non-uniform topology
4162
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00004163 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004164 KMP_CPU_ZERO(mask);
4165
4166 // Number of hyperthreads per core on an HT machine
4167 int nth_per_core = __kmp_nThreadsPerCore;
4168 int core_level;
4169 if( nth_per_core > 1 ) {
4170 core_level = __kmp_aff_depth - 2;
4171 } else {
4172 core_level = __kmp_aff_depth - 1;
4173 }
4174
4175 // Number of cores - an upper bound; it does not count trailing cores with 0 processors
4176 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
4177
4178 // As a performance optimization, handle the special case nthreads == __kmp_avail_proc separately
4179 if( nthreads == __kmp_avail_proc ) {
4180 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4181 int osID = address2os[ tid ].second;
4182 KMP_CPU_SET( osID, mask);
4183 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4184 int coreID = address2os[ tid ].first.labels[ core_level ];
4185 // Count the osIDs found for the current core; there can be no more than
4186 // nth_per_core of them, and since address2os is sorted we can break when cnt == nth_per_core
4187 int cnt = 0;
4188 for( int i = 0; i < __kmp_avail_proc; i++ ) {
4189 int osID = address2os[ i ].second;
4190 int core = address2os[ i ].first.labels[ core_level ];
4191 if( core == coreID ) {
4192 KMP_CPU_SET( osID, mask);
4193 cnt++;
4194 if( cnt == nth_per_core ) {
4195 break;
4196 }
4197 }
4198 }
4199 }
4200 } else if( nthreads <= __kmp_ncores ) {
4201
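            // More cores than threads: walk the cores that have at least one
            // context recorded in procarr and give the tid-th such core to
            // this thread - one context at granularity thread, all of the
            // core's contexts at granularity core.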
4202 int core = 0;
4203 for( int i = 0; i < ncores; i++ ) {
4204 // Check if this core from procarr[] is in the mask
4205 int in_mask = 0;
4206 for( int j = 0; j < nth_per_core; j++ ) {
4207 if( procarr[ i * nth_per_core + j ] != -1 ) {
4208 in_mask = 1;
4209 break;
4210 }
4211 }
4212 if( in_mask ) {
4213 if( tid == core ) {
4214 for( int j = 0; j < nth_per_core; j++ ) {
4215 int osID = procarr[ i * nth_per_core + j ];
4216 if( osID != -1 ) {
4217 KMP_CPU_SET( osID, mask );
4218 // For granularity=thread it is enough to set the first available osID for this core
4219 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4220 break;
4221 }
4222 }
4223 }
4224 break;
4225 } else {
4226 core++;
4227 }
4228 }
4229 }
4230
4231 } else { // nthreads > __kmp_ncores
4232
4233 // Array to save the number of processors at each core
Jonathan Peyton7be075332015-06-22 15:53:50 +00004234 int* nproc_at_core = (int*)KMP_ALLOCA(sizeof(int)*ncores);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004235 // Array to save the number of cores with exactly "x" available processors
Jonathan Peyton7be075332015-06-22 15:53:50 +00004236 int* ncores_with_x_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004237 // Array to save the number of cores with between x and nth_per_core available procs (cumulative over x)
Jonathan Peyton7be075332015-06-22 15:53:50 +00004238 int* ncores_with_x_to_max_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004239
4240 for( int i = 0; i <= nth_per_core; i++ ) {
4241 ncores_with_x_procs[ i ] = 0;
4242 ncores_with_x_to_max_procs[ i ] = 0;
4243 }
4244
4245 for( int i = 0; i < ncores; i++ ) {
4246 int cnt = 0;
4247 for( int j = 0; j < nth_per_core; j++ ) {
4248 if( procarr[ i * nth_per_core + j ] != -1 ) {
4249 cnt++;
4250 }
4251 }
4252 nproc_at_core[ i ] = cnt;
4253 ncores_with_x_procs[ cnt ]++;
4254 }
4255
4256 for( int i = 0; i <= nth_per_core; i++ ) {
4257 for( int j = i; j <= nth_per_core; j++ ) {
4258 ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
4259 }
4260 }
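            // E.g. (a sketch) for cores exposing {2, 2, 1} contexts with
            // nth_per_core = 2: nproc_at_core = {2,2,1},
            // ncores_with_x_procs = {0,1,2} and
            // ncores_with_x_to_max_procs = {3,3,2}.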
4261
4262 // Max number of processors
4263 int nproc = nth_per_core * ncores;
4264 // An array to keep the number of threads assigned to each hardware context
4265 int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
4266 for( int i = 0; i < nproc; i++ ) {
4267 newarr[ i ] = 0;
4268 }
4269
4270 int nth = nthreads;
4271 int flag = 0;
4272 while( nth > 0 ) {
4273 for( int j = 1; j <= nth_per_core; j++ ) {
4274 int cnt = ncores_with_x_to_max_procs[ j ];
4275 for( int i = 0; i < ncores; i++ ) {
4276 // Skip the core with 0 processors
4277 if( nproc_at_core[ i ] == 0 ) {
4278 continue;
4279 }
4280 for( int k = 0; k < nth_per_core; k++ ) {
4281 if( procarr[ i * nth_per_core + k ] != -1 ) {
4282 if( newarr[ i * nth_per_core + k ] == 0 ) {
4283 newarr[ i * nth_per_core + k ] = 1;
4284 cnt--;
4285 nth--;
4286 break;
4287 } else {
4288 if( flag != 0 ) {
4289 newarr[ i * nth_per_core + k ] ++;
4290 cnt--;
4291 nth--;
4292 break;
4293 }
4294 }
4295 }
4296 }
4297 if( cnt == 0 || nth == 0 ) {
4298 break;
4299 }
4300 }
4301 if( nth == 0 ) {
4302 break;
4303 }
4304 }
4305 flag = 1;
4306 }
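            // newarr[ctx] now holds how many of the nthreads threads were
            // assigned to each hardware context: the first sweep (flag == 0)
            // puts at most one thread on each existing context, and later
            // sweeps stack extras round-robin. Walk the prefix sums until
            // sum > tid to locate this thread's context.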
4307 int sum = 0;
4308 for( int i = 0; i < nproc; i++ ) {
4309 sum += newarr[ i ];
4310 if( sum > tid ) {
4311 // Granularity == thread
4312 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4313 int osID = procarr[ i ];
4314 KMP_CPU_SET( osID, mask);
4315 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4316 int coreID = i / nth_per_core;
4317 for( int ii = 0; ii < nth_per_core; ii++ ) {
4318 int osID = procarr[ coreID * nth_per_core + ii ];
4319 if( osID != -1 ) {
4320 KMP_CPU_SET( osID, mask);
4321 }
4322 }
4323 }
4324 break;
4325 }
4326 }
4327 __kmp_free( newarr );
4328 }
4329
4330 if (__kmp_affinity_verbose) {
4331 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4332 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004333 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4334 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004335 }
4336 __kmp_set_system_affinity( mask, TRUE );
4337 }
4338}
4339
Alp Toker763b9392014-02-28 09:42:41 +00004340#endif // KMP_AFFINITY_SUPPORTED