Blame - openmp/runtime/src/kmp_affinity.cpp - toolchain/llvm-project

blob: 32a04465fe8688cc3d165cccae848d63c8348b25 [file] [log] [blame]

Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1	/*
				2	* kmp_affinity.cpp -- affinity management
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3	*/
				4
				5
				6	//===----------------------------------------------------------------------===//
				7	//
				8	// The LLVM Compiler Infrastructure
				9	//
				10	// This file is dual licensed under the MIT and the University of Illinois Open
				11	// Source Licenses. See LICENSE.txt for details.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15
				16	#include "kmp.h"
				17	#include "kmp_i18n.h"
				18	#include "kmp_io.h"
				19	#include "kmp_str.h"
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	20	#include "kmp_wrapper_getpid.h"
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	21
Alp Toker	763b939	2014-02-28 09:42:41 +0000	[diff] [blame]	22	#if KMP_AFFINITY_SUPPORTED
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	23
				24	//
				25	// Print the affinity mask to the character array in a pretty format.
				26	//
				27	char *
				28	__kmp_affinity_print_mask(char buf, int buf_len, kmp_affin_mask_t mask)
				29	{
				30	KMP_ASSERT(buf_len >= 40);
				31	char *scan = buf;
				32	char *end = buf + buf_len - 1;
				33
				34	//
				35	// Find first element / check for empty set.
				36	//
				37	size_t i;
				38	for (i = 0; i < KMP_CPU_SETSIZE; i++) {
				39	if (KMP_CPU_ISSET(i, mask)) {
				40	break;
				41	}
				42	}
				43	if (i == KMP_CPU_SETSIZE) {
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	44	KMP_SNPRINTF(scan, buf_len, "{<empty>}");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	45	while (*scan != '\0') scan++;
				46	KMP_ASSERT(scan <= end);
				47	return buf;
				48	}
				49
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	50	KMP_SNPRINTF(scan, buf_len, "{%ld", (long)i);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	51	while (*scan != '\0') scan++;
				52	i++;
				53	for (; i < KMP_CPU_SETSIZE; i++) {
				54	if (! KMP_CPU_ISSET(i, mask)) {
				55	continue;
				56	}
				57
				58	//
				59	// Check for buffer overflow. A string of the form ",<n>" will have
				60	// at most 10 characters, plus we want to leave room to print ",...}"
				61	// if the set is too large to print for a total of 15 characters.
				62	// We already left room for '\0' in setting end.
				63	//
				64	if (end - scan < 15) {
				65	break;
				66	}
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	67	KMP_SNPRINTF(scan, buf_len, ",%-ld", (long)i);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	68	while (*scan != '\0') scan++;
				69	}
				70	if (i < KMP_CPU_SETSIZE) {
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	71	KMP_SNPRINTF(scan, buf_len, ",...");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	72	while (*scan != '\0') scan++;
				73	}
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	74	KMP_SNPRINTF(scan, buf_len, "}");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	75	while (*scan != '\0') scan++;
				76	KMP_ASSERT(scan <= end);
				77	return buf;
				78	}
				79
				80
				81	void
				82	__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
				83	{
				84	KMP_CPU_ZERO(mask);
				85
Andrey Churbanov	7daf980	2015-01-27 16:52:57 +0000	[diff] [blame]	86	# if KMP_GROUP_AFFINITY
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	87
				88	if (__kmp_num_proc_groups > 1) {
				89	int group;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	90	KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
				91	for (group = 0; group < __kmp_num_proc_groups; group++) {
				92	int i;
				93	int num = __kmp_GetActiveProcessorCount(group);
				94	for (i = 0; i < num; i++) {
				95	KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
				96	}
				97	}
				98	}
				99	else
				100
Andrey Churbanov	7daf980	2015-01-27 16:52:57 +0000	[diff] [blame]	101	# endif /* KMP_GROUP_AFFINITY */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	102
				103	{
				104	int proc;
				105	for (proc = 0; proc < __kmp_xproc; proc++) {
				106	KMP_CPU_SET(proc, mask);
				107	}
				108	}
				109	}
				110
				111
				112	//
				113	// In Linux* OS debug & cover (-O0) builds, we need to avoid inline member
				114	// functions.
				115	//
				116	// The icc codegen emits sections with extremely long names, of the form
				117	// ".gnu.linkonce.<mangled_name>". There seems to have been a linker bug
				118	// introduced between GNU ld version 2.14.90.0.4 and 2.15.92.0.2 involving
				119	// some sort of memory corruption or table overflow that is triggered by
				120	// these long strings. I checked the latest version of the linker -
				121	// GNU ld (Linux* OS/GNU Binutils) 2.18.50.0.7.20080422 - and the bug is not
				122	// fixed.
				123	//
				124	// Unfortunately, my attempts to reproduce it in a smaller example have
				125	// failed - I'm not sure what the prospects are of getting it fixed
Jonathan Peyton	6633829	2015-06-01 02:37:28 +0000	[diff] [blame]	126	// properly - but we need a reproducer smaller than all of libomp.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	127	//
				128	// Work around the problem by avoiding inline constructors in such builds.
				129	// We do this for all platforms, not just Linux* OS - non-inline functions are
				130	// more debuggable and provide better coverage info than inline functions.
				131	// Use inline functions in shipping libs, for performance.
				132	//
				133
				134	# if !defined(KMP_DEBUG) && !defined(COVER)
				135
				136	class Address {
				137	public:
				138	static const unsigned maxDepth = 32;
				139	unsigned labels[maxDepth];
				140	unsigned childNums[maxDepth];
				141	unsigned depth;
				142	unsigned leader;
				143	Address(unsigned _depth)
				144	: depth(_depth), leader(FALSE) {
				145	}
				146	Address &operator=(const Address &b) {
				147	depth = b.depth;
				148	for (unsigned i = 0; i < depth; i++) {
				149	labels[i] = b.labels[i];
				150	childNums[i] = b.childNums[i];
				151	}
				152	leader = FALSE;
				153	return *this;
				154	}
				155	bool operator==(const Address &b) const {
				156	if (depth != b.depth)
				157	return false;
				158	for (unsigned i = 0; i < depth; i++)
				159	if(labels[i] != b.labels[i])
				160	return false;
				161	return true;
				162	}
				163	bool isClose(const Address &b, int level) const {
				164	if (depth != b.depth)
				165	return false;
				166	if ((unsigned)level >= depth)
				167	return true;
				168	for (unsigned i = 0; i < (depth - level); i++)
				169	if(labels[i] != b.labels[i])
				170	return false;
				171	return true;
				172	}
				173	bool operator!=(const Address &b) const {
				174	return !operator==(b);
				175	}
				176	};
				177
				178	class AddrUnsPair {
				179	public:
				180	Address first;
				181	unsigned second;
				182	AddrUnsPair(Address _first, unsigned _second)
				183	: first(_first), second(_second) {
				184	}
				185	AddrUnsPair &operator=(const AddrUnsPair &b)
				186	{
				187	first = b.first;
				188	second = b.second;
				189	return *this;
				190	}
				191	};
				192
				193	# else
				194
				195	class Address {
				196	public:
				197	static const unsigned maxDepth = 32;
				198	unsigned labels[maxDepth];
				199	unsigned childNums[maxDepth];
				200	unsigned depth;
				201	unsigned leader;
				202	Address(unsigned _depth);
				203	Address &operator=(const Address &b);
				204	bool operator==(const Address &b) const;
				205	bool isClose(const Address &b, int level) const;
				206	bool operator!=(const Address &b) const;
				207	};
				208
				209	Address::Address(unsigned _depth)
				210	{
				211	depth = _depth;
				212	leader = FALSE;
				213	}
				214
				215	Address &Address::operator=(const Address &b) {
				216	depth = b.depth;
				217	for (unsigned i = 0; i < depth; i++) {
				218	labels[i] = b.labels[i];
				219	childNums[i] = b.childNums[i];
				220	}
				221	leader = FALSE;
				222	return *this;
				223	}
				224
				225	bool Address::operator==(const Address &b) const {
				226	if (depth != b.depth)
				227	return false;
				228	for (unsigned i = 0; i < depth; i++)
				229	if(labels[i] != b.labels[i])
				230	return false;
				231	return true;
				232	}
				233
				234	bool Address::isClose(const Address &b, int level) const {
				235	if (depth != b.depth)
				236	return false;
				237	if ((unsigned)level >= depth)
				238	return true;
				239	for (unsigned i = 0; i < (depth - level); i++)
				240	if(labels[i] != b.labels[i])
				241	return false;
				242	return true;
				243	}
				244
				245	bool Address::operator!=(const Address &b) const {
				246	return !operator==(b);
				247	}
				248
				249	class AddrUnsPair {
				250	public:
				251	Address first;
				252	unsigned second;
				253	AddrUnsPair(Address _first, unsigned _second);
				254	AddrUnsPair &operator=(const AddrUnsPair &b);
				255	};
				256
				257	AddrUnsPair::AddrUnsPair(Address _first, unsigned _second)
				258	: first(_first), second(_second)
				259	{
				260	}
				261
				262	AddrUnsPair &AddrUnsPair::operator=(const AddrUnsPair &b)
				263	{
				264	first = b.first;
				265	second = b.second;
				266	return *this;
				267	}
				268
				269	# endif /* !defined(KMP_DEBUG) && !defined(COVER) */
				270
				271
				272	static int
				273	__kmp_affinity_cmp_Address_labels(const void a, const void b)
				274	{
				275	const Address aa = (const Address )&(((AddrUnsPair *)a)
				276	->first);
				277	const Address bb = (const Address )&(((AddrUnsPair *)b)
				278	->first);
				279	unsigned depth = aa->depth;
				280	unsigned i;
				281	KMP_DEBUG_ASSERT(depth == bb->depth);
				282	for (i = 0; i < depth; i++) {
				283	if (aa->labels[i] < bb->labels[i]) return -1;
				284	if (aa->labels[i] > bb->labels[i]) return 1;
				285	}
				286	return 0;
				287	}
				288
				289
				290	static int
				291	__kmp_affinity_cmp_Address_child_num(const void a, const void b)
				292	{
				293	const Address aa = (const Address )&(((AddrUnsPair *)a)
				294	->first);
				295	const Address bb = (const Address )&(((AddrUnsPair *)b)
				296	->first);
				297	unsigned depth = aa->depth;
				298	unsigned i;
				299	KMP_DEBUG_ASSERT(depth == bb->depth);
				300	KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
				301	KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
				302	for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
				303	int j = depth - i - 1;
				304	if (aa->childNums[j] < bb->childNums[j]) return -1;
				305	if (aa->childNums[j] > bb->childNums[j]) return 1;
				306	}
				307	for (; i < depth; i++) {
				308	int j = i - __kmp_affinity_compact;
				309	if (aa->childNums[j] < bb->childNums[j]) return -1;
				310	if (aa->childNums[j] > bb->childNums[j]) return 1;
				311	}
				312	return 0;
				313	}
				314
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	315	/** A structure for holding machine-specific hierarchy info to be computed once at init. */
				316	class hierarchy_info {
				317	public:
				318	/** Typical levels are threads/core, cores/package or socket, packages/node, nodes/machine,
				319	etc. We don't want to get specific with nomenclature */
				320	static const kmp_uint32 maxLevels=7;
				321
				322	/** This is specifically the depth of the machine configuration hierarchy, in terms of the
				323	number of levels along the longest path from root to any leaf. It corresponds to the
				324	number of entries in numPerLevel if we exclude all but one trailing 1. */
				325	kmp_uint32 depth;
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	326	kmp_uint32 base_num_threads;
Andrey Churbanov	aa1f2b6	2015-04-13 18:51:59 +0000	[diff] [blame]	327	volatile kmp_int8 uninitialized; // 0=initialized, 1=uninitialized, 2=initialization in progress
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	328
				329	/** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
				330	node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
				331	and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
				332	kmp_uint32 numPerLevel[maxLevels];
				333	kmp_uint32 skipPerLevel[maxLevels];
				334
				335	void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
				336	int hier_depth = adr2os[0].first.depth;
				337	int level = 0;
				338	for (int i=hier_depth-1; i>=0; --i) {
				339	int max = -1;
				340	for (int j=0; j<num_addrs; ++j) {
				341	int next = adr2os[j].first.childNums[i];
				342	if (next > max) max = next;
				343	}
				344	numPerLevel[level] = max+1;
				345	++level;
				346	}
				347	}
				348
Andrey Churbanov	aa1f2b6	2015-04-13 18:51:59 +0000	[diff] [blame]	349	hierarchy_info() : depth(1), uninitialized(1) {}
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	350	void init(AddrUnsPair *adr2os, int num_addrs)
				351	{
Andrey Churbanov	aa1f2b6	2015-04-13 18:51:59 +0000	[diff] [blame]	352	kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, 1, 2);
				353	if (bool_result == 0) { // Wait for initialization
				354	while (TCR_1(uninitialized) != 0) KMP_CPU_PAUSE();
				355	return;
				356	}
				357	KMP_DEBUG_ASSERT(bool_result==1);
				358
Andrey Churbanov	b41e62b	2015-02-10 20:10:21 +0000	[diff] [blame]	359	/* Added explicit initialization of the depth here to prevent usage of dirty value
				360	observed when static library is re-initialized multiple times (e.g. when
				361	non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
				362	depth = 1;
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	363	for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
				364	numPerLevel[i] = 1;
				365	skipPerLevel[i] = 1;
				366	}
				367
				368	// Sort table by physical ID
				369	if (adr2os) {
				370	qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
				371	deriveLevels(adr2os, num_addrs);
				372	}
				373	else {
				374	numPerLevel[0] = 4;
				375	numPerLevel[1] = num_addrs/4;
				376	if (num_addrs%4) numPerLevel[1]++;
				377	}
				378
				379	base_num_threads = num_addrs;
				380	for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
				381	if (numPerLevel[i] != 1 \|\| depth > 1) // only count one top-level '1'
				382	depth++;
				383
				384	kmp_uint32 branch = 4;
				385	if (numPerLevel[0] == 1) branch = num_addrs/4;
				386	if (branch<4) branch=4;
				387	for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
				388	while (numPerLevel[d] > branch \|\| (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
				389	if (numPerLevel[d] & 1) numPerLevel[d]++;
				390	numPerLevel[d] = numPerLevel[d] >> 1;
				391	if (numPerLevel[d+1] == 1) depth++;
				392	numPerLevel[d+1] = numPerLevel[d+1] << 1;
				393	}
				394	if(numPerLevel[0] == 1) {
				395	branch = branch >> 1;
				396	if (branch<4) branch = 4;
				397	}
				398	}
				399
				400	for (kmp_uint32 i=1; i<depth; ++i)
				401	skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
Andrey Churbanov	aa1f2b6	2015-04-13 18:51:59 +0000	[diff] [blame]	402	// Fill in hierarchy in the case of oversubscription
				403	for (kmp_uint32 i=depth; i<maxLevels; ++i)
				404	skipPerLevel[i] = 2*skipPerLevel[i-1];
				405
				406	uninitialized = 0; // One writer
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	407
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	408	}
				409	};
				410
				411	static hierarchy_info machine_hierarchy;
				412
				413	void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
Andrey Churbanov	1362ae7	2015-04-02 13:18:50 +0000	[diff] [blame]	414	kmp_uint32 depth;
Andrey Churbanov	aa1f2b6	2015-04-13 18:51:59 +0000	[diff] [blame]	415	// The test below is true if affinity is available, but set to "none". Need to init on first use of hierarchical barrier.
				416	if (TCR_1(machine_hierarchy.uninitialized))
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	417	machine_hierarchy.init(NULL, nproc);
				418
Andrey Churbanov	1362ae7	2015-04-02 13:18:50 +0000	[diff] [blame]	419	depth = machine_hierarchy.depth;
				420	KMP_DEBUG_ASSERT(depth > 0);
Andrey Churbanov	aa1f2b6	2015-04-13 18:51:59 +0000	[diff] [blame]	421	// The loop below adjusts the depth in the case of oversubscription
				422	while (nproc > machine_hierarchy.skipPerLevel[depth-1] && depth<machine_hierarchy.maxLevels-1)
Andrey Churbanov	1362ae7	2015-04-02 13:18:50 +0000	[diff] [blame]	423	depth++;
Andrey Churbanov	aa1f2b6	2015-04-13 18:51:59 +0000	[diff] [blame]	424
Andrey Churbanov	1362ae7	2015-04-02 13:18:50 +0000	[diff] [blame]	425	thr_bar->depth = depth;
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	426	thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
				427	thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
				428	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	429
				430	//
				431	// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
				432	// called to renumber the labels from [0..n] and place them into the child_num
				433	// vector of the address object. This is done in case the labels used for
Alp Toker	8f2d3f0	2014-02-24 10:40:15 +0000	[diff] [blame]	434	// the children at one node of the hierarchy differ from those used for
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	435	// another node at the same level. Example: suppose the machine has 2 nodes
				436	// with 2 packages each. The first node contains packages 601 and 602, and
				437	// second node contains packages 603 and 604. If we try to sort the table
				438	// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
				439	// because we are paying attention to the labels themselves, not the ordinal
				440	// child numbers. By using the child numbers in the sort, the result is
				441	// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
				442	//
				443	static void
				444	__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
				445	int numAddrs)
				446	{
				447	KMP_DEBUG_ASSERT(numAddrs > 0);
				448	int depth = address2os->first.depth;
				449	unsigned counts = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				450	unsigned lastLabel = (unsigned )__kmp_allocate(depth
				451	* sizeof(unsigned));
				452	int labCt;
				453	for (labCt = 0; labCt < depth; labCt++) {
				454	address2os[0].first.childNums[labCt] = counts[labCt] = 0;
				455	lastLabel[labCt] = address2os[0].first.labels[labCt];
				456	}
				457	int i;
				458	for (i = 1; i < numAddrs; i++) {
				459	for (labCt = 0; labCt < depth; labCt++) {
				460	if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
				461	int labCt2;
				462	for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
				463	counts[labCt2] = 0;
				464	lastLabel[labCt2] = address2os[i].first.labels[labCt2];
				465	}
				466	counts[labCt]++;
				467	lastLabel[labCt] = address2os[i].first.labels[labCt];
				468	break;
				469	}
				470	}
				471	for (labCt = 0; labCt < depth; labCt++) {
				472	address2os[i].first.childNums[labCt] = counts[labCt];
				473	}
				474	for (; labCt < (int)Address::maxDepth; labCt++) {
				475	address2os[i].first.childNums[labCt] = 0;
				476	}
				477	}
				478	}
				479
				480
				481	//
				482	// All of the __kmp_affinity_create_*_map() routines should set
				483	// __kmp_affinity_masks to a vector of affinity mask objects of length
				484	// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
				485	// return the number of levels in the machine topology tree (zero if
				486	// __kmp_affinity_type == affinity_none).
				487	//
				488	// All of the __kmp_affinity_create_*_map() routines should set fullMask
				489	// to the affinity mask for the initialization thread. They need to save and
				490	// restore the mask, and it could be needed later, so saving it is just an
				491	// optimization to avoid calling kmp_get_system_affinity() again.
				492	//
				493	static kmp_affin_mask_t *fullMask = NULL;
				494
				495	kmp_affin_mask_t *
				496	__kmp_affinity_get_fullMask() { return fullMask; }
				497
				498
				499	static int nCoresPerPkg, nPackages;
Andrey Churbanov	f696c82	2015-01-27 16:55:43 +0000	[diff] [blame]	500	static int __kmp_nThreadsPerCore;
				501	#ifndef KMP_DFLT_NTH_CORES
				502	static int __kmp_ncores;
				503	#endif
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	504
				505	//
				506	// __kmp_affinity_uniform_topology() doesn't work when called from
				507	// places which support arbitrarily many levels in the machine topology
				508	// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
				509	// and __kmp_affinity_create_x2apicid_map().
				510	//
				511	inline static bool
				512	__kmp_affinity_uniform_topology()
				513	{
				514	return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
				515	}
				516
				517
				518	//
				519	// Print out the detailed machine topology map, i.e. the physical locations
				520	// of each OS proc.
				521	//
				522	static void
				523	__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
				524	int pkgLevel, int coreLevel, int threadLevel)
				525	{
				526	int proc;
				527
				528	KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
				529	for (proc = 0; proc < len; proc++) {
				530	int level;
				531	kmp_str_buf_t buf;
				532	__kmp_str_buf_init(&buf);
				533	for (level = 0; level < depth; level++) {
				534	if (level == threadLevel) {
				535	__kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
				536	}
				537	else if (level == coreLevel) {
				538	__kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
				539	}
				540	else if (level == pkgLevel) {
				541	__kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
				542	}
				543	else if (level > pkgLevel) {
				544	__kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
				545	level - pkgLevel - 1);
				546	}
				547	else {
				548	__kmp_str_buf_print(&buf, "L%d ", level);
				549	}
				550	__kmp_str_buf_print(&buf, "%d ",
				551	address2os[proc].first.labels[level]);
				552	}
				553	KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
				554	buf.str);
				555	__kmp_str_buf_free(&buf);
				556	}
				557	}
				558
				559
				560	//
				561	// If we don't know how to retrieve the machine's processor topology, or
				562	// encounter an error in doing so, this routine is called to form a "flat"
				563	// mapping of os thread id's <-> processor id's.
				564	//
				565	static int
				566	__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
				567	kmp_i18n_id_t *const msg_id)
				568	{
				569	*address2os = NULL;
				570	*msg_id = kmp_i18n_null;
				571
				572	//
				573	// Even if __kmp_affinity_type == affinity_none, this routine might still
Andrey Churbanov	f696c82	2015-01-27 16:55:43 +0000	[diff] [blame]	574	// called to set __kmp_ncores, as well as
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	575	// __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
				576	//
				577	if (! KMP_AFFINITY_CAPABLE()) {
				578	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				579	__kmp_ncores = nPackages = __kmp_xproc;
				580	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	581	if (__kmp_affinity_verbose) {
				582	KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
				583	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				584	KMP_INFORM(Uniform, "KMP_AFFINITY");
				585	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				586	__kmp_nThreadsPerCore, __kmp_ncores);
				587	}
				588	return 0;
				589	}
				590
				591	//
				592	// When affinity is off, this routine will still be called to set
Andrey Churbanov	f696c82	2015-01-27 16:55:43 +0000	[diff] [blame]	593	// __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	594	// nCoresPerPkg, & nPackages. Make sure all these vars are set
				595	// correctly, and return now if affinity is not enabled.
				596	//
				597	__kmp_ncores = nPackages = __kmp_avail_proc;
				598	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	599	if (__kmp_affinity_verbose) {
				600	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				601	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
				602
				603	KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
				604	if (__kmp_affinity_respect_mask) {
				605	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				606	} else {
				607	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				608	}
				609	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				610	KMP_INFORM(Uniform, "KMP_AFFINITY");
				611	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				612	__kmp_nThreadsPerCore, __kmp_ncores);
				613	}
				614	if (__kmp_affinity_type == affinity_none) {
				615	return 0;
				616	}
				617
				618	//
				619	// Contruct the data structure to be returned.
				620	//
				621	address2os = (AddrUnsPair)
				622	__kmp_allocate(sizeof(*address2os) __kmp_avail_proc);
				623	int avail_ct = 0;
				624	unsigned int i;
				625	for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
				626	//
				627	// Skip this proc if it is not included in the machine model.
				628	//
				629	if (! KMP_CPU_ISSET(i, fullMask)) {
				630	continue;
				631	}
				632
				633	Address addr(1);
				634	addr.labels[0] = i;
				635	(*address2os)[avail_ct++] = AddrUnsPair(addr,i);
				636	}
				637	if (__kmp_affinity_verbose) {
				638	KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
				639	}
				640
				641	if (__kmp_affinity_gran_levels < 0) {
				642	//
				643	// Only the package level is modeled in the machine topology map,
				644	// so the #levels of granularity is either 0 or 1.
				645	//
				646	if (__kmp_affinity_gran > affinity_gran_package) {
				647	__kmp_affinity_gran_levels = 1;
				648	}
				649	else {
				650	__kmp_affinity_gran_levels = 0;
				651	}
				652	}
				653	return 1;
				654	}
				655
				656
Andrey Churbanov	7daf980	2015-01-27 16:52:57 +0000	[diff] [blame]	657	# if KMP_GROUP_AFFINITY
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	658
				659	//
				660	// If multiple Windows* OS processor groups exist, we can create a 2-level
				661	// topology map with the groups at level 0 and the individual procs at
				662	// level 1.
				663	//
				664	// This facilitates letting the threads float among all procs in a group,
				665	// if granularity=group (the default when there are multiple groups).
				666	//
				667	static int
				668	__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
				669	kmp_i18n_id_t *const msg_id)
				670	{
				671	*address2os = NULL;
				672	*msg_id = kmp_i18n_null;
				673
				674	//
				675	// If we don't have multiple processor groups, return now.
				676	// The flat mapping will be used.
				677	//
				678	if ((! KMP_AFFINITY_CAPABLE()) \|\| (__kmp_get_proc_group(fullMask) >= 0)) {
				679	// FIXME set *msg_id
				680	return -1;
				681	}
				682
				683	//
				684	// Contruct the data structure to be returned.
				685	//
				686	address2os = (AddrUnsPair)
				687	__kmp_allocate(sizeof(*address2os) __kmp_avail_proc);
				688	int avail_ct = 0;
				689	int i;
				690	for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
				691	//
				692	// Skip this proc if it is not included in the machine model.
				693	//
				694	if (! KMP_CPU_ISSET(i, fullMask)) {
				695	continue;
				696	}
				697
				698	Address addr(2);
				699	addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
				700	addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
				701	(*address2os)[avail_ct++] = AddrUnsPair(addr,i);
				702
				703	if (__kmp_affinity_verbose) {
				704	KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
				705	addr.labels[1]);
				706	}
				707	}
				708
				709	if (__kmp_affinity_gran_levels < 0) {
				710	if (__kmp_affinity_gran == affinity_gran_group) {
				711	__kmp_affinity_gran_levels = 1;
				712	}
				713	else if ((__kmp_affinity_gran == affinity_gran_fine)
				714	\|\| (__kmp_affinity_gran == affinity_gran_thread)) {
				715	__kmp_affinity_gran_levels = 0;
				716	}
				717	else {
				718	const char *gran_str = NULL;
				719	if (__kmp_affinity_gran == affinity_gran_core) {
				720	gran_str = "core";
				721	}
				722	else if (__kmp_affinity_gran == affinity_gran_package) {
				723	gran_str = "package";
				724	}
				725	else if (__kmp_affinity_gran == affinity_gran_node) {
				726	gran_str = "node";
				727	}
				728	else {
				729	KMP_ASSERT(0);
				730	}
				731
				732	// Warning: can't use affinity granularity \"gran\" with group topology method, using "thread"
				733	__kmp_affinity_gran_levels = 0;
				734	}
				735	}
				736	return 2;
				737	}
				738
Andrey Churbanov	7daf980	2015-01-27 16:52:57 +0000	[diff] [blame]	739	# endif /* KMP_GROUP_AFFINITY */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	740
				741
				742	# if KMP_ARCH_X86 || KMP_ARCH_X86_64
				743
				744	static int
				745	__kmp_cpuid_mask_width(int count) {
				746	int r = 0;
				747
				748	while((1<<r) < count)
				749	++r;
				750	return r;
				751	}
				752
				753
				754	class apicThreadInfo {
				755	public:
				756	unsigned osId; // param to __kmp_affinity_bind_thread
				757	unsigned apicId; // from cpuid after binding
				758	unsigned maxCoresPerPkg; // ""
				759	unsigned maxThreadsPerPkg; // ""
				760	unsigned pkgId; // inferred from above values
				761	unsigned coreId; // ""
				762	unsigned threadId; // ""
				763	};
				764
				765
				766	static int
				767	__kmp_affinity_cmp_apicThreadInfo_os_id(const void a, const void b)
				768	{
				769	const apicThreadInfo aa = (const apicThreadInfo )a;
				770	const apicThreadInfo bb = (const apicThreadInfo )b;
				771	if (aa->osId < bb->osId) return -1;
				772	if (aa->osId > bb->osId) return 1;
				773	return 0;
				774	}
				775
				776
				777	static int
				778	__kmp_affinity_cmp_apicThreadInfo_phys_id(const void a, const void b)
				779	{
				780	const apicThreadInfo aa = (const apicThreadInfo )a;
				781	const apicThreadInfo bb = (const apicThreadInfo )b;
				782	if (aa->pkgId < bb->pkgId) return -1;
				783	if (aa->pkgId > bb->pkgId) return 1;
				784	if (aa->coreId < bb->coreId) return -1;
				785	if (aa->coreId > bb->coreId) return 1;
				786	if (aa->threadId < bb->threadId) return -1;
				787	if (aa->threadId > bb->threadId) return 1;
				788	return 0;
				789	}
				790
				791
				792	//
				793	// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
				794	// an algorithm which cycles through the available os threads, setting
				795	// the current thread's affinity mask to that thread, and then retrieves
				796	// the Apic Id for each thread context using the cpuid instruction.
				797	//
				798	static int
				799	__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
				800	kmp_i18n_id_t *const msg_id)
				801	{
Andrey Churbanov	1c33129	2015-01-27 17:03:42 +0000	[diff] [blame]	802	kmp_cpuid buf;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	803	int rc;
				804	*address2os = NULL;
				805	*msg_id = kmp_i18n_null;
				806
Andrey Churbanov	1c33129	2015-01-27 17:03:42 +0000	[diff] [blame]	807	//
				808	// Check if cpuid leaf 4 is supported.
				809	//
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	810	__kmp_x86_cpuid(0, 0, &buf);
				811	if (buf.eax < 4) {
				812	*msg_id = kmp_i18n_str_NoLeaf4Support;
				813	return -1;
				814	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	815
				816	//
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	817	// The algorithm used starts by setting the affinity to each available
Andrey Churbanov	1c33129	2015-01-27 17:03:42 +0000	[diff] [blame]	818	// thread and retrieving info from the cpuid instruction, so if we are
				819	// not capable of calling __kmp_get_system_affinity() and
				820	// _kmp_get_system_affinity(), then we need to do something else - use
				821	// the defaults that we calculated from issuing cpuid without binding
				822	// to each proc.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	823	//
				824	if (! KMP_AFFINITY_CAPABLE()) {
				825	//
				826	// Hack to try and infer the machine topology using only the data
				827	// available from cpuid on the current thread, and __kmp_xproc.
				828	//
				829	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				830
				831	//
				832	// Get an upper bound on the number of threads per package using
				833	// cpuid(1).
				834	//
				835	// On some OS/chps combinations where HT is supported by the chip
				836	// but is disabled, this value will be 2 on a single core chip.
				837	// Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
				838	//
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	839	__kmp_x86_cpuid(1, 0, &buf);
				840	int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
				841	if (maxThreadsPerPkg == 0) {
				842	maxThreadsPerPkg = 1;
				843	}
				844
				845	//
				846	// The num cores per pkg comes from cpuid(4).
				847	// 1 must be added to the encoded value.
				848	//
				849	// The author of cpu_count.cpp treated this only an upper bound
				850	// on the number of cores, but I haven't seen any cases where it
				851	// was greater than the actual number of cores, so we will treat
				852	// it as exact in this block of code.
				853	//
				854	// First, we need to check if cpuid(4) is supported on this chip.
				855	// To see if cpuid(n) is supported, issue cpuid(0) and check if eax
				856	// has the value n or greater.
				857	//
				858	__kmp_x86_cpuid(0, 0, &buf);
				859	if (buf.eax >= 4) {
				860	__kmp_x86_cpuid(4, 0, &buf);
				861	nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
				862	}
				863	else {
				864	nCoresPerPkg = 1;
				865	}
				866
				867	//
				868	// There is no way to reliably tell if HT is enabled without issuing
				869	// the cpuid instruction from every thread, can correlating the cpuid
				870	// info, so if the machine is not affinity capable, we assume that HT
				871	// is off. We have seen quite a few machines where maxThreadsPerPkg
				872	// is 2, yet the machine does not support HT.
				873	//
				874	// - Older OSes are usually found on machines with older chips, which
				875	// do not support HT.
				876	//
				877	// - The performance penalty for mistakenly identifying a machine as
				878	// HT when it isn't (which results in blocktime being incorrecly set
				879	// to 0) is greater than the penalty when for mistakenly identifying
				880	// a machine as being 1 thread/core when it is really HT enabled
				881	// (which results in blocktime being incorrectly set to a positive
				882	// value).
				883	//
				884	__kmp_ncores = __kmp_xproc;
				885	nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
				886	__kmp_nThreadsPerCore = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	887	if (__kmp_affinity_verbose) {
				888	KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
				889	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				890	if (__kmp_affinity_uniform_topology()) {
				891	KMP_INFORM(Uniform, "KMP_AFFINITY");
				892	} else {
				893	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				894	}
				895	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				896	__kmp_nThreadsPerCore, __kmp_ncores);
				897	}
				898	return 0;
				899	}
				900
				901	//
				902	//
				903	// From here on, we can assume that it is safe to call
				904	// __kmp_get_system_affinity() and __kmp_set_system_affinity(),
				905	// even if __kmp_affinity_type = affinity_none.
				906	//
				907
				908	//
				909	// Save the affinity mask for the current thread.
				910	//
				911	kmp_affin_mask_t *oldMask;
				912	KMP_CPU_ALLOC(oldMask);
				913	KMP_ASSERT(oldMask != NULL);
				914	__kmp_get_system_affinity(oldMask, TRUE);
				915
				916	//
				917	// Run through each of the available contexts, binding the current thread
				918	// to it, and obtaining the pertinent information using the cpuid instr.
				919	//
				920	// The relevant information is:
				921	//
				922	// Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
				923	// has a uniqie Apic Id, which is of the form pkg# : core# : thread#.
				924	//
				925	// Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The
				926	// value of this field determines the width of the core# + thread#
				927	// fields in the Apic Id. It is also an upper bound on the number
				928	// of threads per package, but it has been verified that situations
				929	// happen were it is not exact. In particular, on certain OS/chip
				930	// combinations where Intel(R) Hyper-Threading Technology is supported
				931	// by the chip but has
				932	// been disabled, the value of this field will be 2 (for a single core
				933	// chip). On other OS/chip combinations supporting
				934	// Intel(R) Hyper-Threading Technology, the value of
				935	// this field will be 1 when Intel(R) Hyper-Threading Technology is
				936	// disabled and 2 when it is enabled.
				937	//
				938	// Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The
				939	// value of this field (+1) determines the width of the core# field in
				940	// the Apic Id. The comments in "cpucount.cpp" say that this value is
				941	// an upper bound, but the IA-32 architecture manual says that it is
				942	// exactly the number of cores per package, and I haven't seen any
				943	// case where it wasn't.
				944	//
				945	// From this information, deduce the package Id, core Id, and thread Id,
				946	// and set the corresponding fields in the apicThreadInfo struct.
				947	//
				948	unsigned i;
				949	apicThreadInfo threadInfo = (apicThreadInfo )__kmp_allocate(
				950	__kmp_avail_proc * sizeof(apicThreadInfo));
				951	unsigned nApics = 0;
				952	for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
				953	//
				954	// Skip this proc if it is not included in the machine model.
				955	//
				956	if (! KMP_CPU_ISSET(i, fullMask)) {
				957	continue;
				958	}
				959	KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
				960
				961	__kmp_affinity_bind_thread(i);
				962	threadInfo[nApics].osId = i;
				963
				964	//
				965	// The apic id and max threads per pkg come from cpuid(1).
				966	//
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	967	__kmp_x86_cpuid(1, 0, &buf);
				968	if (! (buf.edx >> 9) & 1) {
				969	__kmp_set_system_affinity(oldMask, TRUE);
				970	__kmp_free(threadInfo);
				971	KMP_CPU_FREE(oldMask);
				972	*msg_id = kmp_i18n_str_ApicNotPresent;
				973	return -1;
				974	}
				975	threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
				976	threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
				977	if (threadInfo[nApics].maxThreadsPerPkg == 0) {
				978	threadInfo[nApics].maxThreadsPerPkg = 1;
				979	}
				980
				981	//
				982	// Max cores per pkg comes from cpuid(4).
				983	// 1 must be added to the encoded value.
				984	//
				985	// First, we need to check if cpuid(4) is supported on this chip.
				986	// To see if cpuid(n) is supported, issue cpuid(0) and check if eax
				987	// has the value n or greater.
				988	//
				989	__kmp_x86_cpuid(0, 0, &buf);
				990	if (buf.eax >= 4) {
				991	__kmp_x86_cpuid(4, 0, &buf);
				992	threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
				993	}
				994	else {
				995	threadInfo[nApics].maxCoresPerPkg = 1;
				996	}
				997
				998	//
				999	// Infer the pkgId / coreId / threadId using only the info
				1000	// obtained locally.
				1001	//
				1002	int widthCT = __kmp_cpuid_mask_width(
				1003	threadInfo[nApics].maxThreadsPerPkg);
				1004	threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
				1005
				1006	int widthC = __kmp_cpuid_mask_width(
				1007	threadInfo[nApics].maxCoresPerPkg);
				1008	int widthT = widthCT - widthC;
				1009	if (widthT < 0) {
				1010	//
				1011	// I've never seen this one happen, but I suppose it could, if
				1012	// the cpuid instruction on a chip was really screwed up.
				1013	// Make sure to restore the affinity mask before the tail call.
				1014	//
				1015	__kmp_set_system_affinity(oldMask, TRUE);
				1016	__kmp_free(threadInfo);
				1017	KMP_CPU_FREE(oldMask);
				1018	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1019	return -1;
				1020	}
				1021
				1022	int maskC = (1 << widthC) - 1;
				1023	threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
				1024	&maskC;
				1025
				1026	int maskT = (1 << widthT) - 1;
				1027	threadInfo[nApics].threadId = threadInfo[nApics].apicId &maskT;
				1028
				1029	nApics++;
				1030	}
				1031
				1032	//
				1033	// We've collected all the info we need.
				1034	// Restore the old affinity mask for this thread.
				1035	//
				1036	__kmp_set_system_affinity(oldMask, TRUE);
				1037
				1038	//
				1039	// If there's only one thread context to bind to, form an Address object
				1040	// with depth 1 and return immediately (or, if affinity is off, set
				1041	// address2os to NULL and return).
				1042	//
				1043	// If it is configured to omit the package level when there is only a
				1044	// single package, the logic at the end of this routine won't work if
				1045	// there is only a single thread - it would try to form an Address
				1046	// object with depth 0.
				1047	//
				1048	KMP_ASSERT(nApics > 0);
				1049	if (nApics == 1) {
				1050	__kmp_ncores = nPackages = 1;
				1051	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1052	if (__kmp_affinity_verbose) {
				1053	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				1054	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1055
				1056	KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
				1057	if (__kmp_affinity_respect_mask) {
				1058	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				1059	} else {
				1060	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				1061	}
				1062	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1063	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1064	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1065	__kmp_nThreadsPerCore, __kmp_ncores);
				1066	}
				1067
				1068	if (__kmp_affinity_type == affinity_none) {
				1069	__kmp_free(threadInfo);
				1070	KMP_CPU_FREE(oldMask);
				1071	return 0;
				1072	}
				1073
				1074	address2os = (AddrUnsPair)__kmp_allocate(sizeof(AddrUnsPair));
				1075	Address addr(1);
				1076	addr.labels[0] = threadInfo[0].pkgId;
				1077	(*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
				1078
				1079	if (__kmp_affinity_gran_levels < 0) {
				1080	__kmp_affinity_gran_levels = 0;
				1081	}
				1082
				1083	if (__kmp_affinity_verbose) {
				1084	__kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
				1085	}
				1086
				1087	__kmp_free(threadInfo);
				1088	KMP_CPU_FREE(oldMask);
				1089	return 1;
				1090	}
				1091
				1092	//
				1093	// Sort the threadInfo table by physical Id.
				1094	//
				1095	qsort(threadInfo, nApics, sizeof(*threadInfo),
				1096	__kmp_affinity_cmp_apicThreadInfo_phys_id);
				1097
				1098	//
				1099	// The table is now sorted by pkgId / coreId / threadId, but we really
				1100	// don't know the radix of any of the fields. pkgId's may be sparsely
				1101	// assigned among the chips on a system. Although coreId's are usually
				1102	// assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
				1103	// [0..threadsPerCore-1], we don't want to make any such assumptions.
				1104	//
				1105	// For that matter, we don't know what coresPerPkg and threadsPerCore
				1106	// (or the total # packages) are at this point - we want to determine
				1107	// that now. We only have an upper bound on the first two figures.
				1108	//
				1109	// We also perform a consistency check at this point: the values returned
				1110	// by the cpuid instruction for any thread bound to a given package had
				1111	// better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
				1112	//
				1113	nPackages = 1;
				1114	nCoresPerPkg = 1;
				1115	__kmp_nThreadsPerCore = 1;
				1116	unsigned nCores = 1;
				1117
				1118	unsigned pkgCt = 1; // to determine radii
				1119	unsigned lastPkgId = threadInfo[0].pkgId;
				1120	unsigned coreCt = 1;
				1121	unsigned lastCoreId = threadInfo[0].coreId;
				1122	unsigned threadCt = 1;
				1123	unsigned lastThreadId = threadInfo[0].threadId;
				1124
				1125	// intra-pkg consist checks
				1126	unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
				1127	unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
				1128
				1129	for (i = 1; i < nApics; i++) {
				1130	if (threadInfo[i].pkgId != lastPkgId) {
				1131	nCores++;
				1132	pkgCt++;
				1133	lastPkgId = threadInfo[i].pkgId;
				1134	if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
				1135	coreCt = 1;
				1136	lastCoreId = threadInfo[i].coreId;
				1137	if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
				1138	threadCt = 1;
				1139	lastThreadId = threadInfo[i].threadId;
				1140
				1141	//
				1142	// This is a different package, so go on to the next iteration
				1143	// without doing any consistency checks. Reset the consistency
				1144	// check vars, though.
				1145	//
				1146	prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
				1147	prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
				1148	continue;
				1149	}
				1150
				1151	if (threadInfo[i].coreId != lastCoreId) {
				1152	nCores++;
				1153	coreCt++;
				1154	lastCoreId = threadInfo[i].coreId;
				1155	if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
				1156	threadCt = 1;
				1157	lastThreadId = threadInfo[i].threadId;
				1158	}
				1159	else if (threadInfo[i].threadId != lastThreadId) {
				1160	threadCt++;
				1161	lastThreadId = threadInfo[i].threadId;
				1162	}
				1163	else {
				1164	__kmp_free(threadInfo);
				1165	KMP_CPU_FREE(oldMask);
				1166	*msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
				1167	return -1;
				1168	}
				1169
				1170	//
				1171	// Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
				1172	// fields agree between all the threads bounds to a given package.
				1173	//
				1174	if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
				1175	\|\| (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
				1176	__kmp_free(threadInfo);
				1177	KMP_CPU_FREE(oldMask);
				1178	*msg_id = kmp_i18n_str_InconsistentCpuidInfo;
				1179	return -1;
				1180	}
				1181	}
				1182	nPackages = pkgCt;
				1183	if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
				1184	if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
				1185
				1186	//
				1187	// When affinity is off, this routine will still be called to set
Andrey Churbanov	f696c82	2015-01-27 16:55:43 +0000	[diff] [blame]	1188	// __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1189	// nCoresPerPkg, & nPackages. Make sure all these vars are set
				1190	// correctly, and return now if affinity is not enabled.
				1191	//
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1192	__kmp_ncores = nCores;
				1193	if (__kmp_affinity_verbose) {
				1194	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				1195	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1196
				1197	KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
				1198	if (__kmp_affinity_respect_mask) {
				1199	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				1200	} else {
				1201	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				1202	}
				1203	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1204	if (__kmp_affinity_uniform_topology()) {
				1205	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1206	} else {
				1207	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				1208	}
				1209	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1210	__kmp_nThreadsPerCore, __kmp_ncores);
				1211
				1212	}
				1213
				1214	if (__kmp_affinity_type == affinity_none) {
				1215	__kmp_free(threadInfo);
				1216	KMP_CPU_FREE(oldMask);
				1217	return 0;
				1218	}
				1219
				1220	//
				1221	// Now that we've determined the number of packages, the number of cores
				1222	// per package, and the number of threads per core, we can construct the
				1223	// data structure that is to be returned.
				1224	//
				1225	int pkgLevel = 0;
				1226	int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
				1227	int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
				1228	unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
				1229
				1230	KMP_ASSERT(depth > 0);
				1231	address2os = (AddrUnsPair)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
				1232
				1233	for (i = 0; i < nApics; ++i) {
				1234	Address addr(depth);
				1235	unsigned os = threadInfo[i].osId;
				1236	int d = 0;
				1237
				1238	if (pkgLevel >= 0) {
				1239	addr.labels[d++] = threadInfo[i].pkgId;
				1240	}
				1241	if (coreLevel >= 0) {
				1242	addr.labels[d++] = threadInfo[i].coreId;
				1243	}
				1244	if (threadLevel >= 0) {
				1245	addr.labels[d++] = threadInfo[i].threadId;
				1246	}
				1247	(*address2os)[i] = AddrUnsPair(addr, os);
				1248	}
				1249
				1250	if (__kmp_affinity_gran_levels < 0) {
				1251	//
				1252	// Set the granularity level based on what levels are modeled
				1253	// in the machine topology map.
				1254	//
				1255	__kmp_affinity_gran_levels = 0;
				1256	if ((threadLevel >= 0)
				1257	&& (__kmp_affinity_gran > affinity_gran_thread)) {
				1258	__kmp_affinity_gran_levels++;
				1259	}
				1260	if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
				1261	__kmp_affinity_gran_levels++;
				1262	}
				1263	if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
				1264	__kmp_affinity_gran_levels++;
				1265	}
				1266	}
				1267
				1268	if (__kmp_affinity_verbose) {
				1269	__kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
				1270	coreLevel, threadLevel);
				1271	}
				1272
				1273	__kmp_free(threadInfo);
				1274	KMP_CPU_FREE(oldMask);
				1275	return depth;
				1276	}
				1277
				1278
				1279	//
				1280	// Intel(R) microarchitecture code name Nehalem, Dunnington and later
				1281	// architectures support a newer interface for specifying the x2APIC Ids,
				1282	// based on cpuid leaf 11.
				1283	//
				1284	static int
				1285	__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
				1286	kmp_i18n_id_t *const msg_id)
				1287	{
				1288	kmp_cpuid buf;
				1289
				1290	*address2os = NULL;
				1291	*msg_id = kmp_i18n_null;
				1292
				1293	//
				1294	// Check to see if cpuid leaf 11 is supported.
				1295	//
				1296	__kmp_x86_cpuid(0, 0, &buf);
				1297	if (buf.eax < 11) {
				1298	*msg_id = kmp_i18n_str_NoLeaf11Support;
				1299	return -1;
				1300	}
				1301	__kmp_x86_cpuid(11, 0, &buf);
				1302	if (buf.ebx == 0) {
				1303	*msg_id = kmp_i18n_str_NoLeaf11Support;
				1304	return -1;
				1305	}
				1306
				1307	//
				1308	// Find the number of levels in the machine topology. While we're at it,
				1309	// get the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will
				1310	// try to get more accurate values later by explicitly counting them,
				1311	// but get reasonable defaults now, in case we return early.
				1312	//
				1313	int level;
				1314	int threadLevel = -1;
				1315	int coreLevel = -1;
				1316	int pkgLevel = -1;
				1317	__kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
				1318
				1319	for (level = 0;; level++) {
				1320	if (level > 31) {
				1321	//
				1322	// FIXME: Hack for DPD200163180
				1323	//
				1324	// If level is big then something went wrong -> exiting
				1325	//
				1326	// There could actually be 32 valid levels in the machine topology,
				1327	// but so far, the only machine we have seen which does not exit
				1328	// this loop before iteration 32 has fubar x2APIC settings.
				1329	//
				1330	// For now, just reject this case based upon loop trip count.
				1331	//
				1332	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1333	return -1;
				1334	}
				1335	__kmp_x86_cpuid(11, level, &buf);
				1336	if (buf.ebx == 0) {
				1337	if (pkgLevel < 0) {
				1338	//
				1339	// Will infer nPackages from __kmp_xproc
				1340	//
				1341	pkgLevel = level;
				1342	level++;
				1343	}
				1344	break;
				1345	}
				1346	int kind = (buf.ecx >> 8) & 0xff;
				1347	if (kind == 1) {
				1348	//
				1349	// SMT level
				1350	//
				1351	threadLevel = level;
				1352	coreLevel = -1;
				1353	pkgLevel = -1;
				1354	__kmp_nThreadsPerCore = buf.ebx & 0xff;
				1355	if (__kmp_nThreadsPerCore == 0) {
				1356	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1357	return -1;
				1358	}
				1359	}
				1360	else if (kind == 2) {
				1361	//
				1362	// core level
				1363	//
				1364	coreLevel = level;
				1365	pkgLevel = -1;
				1366	nCoresPerPkg = buf.ebx & 0xff;
				1367	if (nCoresPerPkg == 0) {
				1368	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1369	return -1;
				1370	}
				1371	}
				1372	else {
				1373	if (level <= 0) {
				1374	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1375	return -1;
				1376	}
				1377	if (pkgLevel >= 0) {
				1378	continue;
				1379	}
				1380	pkgLevel = level;
				1381	nPackages = buf.ebx & 0xff;
				1382	if (nPackages == 0) {
				1383	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1384	return -1;
				1385	}
				1386	}
				1387	}
				1388	int depth = level;
				1389
				1390	//
				1391	// In the above loop, "level" was counted from the finest level (usually
				1392	// thread) to the coarsest. The caller expects that we will place the
				1393	// labels in (*address2os)[].first.labels[] in the inverse order, so
				1394	// we need to invert the vars saying which level means what.
				1395	//
				1396	if (threadLevel >= 0) {
				1397	threadLevel = depth - threadLevel - 1;
				1398	}
				1399	if (coreLevel >= 0) {
				1400	coreLevel = depth - coreLevel - 1;
				1401	}
				1402	KMP_DEBUG_ASSERT(pkgLevel >= 0);
				1403	pkgLevel = depth - pkgLevel - 1;
				1404
				1405	//
				1406	// The algorithm used starts by setting the affinity to each available
Andrey Churbanov	1c33129	2015-01-27 17:03:42 +0000	[diff] [blame]	1407	// thread and retrieving info from the cpuid instruction, so if we are
				1408	// not capable of calling __kmp_get_system_affinity() and
				1409	// _kmp_get_system_affinity(), then we need to do something else - use
				1410	// the defaults that we calculated from issuing cpuid without binding
				1411	// to each proc.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1412	//
				1413	if (! KMP_AFFINITY_CAPABLE())
				1414	{
				1415	//
				1416	// Hack to try and infer the machine topology using only the data
				1417	// available from cpuid on the current thread, and __kmp_xproc.
				1418	//
				1419	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				1420
				1421	__kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
				1422	nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1423	if (__kmp_affinity_verbose) {
				1424	KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
				1425	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1426	if (__kmp_affinity_uniform_topology()) {
				1427	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1428	} else {
				1429	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				1430	}
				1431	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1432	__kmp_nThreadsPerCore, __kmp_ncores);
				1433	}
				1434	return 0;
				1435	}
				1436
				1437	//
				1438	//
				1439	// From here on, we can assume that it is safe to call
				1440	// __kmp_get_system_affinity() and __kmp_set_system_affinity(),
				1441	// even if __kmp_affinity_type = affinity_none.
				1442	//
				1443
				1444	//
				1445	// Save the affinity mask for the current thread.
				1446	//
				1447	kmp_affin_mask_t *oldMask;
				1448	KMP_CPU_ALLOC(oldMask);
				1449	__kmp_get_system_affinity(oldMask, TRUE);
				1450
				1451	//
				1452	// Allocate the data structure to be returned.
				1453	//
				1454	AddrUnsPair retval = (AddrUnsPair )
				1455	__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
				1456
				1457	//
				1458	// Run through each of the available contexts, binding the current thread
				1459	// to it, and obtaining the pertinent information using the cpuid instr.
				1460	//
				1461	unsigned int proc;
				1462	int nApics = 0;
				1463	for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
				1464	//
				1465	// Skip this proc if it is not included in the machine model.
				1466	//
				1467	if (! KMP_CPU_ISSET(proc, fullMask)) {
				1468	continue;
				1469	}
				1470	KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
				1471
				1472	__kmp_affinity_bind_thread(proc);
				1473
				1474	//
				1475	// Extrach the labels for each level in the machine topology map
				1476	// from the Apic ID.
				1477	//
				1478	Address addr(depth);
				1479	int prev_shift = 0;
				1480
				1481	for (level = 0; level < depth; level++) {
				1482	__kmp_x86_cpuid(11, level, &buf);
				1483	unsigned apicId = buf.edx;
				1484	if (buf.ebx == 0) {
				1485	if (level != depth - 1) {
				1486	KMP_CPU_FREE(oldMask);
				1487	*msg_id = kmp_i18n_str_InconsistentCpuidInfo;
				1488	return -1;
				1489	}
				1490	addr.labels[depth - level - 1] = apicId >> prev_shift;
				1491	level++;
				1492	break;
				1493	}
				1494	int shift = buf.eax & 0x1f;
				1495	int mask = (1 << shift) - 1;
				1496	addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
				1497	prev_shift = shift;
				1498	}
				1499	if (level != depth) {
				1500	KMP_CPU_FREE(oldMask);
				1501	*msg_id = kmp_i18n_str_InconsistentCpuidInfo;
				1502	return -1;
				1503	}
				1504
				1505	retval[nApics] = AddrUnsPair(addr, proc);
				1506	nApics++;
				1507	}
				1508
				1509	//
				1510	// We've collected all the info we need.
				1511	// Restore the old affinity mask for this thread.
				1512	//
				1513	__kmp_set_system_affinity(oldMask, TRUE);
				1514
				1515	//
				1516	// If there's only one thread context to bind to, return now.
				1517	//
				1518	KMP_ASSERT(nApics > 0);
				1519	if (nApics == 1) {
				1520	__kmp_ncores = nPackages = 1;
				1521	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1522	if (__kmp_affinity_verbose) {
				1523	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				1524	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1525
				1526	KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
				1527	if (__kmp_affinity_respect_mask) {
				1528	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				1529	} else {
				1530	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				1531	}
				1532	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1533	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1534	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1535	__kmp_nThreadsPerCore, __kmp_ncores);
				1536	}
				1537
				1538	if (__kmp_affinity_type == affinity_none) {
				1539	__kmp_free(retval);
				1540	KMP_CPU_FREE(oldMask);
				1541	return 0;
				1542	}
				1543
				1544	//
				1545	// Form an Address object which only includes the package level.
				1546	//
				1547	Address addr(1);
				1548	addr.labels[0] = retval[0].first.labels[pkgLevel];
				1549	retval[0].first = addr;
				1550
				1551	if (__kmp_affinity_gran_levels < 0) {
				1552	__kmp_affinity_gran_levels = 0;
				1553	}
				1554
				1555	if (__kmp_affinity_verbose) {
				1556	__kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
				1557	}
				1558
				1559	*address2os = retval;
				1560	KMP_CPU_FREE(oldMask);
				1561	return 1;
				1562	}
				1563
				1564	//
				1565	// Sort the table by physical Id.
				1566	//
				1567	qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
				1568
				1569	//
				1570	// Find the radix at each of the levels.
				1571	//
				1572	unsigned totals = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1573	unsigned counts = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1574	unsigned maxCt = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1575	unsigned last = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1576	for (level = 0; level < depth; level++) {
				1577	totals[level] = 1;
				1578	maxCt[level] = 1;
				1579	counts[level] = 1;
				1580	last[level] = retval[0].first.labels[level];
				1581	}
				1582
				1583	//
				1584	// From here on, the iteration variable "level" runs from the finest
				1585	// level to the coarsest, i.e. we iterate forward through
				1586	// (*address2os)[].first.labels[] - in the previous loops, we iterated
				1587	// backwards.
				1588	//
				1589	for (proc = 1; (int)proc < nApics; proc++) {
				1590	int level;
				1591	for (level = 0; level < depth; level++) {
				1592	if (retval[proc].first.labels[level] != last[level]) {
				1593	int j;
				1594	for (j = level + 1; j < depth; j++) {
				1595	totals[j]++;
				1596	counts[j] = 1;
				1597	// The line below causes printing incorrect topology information
				1598	// in case the max value for some level (maxCt[level]) is encountered earlier than
				1599	// some less value while going through the array.
				1600	// For example, let pkg0 has 4 cores and pkg1 has 2 cores. Then maxCt[1] == 2
				1601	// whereas it must be 4.
				1602	// TODO!!! Check if it can be commented safely
				1603	//maxCt[j] = 1;
				1604	last[j] = retval[proc].first.labels[j];
				1605	}
				1606	totals[level]++;
				1607	counts[level]++;
				1608	if (counts[level] > maxCt[level]) {
				1609	maxCt[level] = counts[level];
				1610	}
				1611	last[level] = retval[proc].first.labels[level];
				1612	break;
				1613	}
				1614	else if (level == depth - 1) {
				1615	__kmp_free(last);
				1616	__kmp_free(maxCt);
				1617	__kmp_free(counts);
				1618	__kmp_free(totals);
				1619	__kmp_free(retval);
				1620	KMP_CPU_FREE(oldMask);
				1621	*msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
				1622	return -1;
				1623	}
				1624	}
				1625	}
				1626
				1627	//
				1628	// When affinity is off, this routine will still be called to set
Andrey Churbanov	f696c82	2015-01-27 16:55:43 +0000	[diff] [blame]	1629	// __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1630	// nCoresPerPkg, & nPackages. Make sure all these vars are set
				1631	// correctly, and return if affinity is not enabled.
				1632	//
				1633	if (threadLevel >= 0) {
				1634	__kmp_nThreadsPerCore = maxCt[threadLevel];
				1635	}
				1636	else {
				1637	__kmp_nThreadsPerCore = 1;
				1638	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1639	nPackages = totals[pkgLevel];
				1640
				1641	if (coreLevel >= 0) {
				1642	__kmp_ncores = totals[coreLevel];
				1643	nCoresPerPkg = maxCt[coreLevel];
				1644	}
				1645	else {
				1646	__kmp_ncores = nPackages;
				1647	nCoresPerPkg = 1;
				1648	}
				1649
				1650	//
				1651	// Check to see if the machine topology is uniform
				1652	//
				1653	unsigned prod = maxCt[0];
				1654	for (level = 1; level < depth; level++) {
				1655	prod *= maxCt[level];
				1656	}
				1657	bool uniform = (prod == totals[level - 1]);
				1658
				1659	//
				1660	// Print the machine topology summary.
				1661	//
				1662	if (__kmp_affinity_verbose) {
				1663	char mask[KMP_AFFIN_MASK_PRINT_LEN];
				1664	__kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1665
				1666	KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
				1667	if (__kmp_affinity_respect_mask) {
				1668	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
				1669	} else {
				1670	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
				1671	}
				1672	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1673	if (uniform) {
				1674	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1675	} else {
				1676	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				1677	}
				1678
				1679	kmp_str_buf_t buf;
				1680	__kmp_str_buf_init(&buf);
				1681
				1682	__kmp_str_buf_print(&buf, "%d", totals[0]);
				1683	for (level = 1; level <= pkgLevel; level++) {
				1684	__kmp_str_buf_print(&buf, " x %d", maxCt[level]);
				1685	}
				1686	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
				1687	__kmp_nThreadsPerCore, __kmp_ncores);
				1688
				1689	__kmp_str_buf_free(&buf);
				1690	}
				1691
				1692	if (__kmp_affinity_type == affinity_none) {
				1693	__kmp_free(last);
				1694	__kmp_free(maxCt);
				1695	__kmp_free(counts);
				1696	__kmp_free(totals);
				1697	__kmp_free(retval);
				1698	KMP_CPU_FREE(oldMask);
				1699	return 0;
				1700	}
				1701
				1702	//
				1703	// Find any levels with radix 1, and remove them from the map
				1704	// (except for the package level).
				1705	//
				1706	int new_depth = 0;
				1707	for (level = 0; level < depth; level++) {
				1708	if ((maxCt[level] == 1) && (level != pkgLevel)) {
				1709	continue;
				1710	}
				1711	new_depth++;
				1712	}
				1713
				1714	//
				1715	// If we are removing any levels, allocate a new vector to return,
				1716	// and copy the relevant information to it.
				1717	//
				1718	if (new_depth != depth) {
				1719	AddrUnsPair new_retval = (AddrUnsPair )__kmp_allocate(
				1720	sizeof(AddrUnsPair) * nApics);
				1721	for (proc = 0; (int)proc < nApics; proc++) {
				1722	Address addr(new_depth);
				1723	new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
				1724	}
				1725	int new_level = 0;
				1726	for (level = 0; level < depth; level++) {
				1727	if ((maxCt[level] == 1) && (level != pkgLevel)) {
				1728	if (level == threadLevel) {
				1729	threadLevel = -1;
				1730	}
				1731	else if ((threadLevel >= 0) && (level < threadLevel)) {
				1732	threadLevel--;
				1733	}
				1734	if (level == coreLevel) {
				1735	coreLevel = -1;
				1736	}
				1737	else if ((coreLevel >= 0) && (level < coreLevel)) {
				1738	coreLevel--;
				1739	}
				1740	if (level < pkgLevel) {
				1741	pkgLevel--;
				1742	}
				1743	continue;
				1744	}
				1745	for (proc = 0; (int)proc < nApics; proc++) {
				1746	new_retval[proc].first.labels[new_level]
				1747	= retval[proc].first.labels[level];
				1748	}
				1749	new_level++;
				1750	}
				1751
				1752	__kmp_free(retval);
				1753	retval = new_retval;
				1754	depth = new_depth;
				1755	}
				1756
				1757	if (__kmp_affinity_gran_levels < 0) {
				1758	//
				1759	// Set the granularity level based on what levels are modeled
				1760	// in the machine topology map.
				1761	//
				1762	__kmp_affinity_gran_levels = 0;
				1763	if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
				1764	__kmp_affinity_gran_levels++;
				1765	}
				1766	if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
				1767	__kmp_affinity_gran_levels++;
				1768	}
				1769	if (__kmp_affinity_gran > affinity_gran_package) {
				1770	__kmp_affinity_gran_levels++;
				1771	}
				1772	}
				1773
				1774	if (__kmp_affinity_verbose) {
				1775	__kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
				1776	coreLevel, threadLevel);
				1777	}
				1778
				1779	__kmp_free(last);
				1780	__kmp_free(maxCt);
				1781	__kmp_free(counts);
				1782	__kmp_free(totals);
				1783	KMP_CPU_FREE(oldMask);
				1784	*address2os = retval;
				1785	return depth;
				1786	}
				1787
				1788
				1789	# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
				1790
				1791
				//
				// Field indices into one processor record (an array of unsigned) that the
				// cpuinfo parser fills in for each "processor" entry. The physical-id
				// comparator walks a record from maxIndex down to osIdIndex, so a higher
				// index is a more significant sort key. A node_<n> field is stored at
				// nodeIdIndex + n.
				//
				1792	#define osIdIndex 0
				1793	#define threadIdIndex 1
				1794	#define coreIdIndex 2
				1795	#define pkgIdIndex 3
				1796	#define nodeIdIndex 4
				1797
				//
				// ProcCpuInfo: pointer to the first field of a processor record.
				// NOTE(review): the code visible here spells the record type out as
				// "unsigned *" and does not use this typedef - confirm whether it is
				// still needed.
				//
				1798	typedef unsigned *ProcCpuInfo;
				//
				// Highest valid field index in a processor record. Starts at pkgIdIndex
				// and is raised by the cpuinfo parser when it sees node_<n> fields; it is
				// never lowered again in the code visible here.
				//
				1799	static unsigned maxIndex = pkgIdIndex;
				1800
				1801
				1802	static int
				1803	__kmp_affinity_cmp_ProcCpuInfo_os_id(const void a, const void b)
				1804	{
				1805	const unsigned aa = (const unsigned )a;
				1806	const unsigned bb = (const unsigned )b;
				1807	if (aa[osIdIndex] < bb[osIdIndex]) return -1;
				1808	if (aa[osIdIndex] > bb[osIdIndex]) return 1;
				1809	return 0;
				1810	};
				1811
				1812
				1813	static int
				1814	__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void a, const void b)
				1815	{
				1816	unsigned i;
				1817	const unsigned aa = ((const unsigned **)a);
				1818	const unsigned bb = ((const unsigned **)b);
				1819	for (i = maxIndex; ; i--) {
				1820	if (aa[i] < bb[i]) return -1;
				1821	if (aa[i] > bb[i]) return 1;
				1822	if (i == osIdIndex) break;
				1823	}
				1824	return 0;
				1825	}
				1826
				1827
				1828	//
				1829	// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
				1830	// affinity map.
				1831	//
				1832	static int
				1833	__kmp_affinity_create_cpuinfo_map(AddrUnsPair *address2os, int line,
				1834	kmp_i18n_id_t const msg_id, FILE f)
				1835	{
				1836	*address2os = NULL;
				1837	*msg_id = kmp_i18n_null;
				1838
				1839	//
				1840	// Scan of the file, and count the number of "processor" (osId) fields,
Alp Toker	8f2d3f0	2014-02-24 10:40:15 +0000	[diff] [blame]	1841	// and find the highest value of <n> for a node_<n> field.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1842	//
				1843	char buf[256];
				1844	unsigned num_records = 0;
				1845	while (! feof(f)) {
				1846	buf[sizeof(buf) - 1] = 1;
				1847	if (! fgets(buf, sizeof(buf), f)) {
				1848	//
				1849	// Read errors presumably because of EOF
				1850	//
				1851	break;
				1852	}
				1853
				1854	char s1[] = "processor";
				1855	if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
				1856	num_records++;
				1857	continue;
				1858	}
				1859
				1860	//
				1861	// FIXME - this will match "node_<n> <garbage>"
				1862	//
				1863	unsigned level;
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	1864	if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1865	if (nodeIdIndex + level >= maxIndex) {
				1866	maxIndex = nodeIdIndex + level;
				1867	}
				1868	continue;
				1869	}
				1870	}
				1871
				1872	//
				1873	// Check for empty file / no valid processor records, or too many.
				1874	// The number of records can't exceed the number of valid bits in the
				1875	// affinity mask.
				1876	//
				1877	if (num_records == 0) {
				1878	*line = 0;
				1879	*msg_id = kmp_i18n_str_NoProcRecords;
				1880	return -1;
				1881	}
				1882	if (num_records > (unsigned)__kmp_xproc) {
				1883	*line = 0;
				1884	*msg_id = kmp_i18n_str_TooManyProcRecords;
				1885	return -1;
				1886	}
				1887
				1888	//
				1889	// Set the file pointer back to the begginning, so that we can scan the
				1890	// file again, this time performing a full parse of the data.
				1891	// Allocate a vector of ProcCpuInfo object, where we will place the data.
				1892	// Adding an extra element at the end allows us to remove a lot of extra
				1893	// checks for termination conditions.
				1894	//
				1895	if (fseek(f, 0, SEEK_SET) != 0) {
				1896	*line = 0;
				1897	*msg_id = kmp_i18n_str_CantRewindCpuinfo;
				1898	return -1;
				1899	}
				1900
				1901	//
				1902	// Allocate the array of records to store the proc info in. The dummy
				1903	// element at the end makes the logic in filling them out easier to code.
				1904	//
				1905	unsigned threadInfo = (unsigned )__kmp_allocate((num_records + 1)
				1906	* sizeof(unsigned *));
				1907	unsigned i;
				1908	for (i = 0; i <= num_records; i++) {
				1909	threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
				1910	* sizeof(unsigned));
				1911	}
				1912
				1913	#define CLEANUP_THREAD_INFO \
				1914	for (i = 0; i <= num_records; i++) { \
				1915	__kmp_free(threadInfo[i]); \
				1916	} \
				1917	__kmp_free(threadInfo);
				1918
				1919	//
				1920	// A value of UINT_MAX means that we didn't find the field
				1921	//
				1922	unsigned __index;
				1923
				1924	#define INIT_PROC_INFO(p) \
				1925	for (__index = 0; __index <= maxIndex; __index++) { \
				1926	(p)[__index] = UINT_MAX; \
				1927	}
				1928
				1929	for (i = 0; i <= num_records; i++) {
				1930	INIT_PROC_INFO(threadInfo[i]);
				1931	}
				1932
				1933	unsigned num_avail = 0;
				1934	*line = 0;
				1935	while (! feof(f)) {
				1936	//
				1937	// Create an inner scoping level, so that all the goto targets at the
				1938	// end of the loop appear in an outer scoping level. This avoids
				1939	// warnings about jumping past an initialization to a target in the
				1940	// same block.
				1941	//
				1942	{
				1943	buf[sizeof(buf) - 1] = 1;
				1944	bool long_line = false;
				1945	if (! fgets(buf, sizeof(buf), f)) {
				1946	//
				1947	// Read errors presumably because of EOF
				1948	//
				1949	// If there is valid data in threadInfo[num_avail], then fake
				1950	// a blank line in ensure that the last address gets parsed.
				1951	//
				1952	bool valid = false;
				1953	for (i = 0; i <= maxIndex; i++) {
				1954	if (threadInfo[num_avail][i] != UINT_MAX) {
				1955	valid = true;
				1956	}
				1957	}
				1958	if (! valid) {
				1959	break;
				1960	}
				1961	buf[0] = 0;
				1962	} else if (!buf[sizeof(buf) - 1]) {
				1963	//
				1964	// The line is longer than the buffer. Set a flag and don't
				1965	// emit an error if we were going to ignore the line, anyway.
				1966	//
				1967	long_line = true;
				1968
				1969	#define CHECK_LINE \
				1970	if (long_line) { \
				1971	CLEANUP_THREAD_INFO; \
				1972	*msg_id = kmp_i18n_str_LongLineCpuinfo; \
				1973	return -1; \
				1974	}
				1975	}
				1976	(*line)++;
				1977
				1978	char s1[] = "processor";
				1979	if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
				1980	CHECK_LINE;
				1981	char *p = strchr(buf + sizeof(s1) - 1, ':');
				1982	unsigned val;
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	1983	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1984	if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
				1985	threadInfo[num_avail][osIdIndex] = val;
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	1986	#if KMP_OS_LINUX && USE_SYSFS_INFO
				1987	char path[256];
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	1988	KMP_SNPRINTF(path, sizeof(path),
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	1989	"/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
				1990	threadInfo[num_avail][osIdIndex]);
				1991	__kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
				1992
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	1993	KMP_SNPRINTF(path, sizeof(path),
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	1994	"/sys/devices/system/cpu/cpu%u/topology/core_id",
				1995	threadInfo[num_avail][osIdIndex]);
				1996	__kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1997	continue;
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	1998	#else
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1999	}
				2000	char s2[] = "physical id";
				2001	if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
				2002	CHECK_LINE;
				2003	char *p = strchr(buf + sizeof(s2) - 1, ':');
				2004	unsigned val;
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	2005	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2006	if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
				2007	threadInfo[num_avail][pkgIdIndex] = val;
				2008	continue;
				2009	}
				2010	char s3[] = "core id";
				2011	if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
				2012	CHECK_LINE;
				2013	char *p = strchr(buf + sizeof(s3) - 1, ':');
				2014	unsigned val;
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	2015	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2016	if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
				2017	threadInfo[num_avail][coreIdIndex] = val;
				2018	continue;
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	2019	#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2020	}
				2021	char s4[] = "thread id";
				2022	if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
				2023	CHECK_LINE;
				2024	char *p = strchr(buf + sizeof(s4) - 1, ':');
				2025	unsigned val;
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	2026	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2027	if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
				2028	threadInfo[num_avail][threadIdIndex] = val;
				2029	continue;
				2030	}
				2031	unsigned level;
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	2032	if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2033	CHECK_LINE;
				2034	char *p = strchr(buf + sizeof(s4) - 1, ':');
				2035	unsigned val;
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	2036	if ((p == NULL) \|\| (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2037	KMP_ASSERT(nodeIdIndex + level <= maxIndex);
				2038	if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
				2039	threadInfo[num_avail][nodeIdIndex + level] = val;
				2040	continue;
				2041	}
				2042
				2043	//
				2044	// We didn't recognize the leading token on the line.
				2045	// There are lots of leading tokens that we don't recognize -
				2046	// if the line isn't empty, go on to the next line.
				2047	//
				2048	if ((buf != 0) && (buf != '\n')) {
				2049	//
				2050	// If the line is longer than the buffer, read characters
				2051	// until we find a newline.
				2052	//
				2053	if (long_line) {
				2054	int ch;
				2055	while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
				2056	}
				2057	continue;
				2058	}
				2059
				2060	//
				2061	// A newline has signalled the end of the processor record.
				2062	// Check that there aren't too many procs specified.
				2063	//
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	2064	if ((int)num_avail == __kmp_xproc) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2065	CLEANUP_THREAD_INFO;
				2066	*msg_id = kmp_i18n_str_TooManyEntries;
				2067	return -1;
				2068	}
				2069
				2070	//
				2071	// Check for missing fields. The osId field must be there, and we
				2072	// currently require that the physical id field is specified, also.
				2073	//
				2074	if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
				2075	CLEANUP_THREAD_INFO;
				2076	*msg_id = kmp_i18n_str_MissingProcField;
				2077	return -1;
				2078	}
				2079	if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
				2080	CLEANUP_THREAD_INFO;
				2081	*msg_id = kmp_i18n_str_MissingPhysicalIDField;
				2082	return -1;
				2083	}
				2084
				2085	//
				2086	// Skip this proc if it is not included in the machine model.
				2087	//
				2088	if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
				2089	INIT_PROC_INFO(threadInfo[num_avail]);
				2090	continue;
				2091	}
				2092
				2093	//
				2094	// We have a successful parse of this proc's info.
				2095	// Increment the counter, and prepare for the next proc.
				2096	//
				2097	num_avail++;
				2098	KMP_ASSERT(num_avail <= num_records);
				2099	INIT_PROC_INFO(threadInfo[num_avail]);
				2100	}
				2101	continue;
				2102
				2103	no_val:
				2104	CLEANUP_THREAD_INFO;
				2105	*msg_id = kmp_i18n_str_MissingValCpuinfo;
				2106	return -1;
				2107
				2108	dup_field:
				2109	CLEANUP_THREAD_INFO;
				2110	*msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
				2111	return -1;
				2112	}
				2113	*line = 0;
				2114
				2115	# if KMP_MIC && REDUCE_TEAM_SIZE
				2116	unsigned teamSize = 0;
				2117	# endif // KMP_MIC && REDUCE_TEAM_SIZE
				2118
				2119	// check for num_records == __kmp_xproc ???
				2120
				2121	//
				2122	// If there's only one thread context to bind to, form an Address object
				2123	// with depth 1 and return immediately (or, if affinity is off, set
				2124	// address2os to NULL and return).
				2125	//
				2126	// If it is configured to omit the package level when there is only a
				2127	// single package, the logic at the end of this routine won't work if
				2128	// there is only a single thread - it would try to form an Address
				2129	// object with depth 0.
				2130	//
				2131	KMP_ASSERT(num_avail > 0);
				2132	KMP_ASSERT(num_avail <= num_records);
				2133	if (num_avail == 1) {
				2134	__kmp_ncores = 1;
				2135	__kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2136	if (__kmp_affinity_verbose) {
				2137	if (! KMP_AFFINITY_CAPABLE()) {
				2138	KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
				2139	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				2140	KMP_INFORM(Uniform, "KMP_AFFINITY");
				2141	}
				2142	else {
				2143	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				2144	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				2145	fullMask);
				2146	KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
				2147	if (__kmp_affinity_respect_mask) {
				2148	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				2149	} else {
				2150	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				2151	}
				2152	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				2153	KMP_INFORM(Uniform, "KMP_AFFINITY");
				2154	}
				2155	int index;
				2156	kmp_str_buf_t buf;
				2157	__kmp_str_buf_init(&buf);
				2158	__kmp_str_buf_print(&buf, "1");
				2159	for (index = maxIndex - 1; index > pkgIdIndex; index--) {
				2160	__kmp_str_buf_print(&buf, " x 1");
				2161	}
				2162	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
				2163	__kmp_str_buf_free(&buf);
				2164	}
				2165
				2166	if (__kmp_affinity_type == affinity_none) {
				2167	CLEANUP_THREAD_INFO;
				2168	return 0;
				2169	}
				2170
				2171	address2os = (AddrUnsPair)__kmp_allocate(sizeof(AddrUnsPair));
				2172	Address addr(1);
				2173	addr.labels[0] = threadInfo[0][pkgIdIndex];
				2174	(*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
				2175
				2176	if (__kmp_affinity_gran_levels < 0) {
				2177	__kmp_affinity_gran_levels = 0;
				2178	}
				2179
				2180	if (__kmp_affinity_verbose) {
				2181	__kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
				2182	}
				2183
				2184	CLEANUP_THREAD_INFO;
				2185	return 1;
				2186	}
				2187
				2188	//
				2189	// Sort the threadInfo table by physical Id.
				2190	//
				2191	qsort(threadInfo, num_avail, sizeof(*threadInfo),
				2192	__kmp_affinity_cmp_ProcCpuInfo_phys_id);
				2193
				2194	//
				2195	// The table is now sorted by pkgId / coreId / threadId, but we really
				2196	// don't know the radix of any of the fields. pkgId's may be sparsely
				2197	// assigned among the chips on a system. Although coreId's are usually
				2198	// assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
				2199	// [0..threadsPerCore-1], we don't want to make any such assumptions.
				2200	//
				2201	// For that matter, we don't know what coresPerPkg and threadsPerCore
				2202	// (or the total # packages) are at this point - we want to determine
				2203	// that now. We only have an upper bound on the first two figures.
				2204	//
				2205	unsigned counts = (unsigned )__kmp_allocate((maxIndex + 1)
				2206	* sizeof(unsigned));
				2207	unsigned maxCt = (unsigned )__kmp_allocate((maxIndex + 1)
				2208	* sizeof(unsigned));
				2209	unsigned totals = (unsigned )__kmp_allocate((maxIndex + 1)
				2210	* sizeof(unsigned));
				2211	unsigned lastId = (unsigned )__kmp_allocate((maxIndex + 1)
				2212	* sizeof(unsigned));
				2213
				2214	bool assign_thread_ids = false;
				2215	unsigned threadIdCt;
				2216	unsigned index;
				2217
				2218	restart_radix_check:
				2219	threadIdCt = 0;
				2220
				2221	//
				2222	// Initialize the counter arrays with data from threadInfo[0].
				2223	//
				2224	if (assign_thread_ids) {
				2225	if (threadInfo[0][threadIdIndex] == UINT_MAX) {
				2226	threadInfo[0][threadIdIndex] = threadIdCt++;
				2227	}
				2228	else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
				2229	threadIdCt = threadInfo[0][threadIdIndex] + 1;
				2230	}
				2231	}
				2232	for (index = 0; index <= maxIndex; index++) {
				2233	counts[index] = 1;
				2234	maxCt[index] = 1;
				2235	totals[index] = 1;
				2236	lastId[index] = threadInfo[0][index];;
				2237	}
				2238
				2239	//
				2240	// Run through the rest of the OS procs.
				2241	//
				2242	for (i = 1; i < num_avail; i++) {
				2243	//
				2244	// Find the most significant index whose id differs
				2245	// from the id for the previous OS proc.
				2246	//
				2247	for (index = maxIndex; index >= threadIdIndex; index--) {
				2248	if (assign_thread_ids && (index == threadIdIndex)) {
				2249	//
				2250	// Auto-assign the thread id field if it wasn't specified.
				2251	//
				2252	if (threadInfo[i][threadIdIndex] == UINT_MAX) {
				2253	threadInfo[i][threadIdIndex] = threadIdCt++;
				2254	}
				2255
				2256	//
				2257	// Aparrently the thread id field was specified for some
				2258	// entries and not others. Start the thread id counter
				2259	// off at the next higher thread id.
				2260	//
				2261	else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
				2262	threadIdCt = threadInfo[i][threadIdIndex] + 1;
				2263	}
				2264	}
				2265	if (threadInfo[i][index] != lastId[index]) {
				2266	//
				2267	// Run through all indices which are less significant,
				2268	// and reset the counts to 1.
				2269	//
				2270	// At all levels up to and including index, we need to
				2271	// increment the totals and record the last id.
				2272	//
				2273	unsigned index2;
				2274	for (index2 = threadIdIndex; index2 < index; index2++) {
				2275	totals[index2]++;
				2276	if (counts[index2] > maxCt[index2]) {
				2277	maxCt[index2] = counts[index2];
				2278	}
				2279	counts[index2] = 1;
				2280	lastId[index2] = threadInfo[i][index2];
				2281	}
				2282	counts[index]++;
				2283	totals[index]++;
				2284	lastId[index] = threadInfo[i][index];
				2285
				2286	if (assign_thread_ids && (index > threadIdIndex)) {
				2287
				2288	# if KMP_MIC && REDUCE_TEAM_SIZE
				2289	//
				2290	// The default team size is the total #threads in the machine
				2291	// minus 1 thread for every core that has 3 or more threads.
				2292	//
				2293	teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
				2294	# endif // KMP_MIC && REDUCE_TEAM_SIZE
				2295
				2296	//
				2297	// Restart the thread counter, as we are on a new core.
				2298	//
				2299	threadIdCt = 0;
				2300
				2301	//
				2302	// Auto-assign the thread id field if it wasn't specified.
				2303	//
				2304	if (threadInfo[i][threadIdIndex] == UINT_MAX) {
				2305	threadInfo[i][threadIdIndex] = threadIdCt++;
				2306	}
				2307
				2308	//
				2309	// Aparrently the thread id field was specified for some
				2310	// entries and not others. Start the thread id counter
				2311	// off at the next higher thread id.
				2312	//
				2313	else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
				2314	threadIdCt = threadInfo[i][threadIdIndex] + 1;
				2315	}
				2316	}
				2317	break;
				2318	}
				2319	}
				2320	if (index < threadIdIndex) {
				2321	//
				2322	// If thread ids were specified, it is an error if they are not
				2323	// unique. Also, check that we waven't already restarted the
				2324	// loop (to be safe - shouldn't need to).
				2325	//
				2326	if ((threadInfo[i][threadIdIndex] != UINT_MAX)
				2327	\|\| assign_thread_ids) {
				2328	__kmp_free(lastId);
				2329	__kmp_free(totals);
				2330	__kmp_free(maxCt);
				2331	__kmp_free(counts);
				2332	CLEANUP_THREAD_INFO;
				2333	*msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
				2334	return -1;
				2335	}
				2336
				2337	//
				2338	// If the thread ids were not specified and we see entries
				2339	// entries that are duplicates, start the loop over and
				2340	// assign the thread ids manually.
				2341	//
				2342	assign_thread_ids = true;
				2343	goto restart_radix_check;
				2344	}
				2345	}
				2346
				2347	# if KMP_MIC && REDUCE_TEAM_SIZE
				2348	//
				2349	// The default team size is the total #threads in the machine
				2350	// minus 1 thread for every core that has 3 or more threads.
				2351	//
				2352	teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
				2353	# endif // KMP_MIC && REDUCE_TEAM_SIZE
				2354
				2355	for (index = threadIdIndex; index <= maxIndex; index++) {
				2356	if (counts[index] > maxCt[index]) {
				2357	maxCt[index] = counts[index];
				2358	}
				2359	}
				2360
				2361	__kmp_nThreadsPerCore = maxCt[threadIdIndex];
				2362	nCoresPerPkg = maxCt[coreIdIndex];
				2363	nPackages = totals[pkgIdIndex];
				2364
				2365	//
				2366	// Check to see if the machine topology is uniform
				2367	//
				2368	unsigned prod = totals[maxIndex];
				2369	for (index = threadIdIndex; index < maxIndex; index++) {
				2370	prod *= maxCt[index];
				2371	}
				2372	bool uniform = (prod == totals[threadIdIndex]);
				2373
				2374	//
				2375	// When affinity is off, this routine will still be called to set
Andrey Churbanov	f696c82	2015-01-27 16:55:43 +0000	[diff] [blame]	2376	// __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2377	// nCoresPerPkg, & nPackages. Make sure all these vars are set
				2378	// correctly, and return now if affinity is not enabled.
				2379	//
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2380	__kmp_ncores = totals[coreIdIndex];
				2381
				2382	if (__kmp_affinity_verbose) {
				2383	if (! KMP_AFFINITY_CAPABLE()) {
				2384	KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
				2385	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				2386	if (uniform) {
				2387	KMP_INFORM(Uniform, "KMP_AFFINITY");
				2388	} else {
				2389	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				2390	}
				2391	}
				2392	else {
				2393	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				2394	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
				2395	KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
				2396	if (__kmp_affinity_respect_mask) {
				2397	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				2398	} else {
				2399	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				2400	}
				2401	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				2402	if (uniform) {
				2403	KMP_INFORM(Uniform, "KMP_AFFINITY");
				2404	} else {
				2405	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				2406	}
				2407	}
				2408	kmp_str_buf_t buf;
				2409	__kmp_str_buf_init(&buf);
				2410
				2411	__kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
				2412	for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
				2413	__kmp_str_buf_print(&buf, " x %d", maxCt[index]);
				2414	}
				2415	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
				2416	maxCt[threadIdIndex], __kmp_ncores);
				2417
				2418	__kmp_str_buf_free(&buf);
				2419	}
				2420
				2421	# if KMP_MIC && REDUCE_TEAM_SIZE
				2422	//
				2423	// Set the default team size.
				2424	//
				2425	if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
				2426	__kmp_dflt_team_nth = teamSize;
				2427	KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
				2428	__kmp_dflt_team_nth));
				2429	}
				2430	# endif // KMP_MIC && REDUCE_TEAM_SIZE
				2431
				2432	if (__kmp_affinity_type == affinity_none) {
				2433	__kmp_free(lastId);
				2434	__kmp_free(totals);
				2435	__kmp_free(maxCt);
				2436	__kmp_free(counts);
				2437	CLEANUP_THREAD_INFO;
				2438	return 0;
				2439	}
				2440
				2441	//
				2442	// Count the number of levels which have more nodes at that level than
				2443	// at the parent's level (with there being an implicit root node of
				2444	// the top level). This is equivalent to saying that there is at least
				2445	// one node at this level which has a sibling. These levels are in the
				2446	// map, and the package level is always in the map.
				2447	//
				2448	bool inMap = (bool )__kmp_allocate((maxIndex + 1) * sizeof(bool));
				2449	int level = 0;
				2450	for (index = threadIdIndex; index < maxIndex; index++) {
				2451	KMP_ASSERT(totals[index] >= totals[index + 1]);
				2452	inMap[index] = (totals[index] > totals[index + 1]);
				2453	}
				2454	inMap[maxIndex] = (totals[maxIndex] > 1);
				2455	inMap[pkgIdIndex] = true;
				2456
				2457	int depth = 0;
				2458	for (index = threadIdIndex; index <= maxIndex; index++) {
				2459	if (inMap[index]) {
				2460	depth++;
				2461	}
				2462	}
				2463	KMP_ASSERT(depth > 0);
				2464
				2465	//
				2466	// Construct the data structure that is to be returned.
				2467	//
				2468	address2os = (AddrUnsPair)
				2469	__kmp_allocate(sizeof(AddrUnsPair) * num_avail);
				2470	int pkgLevel = -1;
				2471	int coreLevel = -1;
				2472	int threadLevel = -1;
				2473
				2474	for (i = 0; i < num_avail; ++i) {
				2475	Address addr(depth);
				2476	unsigned os = threadInfo[i][osIdIndex];
				2477	int src_index;
				2478	int dst_index = 0;
				2479
				2480	for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
				2481	if (! inMap[src_index]) {
				2482	continue;
				2483	}
				2484	addr.labels[dst_index] = threadInfo[i][src_index];
				2485	if (src_index == pkgIdIndex) {
				2486	pkgLevel = dst_index;
				2487	}
				2488	else if (src_index == coreIdIndex) {
				2489	coreLevel = dst_index;
				2490	}
				2491	else if (src_index == threadIdIndex) {
				2492	threadLevel = dst_index;
				2493	}
				2494	dst_index++;
				2495	}
				2496	(*address2os)[i] = AddrUnsPair(addr, os);
				2497	}
				2498
				2499	if (__kmp_affinity_gran_levels < 0) {
				2500	//
				2501	// Set the granularity level based on what levels are modeled
				2502	// in the machine topology map.
				2503	//
				2504	unsigned src_index;
				2505	__kmp_affinity_gran_levels = 0;
				2506	for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
				2507	if (! inMap[src_index]) {
				2508	continue;
				2509	}
				2510	switch (src_index) {
				2511	case threadIdIndex:
				2512	if (__kmp_affinity_gran > affinity_gran_thread) {
				2513	__kmp_affinity_gran_levels++;
				2514	}
				2515
				2516	break;
				2517	case coreIdIndex:
				2518	if (__kmp_affinity_gran > affinity_gran_core) {
				2519	__kmp_affinity_gran_levels++;
				2520	}
				2521	break;
				2522
				2523	case pkgIdIndex:
				2524	if (__kmp_affinity_gran > affinity_gran_package) {
				2525	__kmp_affinity_gran_levels++;
				2526	}
				2527	break;
				2528	}
				2529	}
				2530	}
				2531
				2532	if (__kmp_affinity_verbose) {
				2533	__kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
				2534	coreLevel, threadLevel);
				2535	}
				2536
				2537	__kmp_free(inMap);
				2538	__kmp_free(lastId);
				2539	__kmp_free(totals);
				2540	__kmp_free(maxCt);
				2541	__kmp_free(counts);
				2542	CLEANUP_THREAD_INFO;
				2543	return depth;
				2544	}
				2545
				2546
				2547	//
				2548	// Create and return a table of affinity masks, indexed by OS thread ID.
				2549	// This routine handles OR'ing together all the affinity masks of threads
				2550	// that are sufficiently close, if granularity > fine.
				2551	//
				2552	static kmp_affin_mask_t *
				2553	__kmp_create_masks(unsigned maxIndex, unsigned numUnique,
				2554	AddrUnsPair *address2os, unsigned numAddrs)
				2555	{
				2556	//
				2557	// First form a table of affinity masks in order of OS thread id.
				2558	//
				2559	unsigned depth;
				2560	unsigned maxOsId;
				2561	unsigned i;
				2562
				2563	KMP_ASSERT(numAddrs > 0);
				2564	depth = address2os[0].first.depth;
				2565
				2566	maxOsId = 0;
				2567	for (i = 0; i < numAddrs; i++) {
				2568	unsigned osId = address2os[i].second;
				2569	if (osId > maxOsId) {
				2570	maxOsId = osId;
				2571	}
				2572	}
				2573	kmp_affin_mask_t osId2Mask = (kmp_affin_mask_t )__kmp_allocate(
				2574	(maxOsId + 1) * __kmp_affin_mask_size);
				2575
				2576	//
				2577	// Sort the address2os table according to physical order. Doing so
				2578	// will put all threads on the same core/package/node in consecutive
				2579	// locations.
				2580	//
				2581	qsort(address2os, numAddrs, sizeof(*address2os),
				2582	__kmp_affinity_cmp_Address_labels);
				2583
				2584	KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
				2585	if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
				2586	KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
				2587	}
				2588	if (__kmp_affinity_gran_levels >= (int)depth) {
				2589	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				2590	&& (__kmp_affinity_type != affinity_none))) {
				2591	KMP_WARNING(AffThreadsMayMigrate);
				2592	}
				2593	}
				2594
				2595	//
				2596	// Run through the table, forming the masks for all threads on each
				2597	// core. Threads on the same core will have identical "Address"
				2598	// objects, not considering the last level, which must be the thread
				2599	// id. All threads on a core will appear consecutively.
				2600	//
				2601	unsigned unique = 0;
				2602	unsigned j = 0; // index of 1st thread on core
				2603	unsigned leader = 0;
				2604	Address *leaderAddr = &(address2os[0].first);
				2605	kmp_affin_mask_t *sum
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	2606	= (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2607	KMP_CPU_ZERO(sum);
				2608	KMP_CPU_SET(address2os[0].second, sum);
				2609	for (i = 1; i < numAddrs; i++) {
				2610	//
Alp Toker	8f2d3f0	2014-02-24 10:40:15 +0000	[diff] [blame]	2611	// If this thread is sufficiently close to the leader (within the
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2612	// granularity setting), then set the bit for this os thread in the
				2613	// affinity mask for this group, and go on to the next thread.
				2614	//
				2615	if (leaderAddr->isClose(address2os[i].first,
				2616	__kmp_affinity_gran_levels)) {
				2617	KMP_CPU_SET(address2os[i].second, sum);
				2618	continue;
				2619	}
				2620
				2621	//
				2622	// For every thread in this group, copy the mask to the thread's
				2623	// entry in the osId2Mask table. Mark the first address as a
				2624	// leader.
				2625	//
				2626	for (; j < i; j++) {
				2627	unsigned osId = address2os[j].second;
				2628	KMP_DEBUG_ASSERT(osId <= maxOsId);
				2629	kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
				2630	KMP_CPU_COPY(mask, sum);
				2631	address2os[j].first.leader = (j == leader);
				2632	}
				2633	unique++;
				2634
				2635	//
				2636	// Start a new mask.
				2637	//
				2638	leader = i;
				2639	leaderAddr = &(address2os[i].first);
				2640	KMP_CPU_ZERO(sum);
				2641	KMP_CPU_SET(address2os[i].second, sum);
				2642	}
				2643
				2644	//
				2645	// For every thread in last group, copy the mask to the thread's
				2646	// entry in the osId2Mask table.
				2647	//
				2648	for (; j < i; j++) {
				2649	unsigned osId = address2os[j].second;
				2650	KMP_DEBUG_ASSERT(osId <= maxOsId);
				2651	kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
				2652	KMP_CPU_COPY(mask, sum);
				2653	address2os[j].first.leader = (j == leader);
				2654	}
				2655	unique++;
				2656
				2657	*maxIndex = maxOsId;
				2658	*numUnique = unique;
				2659	return osId2Mask;
				2660	}
				2661
				2662
				2663	//
				2664	// Stuff for the affinity proclist parsers. It's easier to declare these vars
				2665	// as file-static than to try and pass them through the calling sequence of
				2666	// the recursive-descent OMP_PLACES parser.
				2667	//
				2668	static kmp_affin_mask_t *newMasks;
				2669	static int numNewMasks;
				2670	static int nextNewMask;
				2671
				2672	#define ADD_MASK(_mask) \
				2673	{ \
				2674	if (nextNewMask >= numNewMasks) { \
				2675	numNewMasks *= 2; \
				2676	newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
				2677	numNewMasks * __kmp_affin_mask_size); \
				2678	} \
				2679	KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
				2680	nextNewMask++; \
				2681	}
				2682
				2683	#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
				2684	{ \
				2685	if (((_osId) > _maxOsId) \|\| \
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	2686	(! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2687	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings \
				2688	&& (__kmp_affinity_type != affinity_none))) { \
				2689	KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
				2690	} \
				2691	} \
				2692	else { \
				2693	ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
				2694	} \
				2695	}
				2696
				2697
				2698	//
				2699	// Re-parse the proclist (for the explicit affinity type), and form the list
				2700	// of affinity newMasks indexed by gtid.
				2701	//
				2702	static void
				2703	__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
				2704	unsigned int out_numMasks, const char proclist,
				2705	kmp_affin_mask_t *osId2Mask, int maxOsId)
				2706	{
				2707	const char *scan = proclist;
				2708	const char *next = proclist;
				2709
				2710	//
				2711	// We use malloc() for the temporary mask vector,
				2712	// so that we can use realloc() to extend it.
				2713	//
				2714	numNewMasks = 2;
				2715	newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
				2716	* __kmp_affin_mask_size);
				2717	nextNewMask = 0;
				2718	kmp_affin_mask_t sumMask = (kmp_affin_mask_t )__kmp_allocate(
				2719	__kmp_affin_mask_size);
				2720	int setSize = 0;
				2721
				2722	for (;;) {
				2723	int start, end, stride;
				2724
				2725	SKIP_WS(scan);
				2726	next = scan;
				2727	if (*next == '\0') {
				2728	break;
				2729	}
				2730
				2731	if (*next == '{') {
				2732	int num;
				2733	setSize = 0;
				2734	next++; // skip '{'
				2735	SKIP_WS(next);
				2736	scan = next;
				2737
				2738	//
				2739	// Read the first integer in the set.
				2740	//
				2741	KMP_ASSERT2((next >= '0') && (next <= '9'),
				2742	"bad proclist");
				2743	SKIP_DIGITS(next);
				2744	num = __kmp_str_to_int(scan, *next);
				2745	KMP_ASSERT2(num >= 0, "bad explicit proc list");
				2746
				2747	//
				2748	// Copy the mask for that osId to the sum (union) mask.
				2749	//
				2750	if ((num > maxOsId) \|\|
				2751	(! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
				2752	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				2753	&& (__kmp_affinity_type != affinity_none))) {
				2754	KMP_WARNING(AffIgnoreInvalidProcID, num);
				2755	}
				2756	KMP_CPU_ZERO(sumMask);
				2757	}
				2758	else {
				2759	KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
				2760	setSize = 1;
				2761	}
				2762
				2763	for (;;) {
				2764	//
				2765	// Check for end of set.
				2766	//
				2767	SKIP_WS(next);
				2768	if (*next == '}') {
				2769	next++; // skip '}'
				2770	break;
				2771	}
				2772
				2773	//
				2774	// Skip optional comma.
				2775	//
				2776	if (*next == ',') {
				2777	next++;
				2778	}
				2779	SKIP_WS(next);
				2780
				2781	//
				2782	// Read the next integer in the set.
				2783	//
				2784	scan = next;
				2785	KMP_ASSERT2((next >= '0') && (next <= '9'),
				2786	"bad explicit proc list");
				2787
				2788	SKIP_DIGITS(next);
				2789	num = __kmp_str_to_int(scan, *next);
				2790	KMP_ASSERT2(num >= 0, "bad explicit proc list");
				2791
				2792	//
				2793	// Add the mask for that osId to the sum mask.
				2794	//
				2795	if ((num > maxOsId) \|\|
				2796	(! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
				2797	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				2798	&& (__kmp_affinity_type != affinity_none))) {
				2799	KMP_WARNING(AffIgnoreInvalidProcID, num);
				2800	}
				2801	}
				2802	else {
				2803	KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
				2804	setSize++;
				2805	}
				2806	}
				2807	if (setSize > 0) {
				2808	ADD_MASK(sumMask);
				2809	}
				2810
				2811	SKIP_WS(next);
				2812	if (*next == ',') {
				2813	next++;
				2814	}
				2815	scan = next;
				2816	continue;
				2817	}
				2818
				2819	//
				2820	// Read the first integer.
				2821	//
				2822	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list");
				2823	SKIP_DIGITS(next);
				2824	start = __kmp_str_to_int(scan, *next);
				2825	KMP_ASSERT2(start >= 0, "bad explicit proc list");
				2826	SKIP_WS(next);
				2827
				2828	//
				2829	// If this isn't a range, then add a mask to the list and go on.
				2830	//
				2831	if (*next != '-') {
				2832	ADD_MASK_OSID(start, osId2Mask, maxOsId);
				2833
				2834	//
				2835	// Skip optional comma.
				2836	//
				2837	if (*next == ',') {
				2838	next++;
				2839	}
				2840	scan = next;
				2841	continue;
				2842	}
				2843
				2844	//
				2845	// This is a range. Skip over the '-' and read in the 2nd int.
				2846	//
				2847	next++; // skip '-'
				2848	SKIP_WS(next);
				2849	scan = next;
				2850	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list");
				2851	SKIP_DIGITS(next);
				2852	end = __kmp_str_to_int(scan, *next);
				2853	KMP_ASSERT2(end >= 0, "bad explicit proc list");
				2854
				2855	//
				2856	// Check for a stride parameter
				2857	//
				2858	stride = 1;
				2859	SKIP_WS(next);
				2860	if (*next == ':') {
				2861	//
				2862	// A stride is specified. Skip over the ':" and read the 3rd int.
				2863	//
				2864	int sign = +1;
				2865	next++; // skip ':'
				2866	SKIP_WS(next);
				2867	scan = next;
				2868	if (*next == '-') {
				2869	sign = -1;
				2870	next++;
				2871	SKIP_WS(next);
				2872	scan = next;
				2873	}
				2874	KMP_ASSERT2((next >= '0') && (next <= '9'),
				2875	"bad explicit proc list");
				2876	SKIP_DIGITS(next);
				2877	stride = __kmp_str_to_int(scan, *next);
				2878	KMP_ASSERT2(stride >= 0, "bad explicit proc list");
				2879	stride *= sign;
				2880	}
				2881
				2882	//
				2883	// Do some range checks.
				2884	//
				2885	KMP_ASSERT2(stride != 0, "bad explicit proc list");
				2886	if (stride > 0) {
				2887	KMP_ASSERT2(start <= end, "bad explicit proc list");
				2888	}
				2889	else {
				2890	KMP_ASSERT2(start >= end, "bad explicit proc list");
				2891	}
				2892	KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
				2893
				2894	//
				2895	// Add the mask for each OS proc # to the list.
				2896	//
				2897	if (stride > 0) {
				2898	do {
				2899	ADD_MASK_OSID(start, osId2Mask, maxOsId);
				2900	start += stride;
				2901	} while (start <= end);
				2902	}
				2903	else {
				2904	do {
				2905	ADD_MASK_OSID(start, osId2Mask, maxOsId);
				2906	start += stride;
				2907	} while (start >= end);
				2908	}
				2909
				2910	//
				2911	// Skip optional comma.
				2912	//
				2913	SKIP_WS(next);
				2914	if (*next == ',') {
				2915	next++;
				2916	}
				2917	scan = next;
				2918	}
				2919
				2920	*out_numMasks = nextNewMask;
				2921	if (nextNewMask == 0) {
				2922	*out_masks = NULL;
				2923	KMP_INTERNAL_FREE(newMasks);
				2924	return;
				2925	}
				2926	*out_masks
				2927	= (kmp_affin_mask_t )__kmp_allocate(nextNewMask __kmp_affin_mask_size);
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	2928	KMP_MEMCPY(out_masks, newMasks, nextNewMask __kmp_affin_mask_size);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	2929	__kmp_free(sumMask);
				2930	KMP_INTERNAL_FREE(newMasks);
				2931	}
				2932
				2933
				2934	# if OMP_40_ENABLED
				2935
				2936	/*-----------------------------------------------------------------------------
				2937
				2938	Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
				2939	places. Again, Here is the grammar:
				2940
				2941	place_list := place
				2942	place_list := place , place_list
				2943	place := num
				2944	place := place : num
				2945	place := place : num : signed
				2946	place := { subplacelist }
				2947	place := ! place // (lowest priority)
				2948	subplace_list := subplace
				2949	subplace_list := subplace , subplace_list
				2950	subplace := num
				2951	subplace := num : num
				2952	subplace := num : num : signed
				2953	signed := num
				2954	signed := + signed
				2955	signed := - signed
				2956
				2957	-----------------------------------------------------------------------------*/
				2958
				2959	static void
				2960	__kmp_process_subplace_list(const char *scan, kmp_affin_mask_t osId2Mask,
				2961	int maxOsId, kmp_affin_mask_t tempMask, int setSize)
				2962	{
				2963	const char *next;
				2964
				2965	for (;;) {
				2966	int start, count, stride, i;
				2967
				2968	//
				2969	// Read in the starting proc id
				2970	//
				2971	SKIP_WS(*scan);
				2972	KMP_ASSERT2((scan >= '0') && (scan <= '9'),
				2973	"bad explicit places list");
				2974	next = *scan;
				2975	SKIP_DIGITS(next);
				2976	start = __kmp_str_to_int(scan, next);
				2977	KMP_ASSERT(start >= 0);
				2978	*scan = next;
				2979
				2980	//
				2981	// valid follow sets are ',' ':' and '}'
				2982	//
				2983	SKIP_WS(*scan);
				2984	if (scan == '}' \|\| scan == ',') {
				2985	if ((start > maxOsId) \|\|
				2986	(! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
				2987	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				2988	&& (__kmp_affinity_type != affinity_none))) {
				2989	KMP_WARNING(AffIgnoreInvalidProcID, start);
				2990	}
				2991	}
				2992	else {
				2993	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
				2994	(*setSize)++;
				2995	}
				2996	if (**scan == '}') {
				2997	break;
				2998	}
				2999	(*scan)++; // skip ','
				3000	continue;
				3001	}
				3002	KMP_ASSERT2(**scan == ':', "bad explicit places list");
				3003	(*scan)++; // skip ':'
				3004
				3005	//
				3006	// Read count parameter
				3007	//
				3008	SKIP_WS(*scan);
				3009	KMP_ASSERT2((scan >= '0') && (scan <= '9'),
				3010	"bad explicit places list");
				3011	next = *scan;
				3012	SKIP_DIGITS(next);
				3013	count = __kmp_str_to_int(scan, next);
				3014	KMP_ASSERT(count >= 0);
				3015	*scan = next;
				3016
				3017	//
				3018	// valid follow sets are ',' ':' and '}'
				3019	//
				3020	SKIP_WS(*scan);
				3021	if (scan == '}' \|\| scan == ',') {
				3022	for (i = 0; i < count; i++) {
				3023	if ((start > maxOsId) \|\|
				3024	(! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
				3025	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				3026	&& (__kmp_affinity_type != affinity_none))) {
				3027	KMP_WARNING(AffIgnoreInvalidProcID, start);
				3028	}
				3029	break; // don't proliferate warnings for large count
				3030	}
				3031	else {
				3032	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
				3033	start++;
				3034	(*setSize)++;
				3035	}
				3036	}
				3037	if (**scan == '}') {
				3038	break;
				3039	}
				3040	(*scan)++; // skip ','
				3041	continue;
				3042	}
				3043	KMP_ASSERT2(**scan == ':', "bad explicit places list");
				3044	(*scan)++; // skip ':'
				3045
				3046	//
				3047	// Read stride parameter
				3048	//
				3049	int sign = +1;
				3050	for (;;) {
				3051	SKIP_WS(*scan);
				3052	if (**scan == '+') {
				3053	(*scan)++; // skip '+'
				3054	continue;
				3055	}
				3056	if (**scan == '-') {
				3057	sign *= -1;
				3058	(*scan)++; // skip '-'
				3059	continue;
				3060	}
				3061	break;
				3062	}
				3063	SKIP_WS(*scan);
				3064	KMP_ASSERT2((scan >= '0') && (scan <= '9'),
				3065	"bad explicit places list");
				3066	next = *scan;
				3067	SKIP_DIGITS(next);
				3068	stride = __kmp_str_to_int(scan, next);
				3069	KMP_ASSERT(stride >= 0);
				3070	*scan = next;
				3071	stride *= sign;
				3072
				3073	//
				3074	// valid follow sets are ',' and '}'
				3075	//
				3076	SKIP_WS(*scan);
				3077	if (scan == '}' \|\| scan == ',') {
				3078	for (i = 0; i < count; i++) {
				3079	if ((start > maxOsId) \|\|
				3080	(! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
				3081	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				3082	&& (__kmp_affinity_type != affinity_none))) {
				3083	KMP_WARNING(AffIgnoreInvalidProcID, start);
				3084	}
				3085	break; // don't proliferate warnings for large count
				3086	}
				3087	else {
				3088	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
				3089	start += stride;
				3090	(*setSize)++;
				3091	}
				3092	}
				3093	if (**scan == '}') {
				3094	break;
				3095	}
				3096	(*scan)++; // skip ','
				3097	continue;
				3098	}
				3099
				3100	KMP_ASSERT2(0, "bad explicit places list");
				3101	}
				3102	}
				3103
				3104
				3105	static void
				3106	__kmp_process_place(const char *scan, kmp_affin_mask_t osId2Mask,
				3107	int maxOsId, kmp_affin_mask_t tempMask, int setSize)
				3108	{
				3109	const char *next;
				3110
				3111	//
				3112	// valid follow sets are '{' '!' and num
				3113	//
				3114	SKIP_WS(*scan);
				3115	if (**scan == '{') {
				3116	(*scan)++; // skip '{'
				3117	__kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
				3118	setSize);
				3119	KMP_ASSERT2(**scan == '}', "bad explicit places list");
				3120	(*scan)++; // skip '}'
				3121	}
				3122	else if (**scan == '!') {
				3123	__kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
				3124	KMP_CPU_COMPLEMENT(tempMask);
				3125	(*scan)++; // skip '!'
				3126	}
				3127	else if ((scan >= '0') && (scan <= '9')) {
				3128	next = *scan;
				3129	SKIP_DIGITS(next);
				3130	int num = __kmp_str_to_int(scan, next);
				3131	KMP_ASSERT(num >= 0);
				3132	if ((num > maxOsId) \|\|
				3133	(! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
				3134	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				3135	&& (__kmp_affinity_type != affinity_none))) {
				3136	KMP_WARNING(AffIgnoreInvalidProcID, num);
				3137	}
				3138	}
				3139	else {
				3140	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
				3141	(*setSize)++;
				3142	}
				3143	*scan = next; // skip num
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3144	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3145	else {
				3146	KMP_ASSERT2(0, "bad explicit places list");
				3147	}
				3148	}
				3149
				3150
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3151	//static void
				3152	void
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3153	__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
				3154	unsigned int out_numMasks, const char placelist,
				3155	kmp_affin_mask_t *osId2Mask, int maxOsId)
				3156	{
				3157	const char *scan = placelist;
				3158	const char *next = placelist;
				3159
				3160	numNewMasks = 2;
				3161	newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
				3162	* __kmp_affin_mask_size);
				3163	nextNewMask = 0;
				3164
				3165	kmp_affin_mask_t tempMask = (kmp_affin_mask_t )__kmp_allocate(
				3166	__kmp_affin_mask_size);
				3167	KMP_CPU_ZERO(tempMask);
				3168	int setSize = 0;
				3169
				3170	for (;;) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3171	__kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
				3172
				3173	//
				3174	// valid follow sets are ',' ':' and EOL
				3175	//
				3176	SKIP_WS(scan);
				3177	if (scan == '\0' \|\| scan == ',') {
				3178	if (setSize > 0) {
				3179	ADD_MASK(tempMask);
				3180	}
				3181	KMP_CPU_ZERO(tempMask);
				3182	setSize = 0;
				3183	if (*scan == '\0') {
				3184	break;
				3185	}
				3186	scan++; // skip ','
				3187	continue;
				3188	}
				3189
				3190	KMP_ASSERT2(*scan == ':', "bad explicit places list");
				3191	scan++; // skip ':'
				3192
				3193	//
				3194	// Read count parameter
				3195	//
				3196	SKIP_WS(scan);
				3197	KMP_ASSERT2((scan >= '0') && (scan <= '9'),
				3198	"bad explicit places list");
				3199	next = scan;
				3200	SKIP_DIGITS(next);
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	3201	int count = __kmp_str_to_int(scan, *next);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3202	KMP_ASSERT(count >= 0);
				3203	scan = next;
				3204
				3205	//
				3206	// valid follow sets are ',' ':' and EOL
				3207	//
				3208	SKIP_WS(scan);
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3209	int stride;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3210	if (scan == '\0' \|\| scan == ',') {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3211	stride = +1;
				3212	}
				3213	else {
				3214	KMP_ASSERT2(*scan == ':', "bad explicit places list");
				3215	scan++; // skip ':'
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3216
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3217	//
				3218	// Read stride parameter
				3219	//
				3220	int sign = +1;
				3221	for (;;) {
				3222	SKIP_WS(scan);
				3223	if (*scan == '+') {
				3224	scan++; // skip '+'
				3225	continue;
				3226	}
				3227	if (*scan == '-') {
				3228	sign *= -1;
				3229	scan++; // skip '-'
				3230	continue;
				3231	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3232	break;
				3233	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3234	SKIP_WS(scan);
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3235	KMP_ASSERT2((scan >= '0') && (scan <= '9'),
				3236	"bad explicit places list");
				3237	next = scan;
				3238	SKIP_DIGITS(next);
				3239	stride = __kmp_str_to_int(scan, *next);
				3240	KMP_DEBUG_ASSERT(stride >= 0);
				3241	scan = next;
				3242	stride *= sign;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3243	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3244
				3245	if (stride > 0) {
				3246	int i;
				3247	for (i = 0; i < count; i++) {
				3248	int j;
				3249	if (setSize == 0) {
				3250	break;
				3251	}
				3252	ADD_MASK(tempMask);
				3253	setSize = 0;
				3254	for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3255	if (! KMP_CPU_ISSET(j - stride, tempMask)) {
				3256	KMP_CPU_CLR(j, tempMask);
				3257	}
				3258	else if ((j > maxOsId) \|\|
				3259	(! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
Andrey Churbanov	16a1432	2015-03-10 09:34:38 +0000	[diff] [blame]	3260	if ((__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				3261	&& (__kmp_affinity_type != affinity_none))) && i < count - 1) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3262	KMP_WARNING(AffIgnoreInvalidProcID, j);
				3263	}
				3264	KMP_CPU_CLR(j, tempMask);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3265	}
				3266	else {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3267	KMP_CPU_SET(j, tempMask);
				3268	setSize++;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3269	}
				3270	}
				3271	for (; j >= 0; j--) {
				3272	KMP_CPU_CLR(j, tempMask);
				3273	}
				3274	}
				3275	}
				3276	else {
				3277	int i;
				3278	for (i = 0; i < count; i++) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3279	int j;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3280	if (setSize == 0) {
				3281	break;
				3282	}
				3283	ADD_MASK(tempMask);
				3284	setSize = 0;
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3285	for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3286	j++) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3287	if (! KMP_CPU_ISSET(j - stride, tempMask)) {
				3288	KMP_CPU_CLR(j, tempMask);
				3289	}
				3290	else if ((j > maxOsId) \|\|
				3291	(! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
Andrey Churbanov	16a1432	2015-03-10 09:34:38 +0000	[diff] [blame]	3292	if ((__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				3293	&& (__kmp_affinity_type != affinity_none))) && i < count - 1) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3294	KMP_WARNING(AffIgnoreInvalidProcID, j);
				3295	}
				3296	KMP_CPU_CLR(j, tempMask);
				3297	}
				3298	else {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3299	KMP_CPU_SET(j, tempMask);
				3300	setSize++;
				3301	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3302	}
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3303	for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3304	KMP_CPU_CLR(j, tempMask);
				3305	}
				3306	}
				3307	}
				3308	KMP_CPU_ZERO(tempMask);
				3309	setSize = 0;
				3310
				3311	//
				3312	// valid follow sets are ',' and EOL
				3313	//
				3314	SKIP_WS(scan);
				3315	if (*scan == '\0') {
				3316	break;
				3317	}
				3318	if (*scan == ',') {
				3319	scan++; // skip ','
				3320	continue;
				3321	}
				3322
				3323	KMP_ASSERT2(0, "bad explicit places list");
				3324	}
				3325
				3326	*out_numMasks = nextNewMask;
				3327	if (nextNewMask == 0) {
				3328	*out_masks = NULL;
				3329	KMP_INTERNAL_FREE(newMasks);
				3330	return;
				3331	}
				3332	*out_masks
				3333	= (kmp_affin_mask_t )__kmp_allocate(nextNewMask __kmp_affin_mask_size);
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	3334	KMP_MEMCPY(out_masks, newMasks, nextNewMask __kmp_affin_mask_size);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3335	__kmp_free(tempMask);
				3336	KMP_INTERNAL_FREE(newMasks);
				3337	}
				3338
				3339	# endif /* OMP_40_ENABLED */
				3340
				3341	#undef ADD_MASK
				3342	#undef ADD_MASK_OSID
				3343
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3344	static void
				3345	__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
				3346	{
				3347	if ( __kmp_place_num_cores == 0 ) {
				3348	if ( __kmp_place_num_threads_per_core == 0 ) {
				3349	return; // no cores limiting actions requested, exit
				3350	}
				3351	__kmp_place_num_cores = nCoresPerPkg; // use all available cores
				3352	}
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3353	if ( !__kmp_affinity_uniform_topology() ) {
				3354	KMP_WARNING( AffThrPlaceNonUniform );
				3355	return; // don't support non-uniform topology
				3356	}
				3357	if ( depth != 3 ) {
				3358	KMP_WARNING( AffThrPlaceNonThreeLevel );
				3359	return; // don't support not-3-level topology
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3360	}
				3361	if ( __kmp_place_num_threads_per_core == 0 ) {
				3362	__kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
				3363	}
Andrey Churbanov	1287557	2015-03-10 09:00:36 +0000	[diff] [blame]	3364	if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3365	KMP_WARNING( AffThrPlaceManyCores );
				3366	return;
				3367	}
				3368
				3369	AddrUnsPair newAddr = (AddrUnsPair )__kmp_allocate( sizeof(AddrUnsPair) *
				3370	nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
				3371	int i, j, k, n_old = 0, n_new = 0;
				3372	for ( i = 0; i < nPackages; ++i ) {
				3373	for ( j = 0; j < nCoresPerPkg; ++j ) {
Andrey Churbanov	1287557	2015-03-10 09:00:36 +0000	[diff] [blame]	3374	if ( j < __kmp_place_core_offset \|\| j >= __kmp_place_core_offset + __kmp_place_num_cores ) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3375	n_old += __kmp_nThreadsPerCore; // skip not-requested core
				3376	} else {
				3377	for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) {
Andrey Churbanov	1287557	2015-03-10 09:00:36 +0000	[diff] [blame]	3378	if ( k < __kmp_place_num_threads_per_core ) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3379	newAddr[n_new] = (*pAddr)[n_old]; // copy requested core' data to new location
				3380	n_new++;
				3381	}
				3382	n_old++;
				3383	}
				3384	}
				3385	}
				3386	}
				3387	nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
				3388	__kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
				3389	__kmp_avail_proc = n_new; // correct avail_proc
				3390	__kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
				3391
				3392	__kmp_free( *pAddr );
				3393	*pAddr = newAddr; // replace old topology with new one
				3394	}
				3395
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3396
					// File-scope state for affinity initialization.
					// address2os is filled in by the topology-map routines called from
					// __kmp_aux_affinity_initialize (it is passed to them by address).
				3397	static AddrUnsPair *address2os = NULL;
					// NOTE(review): procarr is not used in the visible part of the file;
					// presumably a per-processor work array - confirm against its users.
				3398	static int * procarr = NULL;
					// NOTE(review): presumably the depth of the topology held in
					// address2os - confirm against its users.
				3399	static int __kmp_aff_depth = 0;
				3400
				3401	static void
				3402	__kmp_aux_affinity_initialize(void)
				3403	{
				3404	if (__kmp_affinity_masks != NULL) {
				3405	KMP_ASSERT(fullMask != NULL);
				3406	return;
				3407	}
				3408
				3409	//
				3410	// Create the "full" mask - this defines all of the processors that we
				3411	// consider to be in the machine model. If respect is set, then it is
				3412	// the initialization thread's affinity mask. Otherwise, it is all
				3413	// processors that we know about on the machine.
				3414	//
				3415	if (fullMask == NULL) {
				3416	fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
				3417	}
				3418	if (KMP_AFFINITY_CAPABLE()) {
				3419	if (__kmp_affinity_respect_mask) {
				3420	__kmp_get_system_affinity(fullMask, TRUE);
				3421
				3422	//
				3423	// Count the number of available processors.
				3424	//
				3425	unsigned i;
				3426	__kmp_avail_proc = 0;
				3427	for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
				3428	if (! KMP_CPU_ISSET(i, fullMask)) {
				3429	continue;
				3430	}
				3431	__kmp_avail_proc++;
				3432	}
				3433	if (__kmp_avail_proc > __kmp_xproc) {
				3434	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				3435	&& (__kmp_affinity_type != affinity_none))) {
				3436	KMP_WARNING(ErrorInitializeAffinity);
				3437	}
				3438	__kmp_affinity_type = affinity_none;
Andrey Churbanov	1f037e4	2015-03-10 09:15:26 +0000	[diff] [blame]	3439	KMP_AFFINITY_DISABLE();
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3440	return;
				3441	}
				3442	}
				3443	else {
				3444	__kmp_affinity_entire_machine_mask(fullMask);
				3445	__kmp_avail_proc = __kmp_xproc;
				3446	}
				3447	}
				3448
				3449	int depth = -1;
				3450	kmp_i18n_id_t msg_id = kmp_i18n_null;
				3451
				3452	//
Alp Toker	8f2d3f0	2014-02-24 10:40:15 +0000	[diff] [blame]	3453	// For backward compatibility, setting KMP_CPUINFO_FILE =>
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3454	// KMP_TOPOLOGY_METHOD=cpuinfo
				3455	//
				3456	if ((__kmp_cpuinfo_file != NULL) &&
				3457	(__kmp_affinity_top_method == affinity_top_method_all)) {
				3458	__kmp_affinity_top_method = affinity_top_method_cpuinfo;
				3459	}
				3460
				3461	if (__kmp_affinity_top_method == affinity_top_method_all) {
				3462	//
				3463	// In the default code path, errors are not fatal - we just try using
				3464	// another method. We only emit a warning message if affinity is on,
				3465	// or the verbose flag is set, and the nowarnings flag was not set.
				3466	//
				3467	const char *file_name = NULL;
				3468	int line = 0;
				3469
				3470	# if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
				3471
				3472	if (__kmp_affinity_verbose) {
				3473	KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
				3474	}
				3475
				3476	file_name = NULL;
				3477	depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
				3478	if (depth == 0) {
				3479	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				3480	KMP_ASSERT(address2os == NULL);
				3481	return;
				3482	}
				3483
				3484	if (depth < 0) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3485	if (__kmp_affinity_verbose) {
				3486	if (msg_id != kmp_i18n_null) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3487	KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
				3488	KMP_I18N_STR(DecodingLegacyAPIC));
				3489	}
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3490	else {
				3491	KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
				3492	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3493	}
				3494
				3495	file_name = NULL;
				3496	depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
				3497	if (depth == 0) {
				3498	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				3499	KMP_ASSERT(address2os == NULL);
				3500	return;
				3501	}
				3502	}
				3503
				3504	# endif /* KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 */
				3505
				3506	# if KMP_OS_LINUX
				3507
				3508	if (depth < 0) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3509	if (__kmp_affinity_verbose) {
				3510	if (msg_id != kmp_i18n_null) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3511	KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
				3512	}
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3513	else {
				3514	KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
				3515	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3516	}
				3517
				3518	FILE *f = fopen("/proc/cpuinfo", "r");
				3519	if (f == NULL) {
				3520	msg_id = kmp_i18n_str_CantOpenCpuinfo;
				3521	}
				3522	else {
				3523	file_name = "/proc/cpuinfo";
				3524	depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
				3525	fclose(f);
				3526	if (depth == 0) {
				3527	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				3528	KMP_ASSERT(address2os == NULL);
				3529	return;
				3530	}
				3531	}
				3532	}
				3533
				3534	# endif /* KMP_OS_LINUX */
				3535
Andrey Churbanov	7daf980	2015-01-27 16:52:57 +0000	[diff] [blame]	3536	# if KMP_GROUP_AFFINITY
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3537
				3538	if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
				3539	if (__kmp_affinity_verbose) {
				3540	KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
				3541	}
				3542
				3543	depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
				3544	KMP_ASSERT(depth != 0);
				3545	}
				3546
Andrey Churbanov	7daf980	2015-01-27 16:52:57 +0000	[diff] [blame]	3547	# endif /* KMP_GROUP_AFFINITY */
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3548
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3549	if (depth < 0) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3550	if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3551	if (file_name == NULL) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3552	KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3553	}
				3554	else if (line == 0) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3555	KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3556	}
				3557	else {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3558	KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3559	}
				3560	}
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3561	// FIXME - print msg if msg_id = kmp_i18n_null ???
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3562
				3563	file_name = "";
				3564	depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
				3565	if (depth == 0) {
				3566	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				3567	KMP_ASSERT(address2os == NULL);
				3568	return;
				3569	}
				3570	KMP_ASSERT(depth > 0);
				3571	KMP_ASSERT(address2os != NULL);
				3572	}
				3573	}
				3574
				3575	//
				3576	// If the user has specified that a particular topology discovery method
				3577	// is to be used, then we abort if that method fails. The exception is
				3578	// group affinity, which might have been implicitly set.
				3579	//
				3580
				3581	# if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
				3582
				3583	else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
				3584	if (__kmp_affinity_verbose) {
				3585	KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
				3586	KMP_I18N_STR(Decodingx2APIC));
				3587	}
				3588
				3589	depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
				3590	if (depth == 0) {
				3591	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				3592	KMP_ASSERT(address2os == NULL);
				3593	return;
				3594	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3595	if (depth < 0) {
				3596	KMP_ASSERT(msg_id != kmp_i18n_null);
				3597	KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
				3598	}
				3599	}
				3600	else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
				3601	if (__kmp_affinity_verbose) {
				3602	KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
				3603	KMP_I18N_STR(DecodingLegacyAPIC));
				3604	}
				3605
				3606	depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
				3607	if (depth == 0) {
				3608	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				3609	KMP_ASSERT(address2os == NULL);
				3610	return;
				3611	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3612	if (depth < 0) {
				3613	KMP_ASSERT(msg_id != kmp_i18n_null);
				3614	KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
				3615	}
				3616	}
				3617
				3618	# endif /* KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 */
				3619
				3620	else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
				3621	const char *filename;
				3622	if (__kmp_cpuinfo_file != NULL) {
				3623	filename = __kmp_cpuinfo_file;
				3624	}
				3625	else {
				3626	filename = "/proc/cpuinfo";
				3627	}
				3628
				3629	if (__kmp_affinity_verbose) {
				3630	KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
				3631	}
				3632
				3633	FILE *f = fopen(filename, "r");
				3634	if (f == NULL) {
				3635	int code = errno;
				3636	if (__kmp_cpuinfo_file != NULL) {
				3637	__kmp_msg(
				3638	kmp_ms_fatal,
				3639	KMP_MSG(CantOpenFileForReading, filename),
				3640	KMP_ERR(code),
				3641	KMP_HNT(NameComesFrom_CPUINFO_FILE),
				3642	__kmp_msg_null
				3643	);
				3644	}
				3645	else {
				3646	__kmp_msg(
				3647	kmp_ms_fatal,
				3648	KMP_MSG(CantOpenFileForReading, filename),
				3649	KMP_ERR(code),
				3650	__kmp_msg_null
				3651	);
				3652	}
				3653	}
				3654	int line = 0;
				3655	depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
				3656	fclose(f);
				3657	if (depth < 0) {
				3658	KMP_ASSERT(msg_id != kmp_i18n_null);
				3659	if (line > 0) {
				3660	KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
				3661	}
				3662	else {
				3663	KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
				3664	}
				3665	}
				3666	if (__kmp_affinity_type == affinity_none) {
				3667	KMP_ASSERT(depth == 0);
				3668	KMP_ASSERT(address2os == NULL);
				3669	return;
				3670	}
				3671	}
				3672
Andrey Churbanov	7daf980	2015-01-27 16:52:57 +0000	[diff] [blame]	3673	# if KMP_GROUP_AFFINITY
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3674
				3675	else if (__kmp_affinity_top_method == affinity_top_method_group) {
				3676	if (__kmp_affinity_verbose) {
				3677	KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
				3678	}
				3679
				3680	depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
				3681	KMP_ASSERT(depth != 0);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3682	if (depth < 0) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3683	KMP_ASSERT(msg_id != kmp_i18n_null);
				3684	KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3685	}
				3686	}
				3687
Andrey Churbanov	7daf980	2015-01-27 16:52:57 +0000	[diff] [blame]	3688	# endif /* KMP_GROUP_AFFINITY */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3689
				3690	else if (__kmp_affinity_top_method == affinity_top_method_flat) {
				3691	if (__kmp_affinity_verbose) {
				3692	KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
				3693	}
				3694
				3695	depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
				3696	if (depth == 0) {
				3697	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				3698	KMP_ASSERT(address2os == NULL);
				3699	return;
				3700	}
				3701	// should not fail
				3702	KMP_ASSERT(depth > 0);
				3703	KMP_ASSERT(address2os != NULL);
				3704	}
				3705
				3706	if (address2os == NULL) {
				3707	if (KMP_AFFINITY_CAPABLE()
				3708	&& (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				3709	&& (__kmp_affinity_type != affinity_none)))) {
				3710	KMP_WARNING(ErrorInitializeAffinity);
				3711	}
				3712	__kmp_affinity_type = affinity_none;
Andrey Churbanov	1f037e4	2015-03-10 09:15:26 +0000	[diff] [blame]	3713	KMP_AFFINITY_DISABLE();
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3714	return;
				3715	}
				3716
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3717	__kmp_apply_thread_places(&address2os, depth);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3718
				3719	//
				3720	// Create the table of masks, indexed by thread Id.
				3721	//
				3722	unsigned maxIndex;
				3723	unsigned numUnique;
				3724	kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
				3725	address2os, __kmp_avail_proc);
				3726	if (__kmp_affinity_gran_levels == 0) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3727	KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3728	}
				3729
				3730	//
				3731	// Set the childNums vector in all Address objects. This must be done
				3732	// before we can sort using __kmp_affinity_cmp_Address_child_num(),
				3733	// which takes into account the setting of __kmp_affinity_compact.
				3734	//
				3735	__kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
				3736
				3737	switch (__kmp_affinity_type) {
				3738
				3739	case affinity_explicit:
				3740	KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
				3741	# if OMP_40_ENABLED
				3742	if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
				3743	# endif
				3744	{
				3745	__kmp_affinity_process_proclist(&__kmp_affinity_masks,
				3746	&__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
				3747	maxIndex);
				3748	}
				3749	# if OMP_40_ENABLED
				3750	else {
				3751	__kmp_affinity_process_placelist(&__kmp_affinity_masks,
				3752	&__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
				3753	maxIndex);
				3754	}
				3755	# endif
				3756	if (__kmp_affinity_num_masks == 0) {
				3757	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				3758	&& (__kmp_affinity_type != affinity_none))) {
				3759	KMP_WARNING(AffNoValidProcID);
				3760	}
				3761	__kmp_affinity_type = affinity_none;
				3762	return;
				3763	}
				3764	break;
				3765
				3766	//
				3767	// The other affinity types rely on sorting the Addresses according
				3768	// to some permutation of the machine topology tree. Set
				3769	// __kmp_affinity_compact and __kmp_affinity_offset appropriately,
				3770	// then jump to a common code fragment to do the sort and create
				3771	// the array of affinity masks.
				3772	//
				3773
				3774	case affinity_logical:
				3775	__kmp_affinity_compact = 0;
				3776	if (__kmp_affinity_offset) {
				3777	__kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
				3778	% __kmp_avail_proc;
				3779	}
				3780	goto sortAddresses;
				3781
				3782	case affinity_physical:
				3783	if (__kmp_nThreadsPerCore > 1) {
				3784	__kmp_affinity_compact = 1;
				3785	if (__kmp_affinity_compact >= depth) {
				3786	__kmp_affinity_compact = 0;
				3787	}
				3788	} else {
				3789	__kmp_affinity_compact = 0;
				3790	}
				3791	if (__kmp_affinity_offset) {
				3792	__kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
				3793	% __kmp_avail_proc;
				3794	}
				3795	goto sortAddresses;
				3796
				3797	case affinity_scatter:
				3798	if (__kmp_affinity_compact >= depth) {
				3799	__kmp_affinity_compact = 0;
				3800	}
				3801	else {
				3802	__kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
				3803	}
				3804	goto sortAddresses;
				3805
				3806	case affinity_compact:
				3807	if (__kmp_affinity_compact >= depth) {
				3808	__kmp_affinity_compact = depth - 1;
				3809	}
				3810	goto sortAddresses;
				3811
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3812	case affinity_balanced:
Jonathan Peyton	caf09fe	2015-05-27 23:27:33 +0000	[diff] [blame]	3813	// Balanced works only for the case of a single package
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3814	if( nPackages > 1 ) {
				3815	if( __kmp_affinity_verbose \|\| __kmp_affinity_warnings ) {
				3816	KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
				3817	}
				3818	__kmp_affinity_type = affinity_none;
				3819	return;
				3820	} else if( __kmp_affinity_uniform_topology() ) {
				3821	break;
				3822	} else { // Non-uniform topology
				3823
				3824	// Save the depth for further usage
				3825	__kmp_aff_depth = depth;
				3826
				3827	// Number of hyper threads per core in HT machine
				3828	int nth_per_core = __kmp_nThreadsPerCore;
				3829
				3830	int core_level;
				3831	if( nth_per_core > 1 ) {
				3832	core_level = depth - 2;
				3833	} else {
				3834	core_level = depth - 1;
				3835	}
				3836	int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
				3837	int nproc = nth_per_core * ncores;
				3838
				3839	procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
				3840	for( int i = 0; i < nproc; i++ ) {
				3841	procarr[ i ] = -1;
				3842	}
				3843
				3844	for( int i = 0; i < __kmp_avail_proc; i++ ) {
				3845	int proc = address2os[ i ].second;
				3846	// If depth == 3 then level=0 - package, level=1 - core, level=2 - thread.
				3847	// If there is only one thread per core then depth == 2: level 0 - package,
				3848	// level 1 - core.
				3849	int level = depth - 1;
				3850
				3851	// __kmp_nth_per_core == 1
				3852	int thread = 0;
				3853	int core = address2os[ i ].first.labels[ level ];
				3854	// If the thread level exists, that is we have more than one thread context per core
				3855	if( nth_per_core > 1 ) {
				3856	thread = address2os[ i ].first.labels[ level ] % nth_per_core;
				3857	core = address2os[ i ].first.labels[ level - 1 ];
				3858	}
				3859	procarr[ core * nth_per_core + thread ] = proc;
				3860	}
				3861
				3862	break;
				3863	}
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3864
				3865	sortAddresses:
				3866	//
				3867	// Allocate the gtid->affinity mask table.
				3868	//
				3869	if (__kmp_affinity_dups) {
				3870	__kmp_affinity_num_masks = __kmp_avail_proc;
				3871	}
				3872	else {
				3873	__kmp_affinity_num_masks = numUnique;
				3874	}
				3875
				3876	# if OMP_40_ENABLED
				3877	if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
				3878	&& ( __kmp_affinity_num_places > 0 )
				3879	&& ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
				3880	__kmp_affinity_num_masks = __kmp_affinity_num_places;
				3881	}
				3882	# endif
				3883
				3884	__kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
				3885	__kmp_affinity_num_masks * __kmp_affin_mask_size);
				3886
				3887	//
				3888	// Sort the address2os table according to the current setting of
				3889	// __kmp_affinity_compact, then fill out __kmp_affinity_masks.
				3890	//
				3891	qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
				3892	__kmp_affinity_cmp_Address_child_num);
				3893	{
				3894	int i;
				3895	unsigned j;
				3896	for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
				3897	if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
				3898	continue;
				3899	}
				3900	unsigned osId = address2os[i].second;
				3901	kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
				3902	kmp_affin_mask_t *dest
				3903	= KMP_CPU_INDEX(__kmp_affinity_masks, j);
				3904	KMP_ASSERT(KMP_CPU_ISSET(osId, src));
				3905	KMP_CPU_COPY(dest, src);
				3906	if (++j >= __kmp_affinity_num_masks) {
				3907	break;
				3908	}
				3909	}
				3910	KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
				3911	}
				3912	break;
				3913
				3914	default:
				3915	KMP_ASSERT2(0, "Unexpected affinity setting");
				3916	}
				3917
				3918	__kmp_free(osId2Mask);
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	3919	machine_hierarchy.init(address2os, __kmp_avail_proc);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3920	}
				3921
				3922
				3923	void
				3924	__kmp_affinity_initialize(void)
				3925	{
				3926	//
				3927	// Much of the code above was written assumming that if a machine was not
				3928	// affinity capable, then __kmp_affinity_type == affinity_none. We now
				3929	// explicitly represent this as __kmp_affinity_type == affinity_disabled.
				3930	//
				3931	// There are too many checks for __kmp_affinity_type == affinity_none
				3932	// in this code. Instead of trying to change them all, check if
				3933	// __kmp_affinity_type == affinity_disabled, and if so, slam it with
				3934	// affinity_none, call the real initialization routine, then restore
				3935	// __kmp_affinity_type to affinity_disabled.
				3936	//
				3937	int disabled = (__kmp_affinity_type == affinity_disabled);
				3938	if (! KMP_AFFINITY_CAPABLE()) {
				3939	KMP_ASSERT(disabled);
				3940	}
				3941	if (disabled) {
				3942	__kmp_affinity_type = affinity_none;
				3943	}
				3944	__kmp_aux_affinity_initialize();
				3945	if (disabled) {
				3946	__kmp_affinity_type = affinity_disabled;
				3947	}
				3948	}
				3949
				3950
				3951	void
				3952	__kmp_affinity_uninitialize(void)
				3953	{
				3954	if (__kmp_affinity_masks != NULL) {
				3955	__kmp_free(__kmp_affinity_masks);
				3956	__kmp_affinity_masks = NULL;
				3957	}
				3958	if (fullMask != NULL) {
				3959	KMP_CPU_FREE(fullMask);
				3960	fullMask = NULL;
				3961	}
				3962	__kmp_affinity_num_masks = 0;
				3963	# if OMP_40_ENABLED
				3964	__kmp_affinity_num_places = 0;
				3965	# endif
				3966	if (__kmp_affinity_proclist != NULL) {
				3967	__kmp_free(__kmp_affinity_proclist);
				3968	__kmp_affinity_proclist = NULL;
				3969	}
				3970	if( address2os != NULL ) {
				3971	__kmp_free( address2os );
				3972	address2os = NULL;
				3973	}
				3974	if( procarr != NULL ) {
				3975	__kmp_free( procarr );
				3976	procarr = NULL;
				3977	}
				3978	}
				3979
				3980
				3981	void
				3982	__kmp_affinity_set_init_mask(int gtid, int isa_root)
				3983	{
				3984	if (! KMP_AFFINITY_CAPABLE()) {
				3985	return;
				3986	}
				3987
				3988	kmp_info_t th = (kmp_info_t )TCR_SYNC_PTR(__kmp_threads[gtid]);
				3989	if (th->th.th_affin_mask == NULL) {
				3990	KMP_CPU_ALLOC(th->th.th_affin_mask);
				3991	}
				3992	else {
				3993	KMP_CPU_ZERO(th->th.th_affin_mask);
				3994	}
				3995
				3996	//
				3997	// Copy the thread mask to the kmp_info_t strucuture.
				3998	// If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
				3999	// that has all of the OS proc ids set, or if __kmp_affinity_respect_mask
				4000	// is set, then the full mask is the same as the mask of the initialization
				4001	// thread.
				4002	//
				4003	kmp_affin_mask_t *mask;
				4004	int i;
				4005
				4006	# if OMP_40_ENABLED
				4007	if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
				4008	# endif
				4009	{
Andrey Churbanov	f28f613	2015-01-13 14:54:00 +0000	[diff] [blame]	4010	if ((__kmp_affinity_type == affinity_none) \|\| (__kmp_affinity_type == affinity_balanced)
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4011	) {
Andrey Churbanov	7daf980	2015-01-27 16:52:57 +0000	[diff] [blame]	4012	# if KMP_GROUP_AFFINITY
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4013	if (__kmp_num_proc_groups > 1) {
				4014	return;
				4015	}
				4016	# endif
				4017	KMP_ASSERT(fullMask != NULL);
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	4018	i = KMP_PLACE_ALL;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4019	mask = fullMask;
				4020	}
				4021	else {
				4022	KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
				4023	i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
				4024	mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
				4025	}
				4026	}
				4027	# if OMP_40_ENABLED
				4028	else {
				4029	if ((! isa_root)
				4030	\|\| (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
Andrey Churbanov	7daf980	2015-01-27 16:52:57 +0000	[diff] [blame]	4031	# if KMP_GROUP_AFFINITY
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4032	if (__kmp_num_proc_groups > 1) {
				4033	return;
				4034	}
				4035	# endif
				4036	KMP_ASSERT(fullMask != NULL);
				4037	i = KMP_PLACE_ALL;
				4038	mask = fullMask;
				4039	}
				4040	else {
				4041	//
				4042	// int i = some hash function or just a counter that doesn't
				4043	// always start at 0. Use gtid for now.
				4044	//
				4045	KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
				4046	i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
				4047	mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
				4048	}
				4049	}
				4050	# endif
				4051
				4052	# if OMP_40_ENABLED
				4053	th->th.th_current_place = i;
				4054	if (isa_root) {
				4055	th->th.th_new_place = i;
				4056	th->th.th_first_place = 0;
				4057	th->th.th_last_place = __kmp_affinity_num_masks - 1;
				4058	}
				4059
				4060	if (i == KMP_PLACE_ALL) {
				4061	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
				4062	gtid));
				4063	}
				4064	else {
				4065	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
				4066	gtid, i));
				4067	}
				4068	# else
				4069	if (i == -1) {
				4070	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
				4071	gtid));
				4072	}
				4073	else {
				4074	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
				4075	gtid, i));
				4076	}
				4077	# endif /* OMP_40_ENABLED */
				4078
				4079	KMP_CPU_COPY(th->th.th_affin_mask, mask);
				4080
				4081	if (__kmp_affinity_verbose) {
				4082	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4083	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4084	th->th.th_affin_mask);
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	4085	KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
				4086	buf);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4087	}
				4088
				4089	# if KMP_OS_WINDOWS
				4090	//
				4091	// On Windows* OS, the process affinity mask might have changed.
				4092	// If the user didn't request affinity and this call fails,
				4093	// just continue silently. See CQ171393.
				4094	//
				4095	if ( __kmp_affinity_type == affinity_none ) {
				4096	__kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
				4097	}
				4098	else
				4099	# endif
				4100	__kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
				4101	}
				4102
				4103
				4104	# if OMP_40_ENABLED
				4105
				4106	void
				4107	__kmp_affinity_set_place(int gtid)
				4108	{
				4109	int retval;
				4110
				4111	if (! KMP_AFFINITY_CAPABLE()) {
				4112	return;
				4113	}
				4114
				4115	kmp_info_t th = (kmp_info_t )TCR_SYNC_PTR(__kmp_threads[gtid]);
				4116
				4117	KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
				4118	gtid, th->th.th_new_place, th->th.th_current_place));
				4119
				4120	//
Alp Toker	8f2d3f0	2014-02-24 10:40:15 +0000	[diff] [blame]	4121	// Check that the new place is within this thread's partition.
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4122	//
				4123	KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	4124	KMP_ASSERT(th->th.th_new_place >= 0);
				4125	KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4126	if (th->th.th_first_place <= th->th.th_last_place) {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	4127	KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4128	&& (th->th.th_new_place <= th->th.th_last_place));
				4129	}
				4130	else {
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	4131	KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4132	\|\| (th->th.th_new_place >= th->th.th_last_place));
				4133	}
				4134
				4135	//
				4136	// Copy the thread mask to the kmp_info_t strucuture,
				4137	// and set this thread's affinity.
				4138	//
				4139	kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
				4140	th->th.th_new_place);
				4141	KMP_CPU_COPY(th->th.th_affin_mask, mask);
				4142	th->th.th_current_place = th->th.th_new_place;
				4143
				4144	if (__kmp_affinity_verbose) {
				4145	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4146	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4147	th->th.th_affin_mask);
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	4148	KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
				4149	gtid, buf);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4150	}
				4151	__kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
				4152	}
				4153
				4154	# endif /* OMP_40_ENABLED */
				4155
				4156
				4157	int
				4158	__kmp_aux_set_affinity(void **mask)
				4159	{
				4160	int gtid;
				4161	kmp_info_t *th;
				4162	int retval;
				4163
				4164	if (! KMP_AFFINITY_CAPABLE()) {
				4165	return -1;
				4166	}
				4167
				4168	gtid = __kmp_entry_gtid();
				4169	KA_TRACE(1000, ;{
				4170	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4171	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4172	(kmp_affin_mask_t )(mask));
				4173	__kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
				4174	gtid, buf);
				4175	});
				4176
				4177	if (__kmp_env_consistency_check) {
				4178	if ((mask == NULL) \|\| (*mask == NULL)) {
				4179	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
				4180	}
				4181	else {
				4182	unsigned proc;
				4183	int num_procs = 0;
				4184
				4185	for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
				4186	if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t )(mask))) {
				4187	continue;
				4188	}
				4189	num_procs++;
				4190	if (! KMP_CPU_ISSET(proc, fullMask)) {
				4191	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
				4192	break;
				4193	}
				4194	}
				4195	if (num_procs == 0) {
				4196	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
				4197	}
				4198
Andrey Churbanov	7daf980	2015-01-27 16:52:57 +0000	[diff] [blame]	4199	# if KMP_GROUP_AFFINITY
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4200	if (__kmp_get_proc_group((kmp_affin_mask_t )(mask)) < 0) {
				4201	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
				4202	}
Andrey Churbanov	7daf980	2015-01-27 16:52:57 +0000	[diff] [blame]	4203	# endif /* KMP_GROUP_AFFINITY */
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4204
				4205	}
				4206	}
				4207
				4208	th = __kmp_threads[gtid];
				4209	KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
				4210	retval = __kmp_set_system_affinity((kmp_affin_mask_t )(mask), FALSE);
				4211	if (retval == 0) {
				4212	KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t )(mask));
				4213	}
				4214
				4215	# if OMP_40_ENABLED
				4216	th->th.th_current_place = KMP_PLACE_UNDEFINED;
				4217	th->th.th_new_place = KMP_PLACE_UNDEFINED;
				4218	th->th.th_first_place = 0;
				4219	th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	4220
				4221	//
				4222	// Turn off 4.0 affinity for the current tread at this parallel level.
				4223	//
				4224	th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4225	# endif
				4226
				4227	return retval;
				4228	}
				4229
				4230
				4231	int
				4232	__kmp_aux_get_affinity(void **mask)
				4233	{
				4234	int gtid;
				4235	int retval;
				4236	kmp_info_t *th;
				4237
				4238	if (! KMP_AFFINITY_CAPABLE()) {
				4239	return -1;
				4240	}
				4241
				4242	gtid = __kmp_entry_gtid();
				4243	th = __kmp_threads[gtid];
				4244	KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
				4245
				4246	KA_TRACE(1000, ;{
				4247	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4248	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4249	th->th.th_affin_mask);
				4250	__kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
				4251	});
				4252
				4253	if (__kmp_env_consistency_check) {
				4254	if ((mask == NULL) \|\| (*mask == NULL)) {
				4255	KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
				4256	}
				4257	}
				4258
				4259	# if !KMP_OS_WINDOWS
				4260
				4261	retval = __kmp_get_system_affinity((kmp_affin_mask_t )(mask), FALSE);
				4262	KA_TRACE(1000, ;{
				4263	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4264	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4265	(kmp_affin_mask_t )(mask));
				4266	__kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
				4267	});
				4268	return retval;
				4269
				4270	# else
				4271
				4272	KMP_CPU_COPY((kmp_affin_mask_t )(mask), th->th.th_affin_mask);
				4273	return 0;
				4274
				4275	# endif /* KMP_OS_WINDOWS */
				4276
				4277	}
				4278
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4279	int
				4280	__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
				4281	{
				4282	int retval;
				4283
				4284	if (! KMP_AFFINITY_CAPABLE()) {
				4285	return -1;
				4286	}
				4287
				4288	KA_TRACE(1000, ;{
				4289	int gtid = __kmp_entry_gtid();
				4290	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4291	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4292	(kmp_affin_mask_t )(mask));
				4293	__kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
				4294	proc, gtid, buf);
				4295	});
				4296
				4297	if (__kmp_env_consistency_check) {
				4298	if ((mask == NULL) \|\| (*mask == NULL)) {
				4299	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
				4300	}
				4301	}
				4302
				4303	if ((proc < 0) \|\| ((unsigned)proc >= KMP_CPU_SETSIZE)) {
				4304	return -1;
				4305	}
				4306	if (! KMP_CPU_ISSET(proc, fullMask)) {
				4307	return -2;
				4308	}
				4309
				4310	KMP_CPU_SET(proc, (kmp_affin_mask_t )(mask));
				4311	return 0;
				4312	}
				4313
				4314
				4315	int
				4316	__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
				4317	{
				4318	int retval;
				4319
				4320	if (! KMP_AFFINITY_CAPABLE()) {
				4321	return -1;
				4322	}
				4323
				4324	KA_TRACE(1000, ;{
				4325	int gtid = __kmp_entry_gtid();
				4326	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4327	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4328	(kmp_affin_mask_t )(mask));
				4329	__kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
				4330	proc, gtid, buf);
				4331	});
				4332
				4333	if (__kmp_env_consistency_check) {
				4334	if ((mask == NULL) \|\| (*mask == NULL)) {
				4335	KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
				4336	}
				4337	}
				4338
				4339	if ((proc < 0) \|\| ((unsigned)proc >= KMP_CPU_SETSIZE)) {
				4340	return -1;
				4341	}
				4342	if (! KMP_CPU_ISSET(proc, fullMask)) {
				4343	return -2;
				4344	}
				4345
				4346	KMP_CPU_CLR(proc, (kmp_affin_mask_t )(mask));
				4347	return 0;
				4348	}
				4349
				4350
				4351	int
				4352	__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
				4353	{
				4354	int retval;
				4355
				4356	if (! KMP_AFFINITY_CAPABLE()) {
				4357	return -1;
				4358	}
				4359
				4360	KA_TRACE(1000, ;{
				4361	int gtid = __kmp_entry_gtid();
				4362	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4363	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4364	(kmp_affin_mask_t )(mask));
				4365	__kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
				4366	proc, gtid, buf);
				4367	});
				4368
				4369	if (__kmp_env_consistency_check) {
				4370	if ((mask == NULL) \|\| (*mask == NULL)) {
Andrey Churbanov	4b2f17a	2015-01-29 15:49:22 +0000	[diff] [blame]	4371	KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4372	}
				4373	}
				4374
				4375	if ((proc < 0) \|\| ((unsigned)proc >= KMP_CPU_SETSIZE)) {
				4376	return 0;
				4377	}
				4378	if (! KMP_CPU_ISSET(proc, fullMask)) {
				4379	return 0;
				4380	}
				4381
				4382	return KMP_CPU_ISSET(proc, (kmp_affin_mask_t )(mask));
				4383	}
				4384
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4385
				4386	// Dynamic affinity settings - Affinity balanced
				4387	void __kmp_balanced_affinity( int tid, int nthreads )
				4388	{
				4389	if( __kmp_affinity_uniform_topology() ) {
				4390	int coreID;
				4391	int threadID;
				4392	// Number of hyper threads per core in HT machine
				4393	int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
				4394	// Number of cores
				4395	int ncores = __kmp_ncores;
				4396	// How many threads will be bound to each core
				4397	int chunk = nthreads / ncores;
				4398	// How many cores will have an additional thread bound to it - "big cores"
				4399	int big_cores = nthreads % ncores;
				4400	// Number of threads on the big cores
				4401	int big_nth = ( chunk + 1 ) * big_cores;
				4402	if( tid < big_nth ) {
				4403	coreID = tid / (chunk + 1 );
				4404	threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
				4405	} else { //tid >= big_nth
				4406	coreID = ( tid - big_cores ) / chunk;
				4407	threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
				4408	}
				4409
				4410	KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
				4411	"Illegal set affinity operation when not capable");
				4412
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	4413	kmp_affin_mask_t mask = (kmp_affin_mask_t )KMP_ALLOCA(__kmp_affin_mask_size);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4414	KMP_CPU_ZERO(mask);
				4415
				4416	// Granularity == thread
				4417	if( __kmp_affinity_gran == affinity_gran_fine \|\| __kmp_affinity_gran == affinity_gran_thread) {
				4418	int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
				4419	KMP_CPU_SET( osID, mask);
				4420	} else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
				4421	for( int i = 0; i < __kmp_nth_per_core; i++ ) {
				4422	int osID;
				4423	osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
				4424	KMP_CPU_SET( osID, mask);
				4425	}
				4426	}
				4427	if (__kmp_affinity_verbose) {
				4428	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4429	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	4430	KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
				4431	tid, buf);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4432	}
				4433	__kmp_set_system_affinity( mask, TRUE );
				4434	} else { // Non-uniform topology
				4435
Andrey Churbanov	74bf17b	2015-04-02 13:27:08 +0000	[diff] [blame]	4436	kmp_affin_mask_t mask = (kmp_affin_mask_t )KMP_ALLOCA(__kmp_affin_mask_size);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4437	KMP_CPU_ZERO(mask);
				4438
				4439	// Number of hyper threads per core in HT machine
				4440	int nth_per_core = __kmp_nThreadsPerCore;
				4441	int core_level;
				4442	if( nth_per_core > 1 ) {
				4443	core_level = __kmp_aff_depth - 2;
				4444	} else {
				4445	core_level = __kmp_aff_depth - 1;
				4446	}
				4447
				4448	// Number of cores - maximum value; it does not count trail cores with 0 processors
				4449	int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
				4450
				4451	// For performance gain consider the special case nthreads == __kmp_avail_proc
				4452	if( nthreads == __kmp_avail_proc ) {
				4453	if( __kmp_affinity_gran == affinity_gran_fine \|\| __kmp_affinity_gran == affinity_gran_thread) {
				4454	int osID = address2os[ tid ].second;
				4455	KMP_CPU_SET( osID, mask);
				4456	} else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
				4457	int coreID = address2os[ tid ].first.labels[ core_level ];
				4458	// We'll count found osIDs for the current core; they can be not more than nth_per_core;
				4459	// since the address2os is sortied we can break when cnt==nth_per_core
				4460	int cnt = 0;
				4461	for( int i = 0; i < __kmp_avail_proc; i++ ) {
				4462	int osID = address2os[ i ].second;
				4463	int core = address2os[ i ].first.labels[ core_level ];
				4464	if( core == coreID ) {
				4465	KMP_CPU_SET( osID, mask);
				4466	cnt++;
				4467	if( cnt == nth_per_core ) {
				4468	break;
				4469	}
				4470	}
				4471	}
				4472	}
				4473	} else if( nthreads <= __kmp_ncores ) {
				4474
				4475	int core = 0;
				4476	for( int i = 0; i < ncores; i++ ) {
				4477	// Check if this core from procarr[] is in the mask
				4478	int in_mask = 0;
				4479	for( int j = 0; j < nth_per_core; j++ ) {
				4480	if( procarr[ i * nth_per_core + j ] != - 1 ) {
				4481	in_mask = 1;
				4482	break;
				4483	}
				4484	}
				4485	if( in_mask ) {
				4486	if( tid == core ) {
				4487	for( int j = 0; j < nth_per_core; j++ ) {
				4488	int osID = procarr[ i * nth_per_core + j ];
				4489	if( osID != -1 ) {
				4490	KMP_CPU_SET( osID, mask );
				4491	// For granularity=thread it is enough to set the first available osID for this core
				4492	if( __kmp_affinity_gran == affinity_gran_fine \|\| __kmp_affinity_gran == affinity_gran_thread) {
				4493	break;
				4494	}
				4495	}
				4496	}
				4497	break;
				4498	} else {
				4499	core++;
				4500	}
				4501	}
				4502	}
				4503
				4504	} else { // nthreads > __kmp_ncores
				4505
				4506	// Array to save the number of processors at each core
Jonathan Peyton	7be07533	2015-06-22 15:53:50 +0000	[diff] [blame]	4507	int* nproc_at_core = (int)KMP_ALLOCA(sizeof(int)ncores);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4508	// Array to save the number of cores with "x" available processors;
Jonathan Peyton	7be07533	2015-06-22 15:53:50 +0000	[diff] [blame]	4509	int* ncores_with_x_procs = (int)KMP_ALLOCA(sizeof(int)(nth_per_core+1));
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4510	// Array to save the number of cores with # procs from x to nth_per_core
Jonathan Peyton	7be07533	2015-06-22 15:53:50 +0000	[diff] [blame]	4511	int* ncores_with_x_to_max_procs = (int)KMP_ALLOCA(sizeof(int)(nth_per_core+1));
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4512
				4513	for( int i = 0; i <= nth_per_core; i++ ) {
				4514	ncores_with_x_procs[ i ] = 0;
				4515	ncores_with_x_to_max_procs[ i ] = 0;
				4516	}
				4517
				4518	for( int i = 0; i < ncores; i++ ) {
				4519	int cnt = 0;
				4520	for( int j = 0; j < nth_per_core; j++ ) {
				4521	if( procarr[ i * nth_per_core + j ] != -1 ) {
				4522	cnt++;
				4523	}
				4524	}
				4525	nproc_at_core[ i ] = cnt;
				4526	ncores_with_x_procs[ cnt ]++;
				4527	}
				4528
				4529	for( int i = 0; i <= nth_per_core; i++ ) {
				4530	for( int j = i; j <= nth_per_core; j++ ) {
				4531	ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
				4532	}
				4533	}
				4534
				4535	// Max number of processors
				4536	int nproc = nth_per_core * ncores;
				4537	// An array to keep number of threads per each context
				4538	int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
				4539	for( int i = 0; i < nproc; i++ ) {
				4540	newarr[ i ] = 0;
				4541	}
				4542
				4543	int nth = nthreads;
				4544	int flag = 0;
				4545	while( nth > 0 ) {
				4546	for( int j = 1; j <= nth_per_core; j++ ) {
				4547	int cnt = ncores_with_x_to_max_procs[ j ];
				4548	for( int i = 0; i < ncores; i++ ) {
				4549	// Skip the core with 0 processors
				4550	if( nproc_at_core[ i ] == 0 ) {
				4551	continue;
				4552	}
				4553	for( int k = 0; k < nth_per_core; k++ ) {
				4554	if( procarr[ i * nth_per_core + k ] != -1 ) {
				4555	if( newarr[ i * nth_per_core + k ] == 0 ) {
				4556	newarr[ i * nth_per_core + k ] = 1;
				4557	cnt--;
				4558	nth--;
				4559	break;
				4560	} else {
				4561	if( flag != 0 ) {
				4562	newarr[ i * nth_per_core + k ] ++;
				4563	cnt--;
				4564	nth--;
				4565	break;
				4566	}
				4567	}
				4568	}
				4569	}
				4570	if( cnt == 0 \|\| nth == 0 ) {
				4571	break;
				4572	}
				4573	}
				4574	if( nth == 0 ) {
				4575	break;
				4576	}
				4577	}
				4578	flag = 1;
				4579	}
				4580	int sum = 0;
				4581	for( int i = 0; i < nproc; i++ ) {
				4582	sum += newarr[ i ];
				4583	if( sum > tid ) {
				4584	// Granularity == thread
				4585	if( __kmp_affinity_gran == affinity_gran_fine \|\| __kmp_affinity_gran == affinity_gran_thread) {
				4586	int osID = procarr[ i ];
				4587	KMP_CPU_SET( osID, mask);
				4588	} else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
				4589	int coreID = i / nth_per_core;
				4590	for( int ii = 0; ii < nth_per_core; ii++ ) {
				4591	int osID = procarr[ coreID * nth_per_core + ii ];
				4592	if( osID != -1 ) {
				4593	KMP_CPU_SET( osID, mask);
				4594	}
				4595	}
				4596	}
				4597	break;
				4598	}
				4599	}
				4600	__kmp_free( newarr );
				4601	}
				4602
				4603	if (__kmp_affinity_verbose) {
				4604	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4605	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	4606	KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
				4607	tid, buf);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	4608	}
				4609	__kmp_set_system_affinity( mask, TRUE );
				4610	}
				4611	}
				4612
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	4613	#else
				4614	// affinity not supported
				4615
Andrey Churbanov	aa1f2b6	2015-04-13 18:51:59 +0000	[diff] [blame]	4616	static const kmp_uint32 noaff_maxLevels=7;
				4617	kmp_uint32 noaff_skipPerLevel[noaff_maxLevels];
				4618	kmp_uint32 noaff_depth;
				4619	kmp_uint8 noaff_leaf_kids;
				4620	kmp_int8 noaff_uninitialized=1;
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	4621
Andrey Churbanov	aa1f2b6	2015-04-13 18:51:59 +0000	[diff] [blame]	4622	void noaff_init(int nprocs)
				4623	{
				4624	kmp_int8 result = KMP_COMPARE_AND_STORE_ACQ8(&noaff_uninitialized, 1, 2);
				4625	if (result == 0) return; // Already initialized
				4626	else if (result == 2) { // Someone else is initializing
				4627	while (TCR_1(noaff_uninitialized) != 0) KMP_CPU_PAUSE();
				4628	return;
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	4629	}
Andrey Churbanov	aa1f2b6	2015-04-13 18:51:59 +0000	[diff] [blame]	4630	KMP_DEBUG_ASSERT(result==1);
				4631
				4632	kmp_uint32 numPerLevel[noaff_maxLevels];
				4633	noaff_depth = 1;
				4634	for (kmp_uint32 i=0; i<noaff_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
				4635	numPerLevel[i] = 1;
				4636	noaff_skipPerLevel[i] = 1;
				4637	}
				4638
				4639	numPerLevel[0] = 4;
				4640	numPerLevel[1] = nprocs/4;
				4641	if (nprocs%4) numPerLevel[1]++;
				4642
				4643	for (int i=noaff_maxLevels-1; i>=0; --i) // count non-empty levels to get depth
				4644	if (numPerLevel[i] != 1 \|\| noaff_depth > 1) // only count one top-level '1'
				4645	noaff_depth++;
				4646
				4647	kmp_uint32 branch = 4;
				4648	if (numPerLevel[0] == 1) branch = nprocs/4;
				4649	if (branch<4) branch=4;
				4650	for (kmp_uint32 d=0; d<noaff_depth-1; ++d) { // optimize hierarchy width
				4651	while (numPerLevel[d] > branch \|\| (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
				4652	if (numPerLevel[d] & 1) numPerLevel[d]++;
				4653	numPerLevel[d] = numPerLevel[d] >> 1;
				4654	if (numPerLevel[d+1] == 1) noaff_depth++;
				4655	numPerLevel[d+1] = numPerLevel[d+1] << 1;
				4656	}
				4657	if(numPerLevel[0] == 1) {
				4658	branch = branch >> 1;
				4659	if (branch<4) branch = 4;
				4660	}
				4661	}
				4662
				4663	for (kmp_uint32 i=1; i<noaff_depth; ++i)
				4664	noaff_skipPerLevel[i] = numPerLevel[i-1] * noaff_skipPerLevel[i-1];
				4665	// Fill in hierarchy in the case of oversubscription
				4666	for (kmp_uint32 i=noaff_depth; i<noaff_maxLevels; ++i)
				4667	noaff_skipPerLevel[i] = 2*noaff_skipPerLevel[i-1];
				4668	noaff_leaf_kids = (kmp_uint8)numPerLevel[0]-1;
				4669	noaff_uninitialized = 0; // One writer
				4670
				4671	}
				4672
				4673	void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
				4674	if (noaff_uninitialized)
				4675	noaff_init(nproc);
				4676
				4677	thr_bar->depth = noaff_depth;
				4678	thr_bar->base_leaf_kids = noaff_leaf_kids;
				4679	thr_bar->skip_per_level = noaff_skipPerLevel;
Jim Cownie	4cc4bb4	2014-10-07 16:25:50 +0000	[diff] [blame]	4680	}
				4681
Alp Toker	763b939	2014-02-28 09:42:41 +0000	[diff] [blame]	4682	#endif // KMP_AFFINITY_SUPPORTED