blob: 2e91c14e8d8971e47934a4b66969813bde6cc310 [file] [log] [blame]
Jim Cownie5e8470a2013-09-27 10:38:44 +00001/*
2 * kmp_affinity.cpp -- affinity management
Jim Cownie5e8470a2013-09-27 10:38:44 +00003 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15
16#include "kmp.h"
17#include "kmp_i18n.h"
18#include "kmp_io.h"
19#include "kmp_str.h"
Jim Cownie4cc4bb42014-10-07 16:25:50 +000020#include "kmp_wrapper_getpid.h"
Jim Cownie5e8470a2013-09-27 10:38:44 +000021
Alp Toker763b9392014-02-28 09:42:41 +000022#if KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +000023
24//
25// Print the affinity mask to the character array in a pretty format.
26//
27char *
28__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
29{
30 KMP_ASSERT(buf_len >= 40);
31 char *scan = buf;
32 char *end = buf + buf_len - 1;
33
34 //
35 // Find first element / check for empty set.
36 //
37 size_t i;
38 for (i = 0; i < KMP_CPU_SETSIZE; i++) {
39 if (KMP_CPU_ISSET(i, mask)) {
40 break;
41 }
42 }
43 if (i == KMP_CPU_SETSIZE) {
Andrey Churbanov74bf17b2015-04-02 13:27:08 +000044 KMP_SNPRINTF(scan, buf_len, "{<empty>}");
Jim Cownie5e8470a2013-09-27 10:38:44 +000045 while (*scan != '\0') scan++;
46 KMP_ASSERT(scan <= end);
47 return buf;
48 }
49
Andrey Churbanov74bf17b2015-04-02 13:27:08 +000050 KMP_SNPRINTF(scan, buf_len, "{%ld", (long)i);
Jim Cownie5e8470a2013-09-27 10:38:44 +000051 while (*scan != '\0') scan++;
52 i++;
53 for (; i < KMP_CPU_SETSIZE; i++) {
54 if (! KMP_CPU_ISSET(i, mask)) {
55 continue;
56 }
57
58 //
59 // Check for buffer overflow. A string of the form ",<n>" will have
60 // at most 10 characters, plus we want to leave room to print ",...}"
61 // if the set is too large to print for a total of 15 characters.
62 // We already left room for '\0' in setting end.
63 //
64 if (end - scan < 15) {
65 break;
66 }
Andrey Churbanov74bf17b2015-04-02 13:27:08 +000067 KMP_SNPRINTF(scan, buf_len, ",%-ld", (long)i);
Jim Cownie5e8470a2013-09-27 10:38:44 +000068 while (*scan != '\0') scan++;
69 }
70 if (i < KMP_CPU_SETSIZE) {
Andrey Churbanov74bf17b2015-04-02 13:27:08 +000071 KMP_SNPRINTF(scan, buf_len, ",...");
Jim Cownie5e8470a2013-09-27 10:38:44 +000072 while (*scan != '\0') scan++;
73 }
Andrey Churbanov74bf17b2015-04-02 13:27:08 +000074 KMP_SNPRINTF(scan, buf_len, "}");
Jim Cownie5e8470a2013-09-27 10:38:44 +000075 while (*scan != '\0') scan++;
76 KMP_ASSERT(scan <= end);
77 return buf;
78}
79
80
81void
82__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
83{
84 KMP_CPU_ZERO(mask);
85
Andrey Churbanov7daf9802015-01-27 16:52:57 +000086# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +000087
88 if (__kmp_num_proc_groups > 1) {
89 int group;
Jim Cownie5e8470a2013-09-27 10:38:44 +000090 KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
91 for (group = 0; group < __kmp_num_proc_groups; group++) {
92 int i;
93 int num = __kmp_GetActiveProcessorCount(group);
94 for (i = 0; i < num; i++) {
95 KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
96 }
97 }
98 }
99 else
100
Andrey Churbanov7daf9802015-01-27 16:52:57 +0000101# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +0000102
103 {
104 int proc;
105 for (proc = 0; proc < __kmp_xproc; proc++) {
106 KMP_CPU_SET(proc, mask);
107 }
108 }
109}
110
111
112//
113// In Linux* OS debug & cover (-O0) builds, we need to avoid inline member
114// functions.
115//
116// The icc codegen emits sections with extremely long names, of the form
117// ".gnu.linkonce.<mangled_name>". There seems to have been a linker bug
118// introduced between GNU ld version 2.14.90.0.4 and 2.15.92.0.2 involving
119// some sort of memory corruption or table overflow that is triggered by
120// these long strings. I checked the latest version of the linker -
121// GNU ld (Linux* OS/GNU Binutils) 2.18.50.0.7.20080422 - and the bug is not
122// fixed.
123//
124// Unfortunately, my attempts to reproduce it in a smaller example have
125// failed - I'm not sure what the prospects are of getting it fixed
Jonathan Peyton66338292015-06-01 02:37:28 +0000126// properly - but we need a reproducer smaller than all of libomp.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000127//
128// Work around the problem by avoiding inline constructors in such builds.
129// We do this for all platforms, not just Linux* OS - non-inline functions are
130// more debuggable and provide better coverage into than inline functions.
131// Use inline functions in shipping libs, for performance.
132//
133
134# if !defined(KMP_DEBUG) && !defined(COVER)
135
136class Address {
137public:
138 static const unsigned maxDepth = 32;
139 unsigned labels[maxDepth];
140 unsigned childNums[maxDepth];
141 unsigned depth;
142 unsigned leader;
143 Address(unsigned _depth)
144 : depth(_depth), leader(FALSE) {
145 }
146 Address &operator=(const Address &b) {
147 depth = b.depth;
148 for (unsigned i = 0; i < depth; i++) {
149 labels[i] = b.labels[i];
150 childNums[i] = b.childNums[i];
151 }
152 leader = FALSE;
153 return *this;
154 }
155 bool operator==(const Address &b) const {
156 if (depth != b.depth)
157 return false;
158 for (unsigned i = 0; i < depth; i++)
159 if(labels[i] != b.labels[i])
160 return false;
161 return true;
162 }
163 bool isClose(const Address &b, int level) const {
164 if (depth != b.depth)
165 return false;
166 if ((unsigned)level >= depth)
167 return true;
168 for (unsigned i = 0; i < (depth - level); i++)
169 if(labels[i] != b.labels[i])
170 return false;
171 return true;
172 }
173 bool operator!=(const Address &b) const {
174 return !operator==(b);
175 }
176};
177
178class AddrUnsPair {
179public:
180 Address first;
181 unsigned second;
182 AddrUnsPair(Address _first, unsigned _second)
183 : first(_first), second(_second) {
184 }
185 AddrUnsPair &operator=(const AddrUnsPair &b)
186 {
187 first = b.first;
188 second = b.second;
189 return *this;
190 }
191};
192
193# else
194
195class Address {
196public:
197 static const unsigned maxDepth = 32;
198 unsigned labels[maxDepth];
199 unsigned childNums[maxDepth];
200 unsigned depth;
201 unsigned leader;
202 Address(unsigned _depth);
203 Address &operator=(const Address &b);
204 bool operator==(const Address &b) const;
205 bool isClose(const Address &b, int level) const;
206 bool operator!=(const Address &b) const;
207};
208
209Address::Address(unsigned _depth)
210{
211 depth = _depth;
212 leader = FALSE;
213}
214
215Address &Address::operator=(const Address &b) {
216 depth = b.depth;
217 for (unsigned i = 0; i < depth; i++) {
218 labels[i] = b.labels[i];
219 childNums[i] = b.childNums[i];
220 }
221 leader = FALSE;
222 return *this;
223}
224
225bool Address::operator==(const Address &b) const {
226 if (depth != b.depth)
227 return false;
228 for (unsigned i = 0; i < depth; i++)
229 if(labels[i] != b.labels[i])
230 return false;
231 return true;
232}
233
234bool Address::isClose(const Address &b, int level) const {
235 if (depth != b.depth)
236 return false;
237 if ((unsigned)level >= depth)
238 return true;
239 for (unsigned i = 0; i < (depth - level); i++)
240 if(labels[i] != b.labels[i])
241 return false;
242 return true;
243}
244
245bool Address::operator!=(const Address &b) const {
246 return !operator==(b);
247}
248
249class AddrUnsPair {
250public:
251 Address first;
252 unsigned second;
253 AddrUnsPair(Address _first, unsigned _second);
254 AddrUnsPair &operator=(const AddrUnsPair &b);
255};
256
257AddrUnsPair::AddrUnsPair(Address _first, unsigned _second)
258 : first(_first), second(_second)
259{
260}
261
262AddrUnsPair &AddrUnsPair::operator=(const AddrUnsPair &b)
263{
264 first = b.first;
265 second = b.second;
266 return *this;
267}
268
269# endif /* !defined(KMP_DEBUG) && !defined(COVER) */
270
271
272static int
273__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
274{
275 const Address *aa = (const Address *)&(((AddrUnsPair *)a)
276 ->first);
277 const Address *bb = (const Address *)&(((AddrUnsPair *)b)
278 ->first);
279 unsigned depth = aa->depth;
280 unsigned i;
281 KMP_DEBUG_ASSERT(depth == bb->depth);
282 for (i = 0; i < depth; i++) {
283 if (aa->labels[i] < bb->labels[i]) return -1;
284 if (aa->labels[i] > bb->labels[i]) return 1;
285 }
286 return 0;
287}
288
289
290static int
291__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
292{
293 const Address *aa = (const Address *)&(((AddrUnsPair *)a)
294 ->first);
295 const Address *bb = (const Address *)&(((AddrUnsPair *)b)
296 ->first);
297 unsigned depth = aa->depth;
298 unsigned i;
299 KMP_DEBUG_ASSERT(depth == bb->depth);
300 KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
301 KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
302 for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
303 int j = depth - i - 1;
304 if (aa->childNums[j] < bb->childNums[j]) return -1;
305 if (aa->childNums[j] > bb->childNums[j]) return 1;
306 }
307 for (; i < depth; i++) {
308 int j = i - __kmp_affinity_compact;
309 if (aa->childNums[j] < bb->childNums[j]) return -1;
310 if (aa->childNums[j] > bb->childNums[j]) return 1;
311 }
312 return 0;
313}
314
Jonathan Peyton7f09a982015-06-22 15:59:18 +0000315/** A structure for holding machine-specific hierarchy info to be computed once at init.
316 This structure represents a mapping of threads to the actual machine hierarchy, or to
317 our best guess at what the hierarchy might be, for the purpose of performing an
318 efficient barrier. In the worst case, when there is no machine hierarchy information,
319 it produces a tree suitable for a barrier, similar to the tree used in the hyper barrier. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000320class hierarchy_info {
321public:
Jonathan Peyton7f09a982015-06-22 15:59:18 +0000322 /** Number of levels in the hierarchy. Typical levels are threads/core, cores/package
323 or socket, packages/node, nodes/machine, etc. We don't want to get specific with
324 nomenclature. When the machine is oversubscribed we add levels to duplicate the
325 hierarchy, doubling the thread capacity of the hierarchy each time we add a level. */
326 kmp_uint32 maxLevels;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000327
328 /** This is specifically the depth of the machine configuration hierarchy, in terms of the
329 number of levels along the longest path from root to any leaf. It corresponds to the
330 number of entries in numPerLevel if we exclude all but one trailing 1. */
331 kmp_uint32 depth;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000332 kmp_uint32 base_num_threads;
Andrey Churbanovaa1f2b62015-04-13 18:51:59 +0000333 volatile kmp_int8 uninitialized; // 0=initialized, 1=uninitialized, 2=initialization in progress
Jonathan Peyton7f09a982015-06-22 15:59:18 +0000334 volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000335
336 /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
337 node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
338 and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
Jonathan Peyton7f09a982015-06-22 15:59:18 +0000339 kmp_uint32 *numPerLevel;
340 kmp_uint32 *skipPerLevel;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000341
342 void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
343 int hier_depth = adr2os[0].first.depth;
344 int level = 0;
345 for (int i=hier_depth-1; i>=0; --i) {
346 int max = -1;
347 for (int j=0; j<num_addrs; ++j) {
348 int next = adr2os[j].first.childNums[i];
349 if (next > max) max = next;
350 }
351 numPerLevel[level] = max+1;
352 ++level;
353 }
354 }
355
Jonathan Peyton7f09a982015-06-22 15:59:18 +0000356 hierarchy_info() : maxLevels(7), depth(1), uninitialized(1), resizing(0) {}
357
358 // TO FIX: This destructor causes a segfault in the library at shutdown.
359 //~hierarchy_info() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); }
360
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000361 void init(AddrUnsPair *adr2os, int num_addrs)
362 {
Andrey Churbanovaa1f2b62015-04-13 18:51:59 +0000363 kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, 1, 2);
364 if (bool_result == 0) { // Wait for initialization
365 while (TCR_1(uninitialized) != 0) KMP_CPU_PAUSE();
366 return;
367 }
368 KMP_DEBUG_ASSERT(bool_result==1);
369
Jonathan Peyton7f09a982015-06-22 15:59:18 +0000370 /* Added explicit initialization of the data fields here to prevent usage of dirty value
Andrey Churbanovb41e62b2015-02-10 20:10:21 +0000371 observed when static library is re-initialized multiple times (e.g. when
372 non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
373 depth = 1;
Jonathan Peyton7f09a982015-06-22 15:59:18 +0000374 resizing = 0;
375 maxLevels = 7;
376 numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
377 skipPerLevel = &(numPerLevel[maxLevels]);
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000378 for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
379 numPerLevel[i] = 1;
380 skipPerLevel[i] = 1;
381 }
382
383 // Sort table by physical ID
384 if (adr2os) {
385 qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
386 deriveLevels(adr2os, num_addrs);
387 }
388 else {
389 numPerLevel[0] = 4;
390 numPerLevel[1] = num_addrs/4;
391 if (num_addrs%4) numPerLevel[1]++;
392 }
393
394 base_num_threads = num_addrs;
395 for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
396 if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
397 depth++;
398
399 kmp_uint32 branch = 4;
400 if (numPerLevel[0] == 1) branch = num_addrs/4;
401 if (branch<4) branch=4;
402 for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
403 while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
404 if (numPerLevel[d] & 1) numPerLevel[d]++;
405 numPerLevel[d] = numPerLevel[d] >> 1;
406 if (numPerLevel[d+1] == 1) depth++;
407 numPerLevel[d+1] = numPerLevel[d+1] << 1;
408 }
409 if(numPerLevel[0] == 1) {
410 branch = branch >> 1;
411 if (branch<4) branch = 4;
412 }
413 }
414
415 for (kmp_uint32 i=1; i<depth; ++i)
416 skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
Andrey Churbanovaa1f2b62015-04-13 18:51:59 +0000417 // Fill in hierarchy in the case of oversubscription
418 for (kmp_uint32 i=depth; i<maxLevels; ++i)
419 skipPerLevel[i] = 2*skipPerLevel[i-1];
420
421 uninitialized = 0; // One writer
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000422
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000423 }
Jonathan Peyton7f09a982015-06-22 15:59:18 +0000424
425 void resize(kmp_uint32 nproc)
426 {
427 kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
428 if (bool_result == 0) { // Someone else is resizing
429 while (TCR_1(resizing) != 0) KMP_CPU_PAUSE();
430 return;
431 }
432 KMP_DEBUG_ASSERT(bool_result!=0);
433 KMP_DEBUG_ASSERT(nproc > base_num_threads);
434
435 // Calculate new max_levels
436 kmp_uint32 old_sz = skipPerLevel[depth-1];
437 kmp_uint32 incs = 0, old_maxLevels= maxLevels;
438 while (nproc > old_sz) {
439 old_sz *=2;
440 incs++;
441 }
442 maxLevels += incs;
443
444 // Resize arrays
445 kmp_uint32 *old_numPerLevel = numPerLevel;
446 kmp_uint32 *old_skipPerLevel = skipPerLevel;
447 numPerLevel = skipPerLevel = NULL;
448 numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
449 skipPerLevel = &(numPerLevel[maxLevels]);
450
451 // Copy old elements from old arrays
452 for (kmp_uint32 i=0; i<old_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
453 numPerLevel[i] = old_numPerLevel[i];
454 skipPerLevel[i] = old_skipPerLevel[i];
455 }
456
457 // Init new elements in arrays to 1
458 for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
459 numPerLevel[i] = 1;
460 skipPerLevel[i] = 1;
461 }
462
463 // Free old arrays
464 __kmp_free(old_numPerLevel);
465
466 // Fill in oversubscription levels of hierarchy
467 for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
468 skipPerLevel[i] = 2*skipPerLevel[i-1];
469
470 base_num_threads = nproc;
471 resizing = 0; // One writer
472
473 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000474};
475
476static hierarchy_info machine_hierarchy;
477
478void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
Andrey Churbanov1362ae72015-04-02 13:18:50 +0000479 kmp_uint32 depth;
Andrey Churbanovaa1f2b62015-04-13 18:51:59 +0000480 // The test below is true if affinity is available, but set to "none". Need to init on first use of hierarchical barrier.
481 if (TCR_1(machine_hierarchy.uninitialized))
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000482 machine_hierarchy.init(NULL, nproc);
Jonathan Peyton7f09a982015-06-22 15:59:18 +0000483 // Adjust the hierarchy in case num threads exceeds original
484 if (nproc > machine_hierarchy.base_num_threads)
485 machine_hierarchy.resize(nproc);
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000486
Andrey Churbanov1362ae72015-04-02 13:18:50 +0000487 depth = machine_hierarchy.depth;
488 KMP_DEBUG_ASSERT(depth > 0);
Jonathan Peyton7f09a982015-06-22 15:59:18 +0000489 // The loop below adjusts the depth in the case of a resize
490 while (nproc > machine_hierarchy.skipPerLevel[depth-1])
Andrey Churbanov1362ae72015-04-02 13:18:50 +0000491 depth++;
Andrey Churbanovaa1f2b62015-04-13 18:51:59 +0000492
Andrey Churbanov1362ae72015-04-02 13:18:50 +0000493 thr_bar->depth = depth;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000494 thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
495 thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
496}
Jim Cownie5e8470a2013-09-27 10:38:44 +0000497
498//
499// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
500// called to renumber the labels from [0..n] and place them into the child_num
501// vector of the address object. This is done in case the labels used for
Alp Toker8f2d3f02014-02-24 10:40:15 +0000502// the children at one node of the hierarchy differ from those used for
Jim Cownie5e8470a2013-09-27 10:38:44 +0000503// another node at the same level. Example: suppose the machine has 2 nodes
504// with 2 packages each. The first node contains packages 601 and 602, and
505// second node contains packages 603 and 604. If we try to sort the table
506// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
507// because we are paying attention to the labels themselves, not the ordinal
508// child numbers. By using the child numbers in the sort, the result is
509// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
510//
511static void
512__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
513 int numAddrs)
514{
515 KMP_DEBUG_ASSERT(numAddrs > 0);
516 int depth = address2os->first.depth;
517 unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
518 unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
519 * sizeof(unsigned));
520 int labCt;
521 for (labCt = 0; labCt < depth; labCt++) {
522 address2os[0].first.childNums[labCt] = counts[labCt] = 0;
523 lastLabel[labCt] = address2os[0].first.labels[labCt];
524 }
525 int i;
526 for (i = 1; i < numAddrs; i++) {
527 for (labCt = 0; labCt < depth; labCt++) {
528 if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
529 int labCt2;
530 for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
531 counts[labCt2] = 0;
532 lastLabel[labCt2] = address2os[i].first.labels[labCt2];
533 }
534 counts[labCt]++;
535 lastLabel[labCt] = address2os[i].first.labels[labCt];
536 break;
537 }
538 }
539 for (labCt = 0; labCt < depth; labCt++) {
540 address2os[i].first.childNums[labCt] = counts[labCt];
541 }
542 for (; labCt < (int)Address::maxDepth; labCt++) {
543 address2os[i].first.childNums[labCt] = 0;
544 }
545 }
546}
547
548
549//
550// All of the __kmp_affinity_create_*_map() routines should set
551// __kmp_affinity_masks to a vector of affinity mask objects of length
552// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
553// return the number of levels in the machine topology tree (zero if
554// __kmp_affinity_type == affinity_none).
555//
556// All of the __kmp_affinity_create_*_map() routines should set *fullMask
557// to the affinity mask for the initialization thread. They need to save and
558// restore the mask, and it could be needed later, so saving it is just an
559// optimization to avoid calling kmp_get_system_affinity() again.
560//
561static kmp_affin_mask_t *fullMask = NULL;
562
563kmp_affin_mask_t *
564__kmp_affinity_get_fullMask() { return fullMask; }
565
566
567static int nCoresPerPkg, nPackages;
Andrey Churbanovf696c822015-01-27 16:55:43 +0000568static int __kmp_nThreadsPerCore;
569#ifndef KMP_DFLT_NTH_CORES
570static int __kmp_ncores;
571#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000572
573//
574// __kmp_affinity_uniform_topology() doesn't work when called from
575// places which support arbitrarily many levels in the machine topology
576// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
577// __kmp_affinity_create_x2apicid_map().
578//
579inline static bool
580__kmp_affinity_uniform_topology()
581{
582 return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
583}
584
585
586//
587// Print out the detailed machine topology map, i.e. the physical locations
588// of each OS proc.
589//
590static void
591__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
592 int pkgLevel, int coreLevel, int threadLevel)
593{
594 int proc;
595
596 KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
597 for (proc = 0; proc < len; proc++) {
598 int level;
599 kmp_str_buf_t buf;
600 __kmp_str_buf_init(&buf);
601 for (level = 0; level < depth; level++) {
602 if (level == threadLevel) {
603 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
604 }
605 else if (level == coreLevel) {
606 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
607 }
608 else if (level == pkgLevel) {
609 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
610 }
611 else if (level > pkgLevel) {
612 __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
613 level - pkgLevel - 1);
614 }
615 else {
616 __kmp_str_buf_print(&buf, "L%d ", level);
617 }
618 __kmp_str_buf_print(&buf, "%d ",
619 address2os[proc].first.labels[level]);
620 }
621 KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
622 buf.str);
623 __kmp_str_buf_free(&buf);
624 }
625}
626
627
628//
629// If we don't know how to retrieve the machine's processor topology, or
630// encounter an error in doing so, this routine is called to form a "flat"
631// mapping of os thread id's <-> processor id's.
632//
633static int
634__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
635 kmp_i18n_id_t *const msg_id)
636{
637 *address2os = NULL;
638 *msg_id = kmp_i18n_null;
639
640 //
641 // Even if __kmp_affinity_type == affinity_none, this routine might still
Andrey Churbanovf696c822015-01-27 16:55:43 +0000642 // called to set __kmp_ncores, as well as
Jim Cownie5e8470a2013-09-27 10:38:44 +0000643 // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
644 //
645 if (! KMP_AFFINITY_CAPABLE()) {
646 KMP_ASSERT(__kmp_affinity_type == affinity_none);
647 __kmp_ncores = nPackages = __kmp_xproc;
648 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000649 if (__kmp_affinity_verbose) {
650 KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
651 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
652 KMP_INFORM(Uniform, "KMP_AFFINITY");
653 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
654 __kmp_nThreadsPerCore, __kmp_ncores);
655 }
656 return 0;
657 }
658
659 //
660 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +0000661 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +0000662 // nCoresPerPkg, & nPackages. Make sure all these vars are set
663 // correctly, and return now if affinity is not enabled.
664 //
665 __kmp_ncores = nPackages = __kmp_avail_proc;
666 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000667 if (__kmp_affinity_verbose) {
668 char buf[KMP_AFFIN_MASK_PRINT_LEN];
669 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
670
671 KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
672 if (__kmp_affinity_respect_mask) {
673 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
674 } else {
675 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
676 }
677 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
678 KMP_INFORM(Uniform, "KMP_AFFINITY");
679 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
680 __kmp_nThreadsPerCore, __kmp_ncores);
681 }
682 if (__kmp_affinity_type == affinity_none) {
683 return 0;
684 }
685
686 //
687 // Contruct the data structure to be returned.
688 //
689 *address2os = (AddrUnsPair*)
690 __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
691 int avail_ct = 0;
692 unsigned int i;
693 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
694 //
695 // Skip this proc if it is not included in the machine model.
696 //
697 if (! KMP_CPU_ISSET(i, fullMask)) {
698 continue;
699 }
700
701 Address addr(1);
702 addr.labels[0] = i;
703 (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
704 }
705 if (__kmp_affinity_verbose) {
706 KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
707 }
708
709 if (__kmp_affinity_gran_levels < 0) {
710 //
711 // Only the package level is modeled in the machine topology map,
712 // so the #levels of granularity is either 0 or 1.
713 //
714 if (__kmp_affinity_gran > affinity_gran_package) {
715 __kmp_affinity_gran_levels = 1;
716 }
717 else {
718 __kmp_affinity_gran_levels = 0;
719 }
720 }
721 return 1;
722}
723
724
Andrey Churbanov7daf9802015-01-27 16:52:57 +0000725# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +0000726
727//
728// If multiple Windows* OS processor groups exist, we can create a 2-level
729// topology map with the groups at level 0 and the individual procs at
730// level 1.
731//
732// This facilitates letting the threads float among all procs in a group,
733// if granularity=group (the default when there are multiple groups).
734//
735static int
736__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
737 kmp_i18n_id_t *const msg_id)
738{
739 *address2os = NULL;
740 *msg_id = kmp_i18n_null;
741
742 //
743 // If we don't have multiple processor groups, return now.
744 // The flat mapping will be used.
745 //
746 if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
747 // FIXME set *msg_id
748 return -1;
749 }
750
751 //
752 // Contruct the data structure to be returned.
753 //
754 *address2os = (AddrUnsPair*)
755 __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
756 int avail_ct = 0;
757 int i;
758 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
759 //
760 // Skip this proc if it is not included in the machine model.
761 //
762 if (! KMP_CPU_ISSET(i, fullMask)) {
763 continue;
764 }
765
766 Address addr(2);
767 addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
768 addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
769 (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
770
771 if (__kmp_affinity_verbose) {
772 KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
773 addr.labels[1]);
774 }
775 }
776
777 if (__kmp_affinity_gran_levels < 0) {
778 if (__kmp_affinity_gran == affinity_gran_group) {
779 __kmp_affinity_gran_levels = 1;
780 }
781 else if ((__kmp_affinity_gran == affinity_gran_fine)
782 || (__kmp_affinity_gran == affinity_gran_thread)) {
783 __kmp_affinity_gran_levels = 0;
784 }
785 else {
786 const char *gran_str = NULL;
787 if (__kmp_affinity_gran == affinity_gran_core) {
788 gran_str = "core";
789 }
790 else if (__kmp_affinity_gran == affinity_gran_package) {
791 gran_str = "package";
792 }
793 else if (__kmp_affinity_gran == affinity_gran_node) {
794 gran_str = "node";
795 }
796 else {
797 KMP_ASSERT(0);
798 }
799
800 // Warning: can't use affinity granularity \"gran\" with group topology method, using "thread"
801 __kmp_affinity_gran_levels = 0;
802 }
803 }
804 return 2;
805}
806
Andrey Churbanov7daf9802015-01-27 16:52:57 +0000807# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +0000808
809
810# if KMP_ARCH_X86 || KMP_ARCH_X86_64
811
812static int
813__kmp_cpuid_mask_width(int count) {
814 int r = 0;
815
816 while((1<<r) < count)
817 ++r;
818 return r;
819}
820
821
822class apicThreadInfo {
823public:
824 unsigned osId; // param to __kmp_affinity_bind_thread
825 unsigned apicId; // from cpuid after binding
826 unsigned maxCoresPerPkg; // ""
827 unsigned maxThreadsPerPkg; // ""
828 unsigned pkgId; // inferred from above values
829 unsigned coreId; // ""
830 unsigned threadId; // ""
831};
832
833
834static int
835__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
836{
837 const apicThreadInfo *aa = (const apicThreadInfo *)a;
838 const apicThreadInfo *bb = (const apicThreadInfo *)b;
839 if (aa->osId < bb->osId) return -1;
840 if (aa->osId > bb->osId) return 1;
841 return 0;
842}
843
844
845static int
846__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
847{
848 const apicThreadInfo *aa = (const apicThreadInfo *)a;
849 const apicThreadInfo *bb = (const apicThreadInfo *)b;
850 if (aa->pkgId < bb->pkgId) return -1;
851 if (aa->pkgId > bb->pkgId) return 1;
852 if (aa->coreId < bb->coreId) return -1;
853 if (aa->coreId > bb->coreId) return 1;
854 if (aa->threadId < bb->threadId) return -1;
855 if (aa->threadId > bb->threadId) return 1;
856 return 0;
857}
858
859
860//
861// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
862// an algorithm which cycles through the available os threads, setting
863// the current thread's affinity mask to that thread, and then retrieves
864// the Apic Id for each thread context using the cpuid instruction.
865//
866static int
867__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
868 kmp_i18n_id_t *const msg_id)
869{
Andrey Churbanov1c331292015-01-27 17:03:42 +0000870 kmp_cpuid buf;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000871 int rc;
872 *address2os = NULL;
873 *msg_id = kmp_i18n_null;
874
Andrey Churbanov1c331292015-01-27 17:03:42 +0000875 //
876 // Check if cpuid leaf 4 is supported.
877 //
Jim Cownie5e8470a2013-09-27 10:38:44 +0000878 __kmp_x86_cpuid(0, 0, &buf);
879 if (buf.eax < 4) {
880 *msg_id = kmp_i18n_str_NoLeaf4Support;
881 return -1;
882 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000883
884 //
Jim Cownie5e8470a2013-09-27 10:38:44 +0000885 // The algorithm used starts by setting the affinity to each available
Andrey Churbanov1c331292015-01-27 17:03:42 +0000886 // thread and retrieving info from the cpuid instruction, so if we are
887 // not capable of calling __kmp_get_system_affinity() and
888 // _kmp_get_system_affinity(), then we need to do something else - use
889 // the defaults that we calculated from issuing cpuid without binding
890 // to each proc.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000891 //
892 if (! KMP_AFFINITY_CAPABLE()) {
893 //
894 // Hack to try and infer the machine topology using only the data
895 // available from cpuid on the current thread, and __kmp_xproc.
896 //
897 KMP_ASSERT(__kmp_affinity_type == affinity_none);
898
899 //
900 // Get an upper bound on the number of threads per package using
901 // cpuid(1).
902 //
903 // On some OS/chps combinations where HT is supported by the chip
904 // but is disabled, this value will be 2 on a single core chip.
905 // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
906 //
Jim Cownie5e8470a2013-09-27 10:38:44 +0000907 __kmp_x86_cpuid(1, 0, &buf);
908 int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
909 if (maxThreadsPerPkg == 0) {
910 maxThreadsPerPkg = 1;
911 }
912
913 //
914 // The num cores per pkg comes from cpuid(4).
915 // 1 must be added to the encoded value.
916 //
917 // The author of cpu_count.cpp treated this only an upper bound
918 // on the number of cores, but I haven't seen any cases where it
919 // was greater than the actual number of cores, so we will treat
920 // it as exact in this block of code.
921 //
922 // First, we need to check if cpuid(4) is supported on this chip.
923 // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
924 // has the value n or greater.
925 //
926 __kmp_x86_cpuid(0, 0, &buf);
927 if (buf.eax >= 4) {
928 __kmp_x86_cpuid(4, 0, &buf);
929 nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
930 }
931 else {
932 nCoresPerPkg = 1;
933 }
934
935 //
936 // There is no way to reliably tell if HT is enabled without issuing
937 // the cpuid instruction from every thread, can correlating the cpuid
938 // info, so if the machine is not affinity capable, we assume that HT
939 // is off. We have seen quite a few machines where maxThreadsPerPkg
940 // is 2, yet the machine does not support HT.
941 //
942 // - Older OSes are usually found on machines with older chips, which
943 // do not support HT.
944 //
945 // - The performance penalty for mistakenly identifying a machine as
946 // HT when it isn't (which results in blocktime being incorrecly set
947 // to 0) is greater than the penalty when for mistakenly identifying
948 // a machine as being 1 thread/core when it is really HT enabled
949 // (which results in blocktime being incorrectly set to a positive
950 // value).
951 //
952 __kmp_ncores = __kmp_xproc;
953 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
954 __kmp_nThreadsPerCore = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000955 if (__kmp_affinity_verbose) {
956 KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
957 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
958 if (__kmp_affinity_uniform_topology()) {
959 KMP_INFORM(Uniform, "KMP_AFFINITY");
960 } else {
961 KMP_INFORM(NonUniform, "KMP_AFFINITY");
962 }
963 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
964 __kmp_nThreadsPerCore, __kmp_ncores);
965 }
966 return 0;
967 }
968
969 //
970 //
971 // From here on, we can assume that it is safe to call
972 // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
973 // even if __kmp_affinity_type = affinity_none.
974 //
975
976 //
977 // Save the affinity mask for the current thread.
978 //
979 kmp_affin_mask_t *oldMask;
980 KMP_CPU_ALLOC(oldMask);
981 KMP_ASSERT(oldMask != NULL);
982 __kmp_get_system_affinity(oldMask, TRUE);
983
984 //
985 // Run through each of the available contexts, binding the current thread
986 // to it, and obtaining the pertinent information using the cpuid instr.
987 //
988 // The relevant information is:
989 //
990 // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
991 // has a uniqie Apic Id, which is of the form pkg# : core# : thread#.
992 //
993 // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The
994 // value of this field determines the width of the core# + thread#
995 // fields in the Apic Id. It is also an upper bound on the number
996 // of threads per package, but it has been verified that situations
997 // happen were it is not exact. In particular, on certain OS/chip
998 // combinations where Intel(R) Hyper-Threading Technology is supported
999 // by the chip but has
1000 // been disabled, the value of this field will be 2 (for a single core
1001 // chip). On other OS/chip combinations supporting
1002 // Intel(R) Hyper-Threading Technology, the value of
1003 // this field will be 1 when Intel(R) Hyper-Threading Technology is
1004 // disabled and 2 when it is enabled.
1005 //
1006 // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The
1007 // value of this field (+1) determines the width of the core# field in
1008 // the Apic Id. The comments in "cpucount.cpp" say that this value is
1009 // an upper bound, but the IA-32 architecture manual says that it is
1010 // exactly the number of cores per package, and I haven't seen any
1011 // case where it wasn't.
1012 //
1013 // From this information, deduce the package Id, core Id, and thread Id,
1014 // and set the corresponding fields in the apicThreadInfo struct.
1015 //
1016 unsigned i;
1017 apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
1018 __kmp_avail_proc * sizeof(apicThreadInfo));
1019 unsigned nApics = 0;
1020 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
1021 //
1022 // Skip this proc if it is not included in the machine model.
1023 //
1024 if (! KMP_CPU_ISSET(i, fullMask)) {
1025 continue;
1026 }
1027 KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
1028
1029 __kmp_affinity_bind_thread(i);
1030 threadInfo[nApics].osId = i;
1031
1032 //
1033 // The apic id and max threads per pkg come from cpuid(1).
1034 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00001035 __kmp_x86_cpuid(1, 0, &buf);
1036 if (! (buf.edx >> 9) & 1) {
1037 __kmp_set_system_affinity(oldMask, TRUE);
1038 __kmp_free(threadInfo);
1039 KMP_CPU_FREE(oldMask);
1040 *msg_id = kmp_i18n_str_ApicNotPresent;
1041 return -1;
1042 }
1043 threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
1044 threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
1045 if (threadInfo[nApics].maxThreadsPerPkg == 0) {
1046 threadInfo[nApics].maxThreadsPerPkg = 1;
1047 }
1048
1049 //
1050 // Max cores per pkg comes from cpuid(4).
1051 // 1 must be added to the encoded value.
1052 //
1053 // First, we need to check if cpuid(4) is supported on this chip.
1054 // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
1055 // has the value n or greater.
1056 //
1057 __kmp_x86_cpuid(0, 0, &buf);
1058 if (buf.eax >= 4) {
1059 __kmp_x86_cpuid(4, 0, &buf);
1060 threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
1061 }
1062 else {
1063 threadInfo[nApics].maxCoresPerPkg = 1;
1064 }
1065
1066 //
1067 // Infer the pkgId / coreId / threadId using only the info
1068 // obtained locally.
1069 //
1070 int widthCT = __kmp_cpuid_mask_width(
1071 threadInfo[nApics].maxThreadsPerPkg);
1072 threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
1073
1074 int widthC = __kmp_cpuid_mask_width(
1075 threadInfo[nApics].maxCoresPerPkg);
1076 int widthT = widthCT - widthC;
1077 if (widthT < 0) {
1078 //
1079 // I've never seen this one happen, but I suppose it could, if
1080 // the cpuid instruction on a chip was really screwed up.
1081 // Make sure to restore the affinity mask before the tail call.
1082 //
1083 __kmp_set_system_affinity(oldMask, TRUE);
1084 __kmp_free(threadInfo);
1085 KMP_CPU_FREE(oldMask);
1086 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1087 return -1;
1088 }
1089
1090 int maskC = (1 << widthC) - 1;
1091 threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
1092 &maskC;
1093
1094 int maskT = (1 << widthT) - 1;
1095 threadInfo[nApics].threadId = threadInfo[nApics].apicId &maskT;
1096
1097 nApics++;
1098 }
1099
1100 //
1101 // We've collected all the info we need.
1102 // Restore the old affinity mask for this thread.
1103 //
1104 __kmp_set_system_affinity(oldMask, TRUE);
1105
1106 //
1107 // If there's only one thread context to bind to, form an Address object
1108 // with depth 1 and return immediately (or, if affinity is off, set
1109 // address2os to NULL and return).
1110 //
1111 // If it is configured to omit the package level when there is only a
1112 // single package, the logic at the end of this routine won't work if
1113 // there is only a single thread - it would try to form an Address
1114 // object with depth 0.
1115 //
1116 KMP_ASSERT(nApics > 0);
1117 if (nApics == 1) {
1118 __kmp_ncores = nPackages = 1;
1119 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001120 if (__kmp_affinity_verbose) {
1121 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1122 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1123
1124 KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
1125 if (__kmp_affinity_respect_mask) {
1126 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1127 } else {
1128 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1129 }
1130 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1131 KMP_INFORM(Uniform, "KMP_AFFINITY");
1132 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1133 __kmp_nThreadsPerCore, __kmp_ncores);
1134 }
1135
1136 if (__kmp_affinity_type == affinity_none) {
1137 __kmp_free(threadInfo);
1138 KMP_CPU_FREE(oldMask);
1139 return 0;
1140 }
1141
1142 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
1143 Address addr(1);
1144 addr.labels[0] = threadInfo[0].pkgId;
1145 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
1146
1147 if (__kmp_affinity_gran_levels < 0) {
1148 __kmp_affinity_gran_levels = 0;
1149 }
1150
1151 if (__kmp_affinity_verbose) {
1152 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
1153 }
1154
1155 __kmp_free(threadInfo);
1156 KMP_CPU_FREE(oldMask);
1157 return 1;
1158 }
1159
1160 //
1161 // Sort the threadInfo table by physical Id.
1162 //
1163 qsort(threadInfo, nApics, sizeof(*threadInfo),
1164 __kmp_affinity_cmp_apicThreadInfo_phys_id);
1165
1166 //
1167 // The table is now sorted by pkgId / coreId / threadId, but we really
1168 // don't know the radix of any of the fields. pkgId's may be sparsely
1169 // assigned among the chips on a system. Although coreId's are usually
1170 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
1171 // [0..threadsPerCore-1], we don't want to make any such assumptions.
1172 //
1173 // For that matter, we don't know what coresPerPkg and threadsPerCore
1174 // (or the total # packages) are at this point - we want to determine
1175 // that now. We only have an upper bound on the first two figures.
1176 //
1177 // We also perform a consistency check at this point: the values returned
1178 // by the cpuid instruction for any thread bound to a given package had
1179 // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
1180 //
1181 nPackages = 1;
1182 nCoresPerPkg = 1;
1183 __kmp_nThreadsPerCore = 1;
1184 unsigned nCores = 1;
1185
1186 unsigned pkgCt = 1; // to determine radii
1187 unsigned lastPkgId = threadInfo[0].pkgId;
1188 unsigned coreCt = 1;
1189 unsigned lastCoreId = threadInfo[0].coreId;
1190 unsigned threadCt = 1;
1191 unsigned lastThreadId = threadInfo[0].threadId;
1192
1193 // intra-pkg consist checks
1194 unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
1195 unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
1196
1197 for (i = 1; i < nApics; i++) {
1198 if (threadInfo[i].pkgId != lastPkgId) {
1199 nCores++;
1200 pkgCt++;
1201 lastPkgId = threadInfo[i].pkgId;
1202 if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
1203 coreCt = 1;
1204 lastCoreId = threadInfo[i].coreId;
1205 if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1206 threadCt = 1;
1207 lastThreadId = threadInfo[i].threadId;
1208
1209 //
1210 // This is a different package, so go on to the next iteration
1211 // without doing any consistency checks. Reset the consistency
1212 // check vars, though.
1213 //
1214 prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
1215 prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
1216 continue;
1217 }
1218
1219 if (threadInfo[i].coreId != lastCoreId) {
1220 nCores++;
1221 coreCt++;
1222 lastCoreId = threadInfo[i].coreId;
1223 if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1224 threadCt = 1;
1225 lastThreadId = threadInfo[i].threadId;
1226 }
1227 else if (threadInfo[i].threadId != lastThreadId) {
1228 threadCt++;
1229 lastThreadId = threadInfo[i].threadId;
1230 }
1231 else {
1232 __kmp_free(threadInfo);
1233 KMP_CPU_FREE(oldMask);
1234 *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
1235 return -1;
1236 }
1237
1238 //
1239 // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
1240 // fields agree between all the threads bounds to a given package.
1241 //
1242 if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
1243 || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
1244 __kmp_free(threadInfo);
1245 KMP_CPU_FREE(oldMask);
1246 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1247 return -1;
1248 }
1249 }
1250 nPackages = pkgCt;
1251 if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
1252 if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1253
1254 //
1255 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00001256 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001257 // nCoresPerPkg, & nPackages. Make sure all these vars are set
1258 // correctly, and return now if affinity is not enabled.
1259 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00001260 __kmp_ncores = nCores;
1261 if (__kmp_affinity_verbose) {
1262 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1263 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1264
1265 KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
1266 if (__kmp_affinity_respect_mask) {
1267 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1268 } else {
1269 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1270 }
1271 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1272 if (__kmp_affinity_uniform_topology()) {
1273 KMP_INFORM(Uniform, "KMP_AFFINITY");
1274 } else {
1275 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1276 }
1277 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1278 __kmp_nThreadsPerCore, __kmp_ncores);
1279
1280 }
1281
1282 if (__kmp_affinity_type == affinity_none) {
1283 __kmp_free(threadInfo);
1284 KMP_CPU_FREE(oldMask);
1285 return 0;
1286 }
1287
1288 //
1289 // Now that we've determined the number of packages, the number of cores
1290 // per package, and the number of threads per core, we can construct the
1291 // data structure that is to be returned.
1292 //
1293 int pkgLevel = 0;
1294 int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
1295 int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
1296 unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
1297
1298 KMP_ASSERT(depth > 0);
1299 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
1300
1301 for (i = 0; i < nApics; ++i) {
1302 Address addr(depth);
1303 unsigned os = threadInfo[i].osId;
1304 int d = 0;
1305
1306 if (pkgLevel >= 0) {
1307 addr.labels[d++] = threadInfo[i].pkgId;
1308 }
1309 if (coreLevel >= 0) {
1310 addr.labels[d++] = threadInfo[i].coreId;
1311 }
1312 if (threadLevel >= 0) {
1313 addr.labels[d++] = threadInfo[i].threadId;
1314 }
1315 (*address2os)[i] = AddrUnsPair(addr, os);
1316 }
1317
1318 if (__kmp_affinity_gran_levels < 0) {
1319 //
1320 // Set the granularity level based on what levels are modeled
1321 // in the machine topology map.
1322 //
1323 __kmp_affinity_gran_levels = 0;
1324 if ((threadLevel >= 0)
1325 && (__kmp_affinity_gran > affinity_gran_thread)) {
1326 __kmp_affinity_gran_levels++;
1327 }
1328 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1329 __kmp_affinity_gran_levels++;
1330 }
1331 if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
1332 __kmp_affinity_gran_levels++;
1333 }
1334 }
1335
1336 if (__kmp_affinity_verbose) {
1337 __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
1338 coreLevel, threadLevel);
1339 }
1340
1341 __kmp_free(threadInfo);
1342 KMP_CPU_FREE(oldMask);
1343 return depth;
1344}
1345
1346
1347//
1348// Intel(R) microarchitecture code name Nehalem, Dunnington and later
1349// architectures support a newer interface for specifying the x2APIC Ids,
1350// based on cpuid leaf 11.
1351//
1352static int
1353__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
1354 kmp_i18n_id_t *const msg_id)
1355{
1356 kmp_cpuid buf;
1357
1358 *address2os = NULL;
1359 *msg_id = kmp_i18n_null;
1360
1361 //
1362 // Check to see if cpuid leaf 11 is supported.
1363 //
1364 __kmp_x86_cpuid(0, 0, &buf);
1365 if (buf.eax < 11) {
1366 *msg_id = kmp_i18n_str_NoLeaf11Support;
1367 return -1;
1368 }
1369 __kmp_x86_cpuid(11, 0, &buf);
1370 if (buf.ebx == 0) {
1371 *msg_id = kmp_i18n_str_NoLeaf11Support;
1372 return -1;
1373 }
1374
1375 //
1376 // Find the number of levels in the machine topology. While we're at it,
1377 // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will
1378 // try to get more accurate values later by explicitly counting them,
1379 // but get reasonable defaults now, in case we return early.
1380 //
1381 int level;
1382 int threadLevel = -1;
1383 int coreLevel = -1;
1384 int pkgLevel = -1;
1385 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
1386
1387 for (level = 0;; level++) {
1388 if (level > 31) {
1389 //
1390 // FIXME: Hack for DPD200163180
1391 //
1392 // If level is big then something went wrong -> exiting
1393 //
1394 // There could actually be 32 valid levels in the machine topology,
1395 // but so far, the only machine we have seen which does not exit
1396 // this loop before iteration 32 has fubar x2APIC settings.
1397 //
1398 // For now, just reject this case based upon loop trip count.
1399 //
1400 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1401 return -1;
1402 }
1403 __kmp_x86_cpuid(11, level, &buf);
1404 if (buf.ebx == 0) {
1405 if (pkgLevel < 0) {
1406 //
1407 // Will infer nPackages from __kmp_xproc
1408 //
1409 pkgLevel = level;
1410 level++;
1411 }
1412 break;
1413 }
1414 int kind = (buf.ecx >> 8) & 0xff;
1415 if (kind == 1) {
1416 //
1417 // SMT level
1418 //
1419 threadLevel = level;
1420 coreLevel = -1;
1421 pkgLevel = -1;
1422 __kmp_nThreadsPerCore = buf.ebx & 0xff;
1423 if (__kmp_nThreadsPerCore == 0) {
1424 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1425 return -1;
1426 }
1427 }
1428 else if (kind == 2) {
1429 //
1430 // core level
1431 //
1432 coreLevel = level;
1433 pkgLevel = -1;
1434 nCoresPerPkg = buf.ebx & 0xff;
1435 if (nCoresPerPkg == 0) {
1436 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1437 return -1;
1438 }
1439 }
1440 else {
1441 if (level <= 0) {
1442 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1443 return -1;
1444 }
1445 if (pkgLevel >= 0) {
1446 continue;
1447 }
1448 pkgLevel = level;
1449 nPackages = buf.ebx & 0xff;
1450 if (nPackages == 0) {
1451 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1452 return -1;
1453 }
1454 }
1455 }
1456 int depth = level;
1457
1458 //
1459 // In the above loop, "level" was counted from the finest level (usually
1460 // thread) to the coarsest. The caller expects that we will place the
1461 // labels in (*address2os)[].first.labels[] in the inverse order, so
1462 // we need to invert the vars saying which level means what.
1463 //
1464 if (threadLevel >= 0) {
1465 threadLevel = depth - threadLevel - 1;
1466 }
1467 if (coreLevel >= 0) {
1468 coreLevel = depth - coreLevel - 1;
1469 }
1470 KMP_DEBUG_ASSERT(pkgLevel >= 0);
1471 pkgLevel = depth - pkgLevel - 1;
1472
1473 //
1474 // The algorithm used starts by setting the affinity to each available
Andrey Churbanov1c331292015-01-27 17:03:42 +00001475 // thread and retrieving info from the cpuid instruction, so if we are
1476 // not capable of calling __kmp_get_system_affinity() and
1477 // _kmp_get_system_affinity(), then we need to do something else - use
1478 // the defaults that we calculated from issuing cpuid without binding
1479 // to each proc.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001480 //
1481 if (! KMP_AFFINITY_CAPABLE())
1482 {
1483 //
1484 // Hack to try and infer the machine topology using only the data
1485 // available from cpuid on the current thread, and __kmp_xproc.
1486 //
1487 KMP_ASSERT(__kmp_affinity_type == affinity_none);
1488
1489 __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
1490 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001491 if (__kmp_affinity_verbose) {
1492 KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
1493 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1494 if (__kmp_affinity_uniform_topology()) {
1495 KMP_INFORM(Uniform, "KMP_AFFINITY");
1496 } else {
1497 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1498 }
1499 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1500 __kmp_nThreadsPerCore, __kmp_ncores);
1501 }
1502 return 0;
1503 }
1504
1505 //
1506 //
1507 // From here on, we can assume that it is safe to call
1508 // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
1509 // even if __kmp_affinity_type = affinity_none.
1510 //
1511
1512 //
1513 // Save the affinity mask for the current thread.
1514 //
1515 kmp_affin_mask_t *oldMask;
1516 KMP_CPU_ALLOC(oldMask);
1517 __kmp_get_system_affinity(oldMask, TRUE);
1518
1519 //
1520 // Allocate the data structure to be returned.
1521 //
1522 AddrUnsPair *retval = (AddrUnsPair *)
1523 __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
1524
1525 //
1526 // Run through each of the available contexts, binding the current thread
1527 // to it, and obtaining the pertinent information using the cpuid instr.
1528 //
1529 unsigned int proc;
1530 int nApics = 0;
1531 for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
1532 //
1533 // Skip this proc if it is not included in the machine model.
1534 //
1535 if (! KMP_CPU_ISSET(proc, fullMask)) {
1536 continue;
1537 }
1538 KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
1539
1540 __kmp_affinity_bind_thread(proc);
1541
1542 //
1543 // Extrach the labels for each level in the machine topology map
1544 // from the Apic ID.
1545 //
1546 Address addr(depth);
1547 int prev_shift = 0;
1548
1549 for (level = 0; level < depth; level++) {
1550 __kmp_x86_cpuid(11, level, &buf);
1551 unsigned apicId = buf.edx;
1552 if (buf.ebx == 0) {
1553 if (level != depth - 1) {
1554 KMP_CPU_FREE(oldMask);
1555 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1556 return -1;
1557 }
1558 addr.labels[depth - level - 1] = apicId >> prev_shift;
1559 level++;
1560 break;
1561 }
1562 int shift = buf.eax & 0x1f;
1563 int mask = (1 << shift) - 1;
1564 addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
1565 prev_shift = shift;
1566 }
1567 if (level != depth) {
1568 KMP_CPU_FREE(oldMask);
1569 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1570 return -1;
1571 }
1572
1573 retval[nApics] = AddrUnsPair(addr, proc);
1574 nApics++;
1575 }
1576
1577 //
1578 // We've collected all the info we need.
1579 // Restore the old affinity mask for this thread.
1580 //
1581 __kmp_set_system_affinity(oldMask, TRUE);
1582
1583 //
1584 // If there's only one thread context to bind to, return now.
1585 //
1586 KMP_ASSERT(nApics > 0);
1587 if (nApics == 1) {
1588 __kmp_ncores = nPackages = 1;
1589 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001590 if (__kmp_affinity_verbose) {
1591 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1592 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1593
1594 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1595 if (__kmp_affinity_respect_mask) {
1596 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1597 } else {
1598 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1599 }
1600 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1601 KMP_INFORM(Uniform, "KMP_AFFINITY");
1602 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1603 __kmp_nThreadsPerCore, __kmp_ncores);
1604 }
1605
1606 if (__kmp_affinity_type == affinity_none) {
1607 __kmp_free(retval);
1608 KMP_CPU_FREE(oldMask);
1609 return 0;
1610 }
1611
1612 //
1613 // Form an Address object which only includes the package level.
1614 //
1615 Address addr(1);
1616 addr.labels[0] = retval[0].first.labels[pkgLevel];
1617 retval[0].first = addr;
1618
1619 if (__kmp_affinity_gran_levels < 0) {
1620 __kmp_affinity_gran_levels = 0;
1621 }
1622
1623 if (__kmp_affinity_verbose) {
1624 __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
1625 }
1626
1627 *address2os = retval;
1628 KMP_CPU_FREE(oldMask);
1629 return 1;
1630 }
1631
1632 //
1633 // Sort the table by physical Id.
1634 //
1635 qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
1636
1637 //
1638 // Find the radix at each of the levels.
1639 //
1640 unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1641 unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1642 unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1643 unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1644 for (level = 0; level < depth; level++) {
1645 totals[level] = 1;
1646 maxCt[level] = 1;
1647 counts[level] = 1;
1648 last[level] = retval[0].first.labels[level];
1649 }
1650
1651 //
1652 // From here on, the iteration variable "level" runs from the finest
1653 // level to the coarsest, i.e. we iterate forward through
1654 // (*address2os)[].first.labels[] - in the previous loops, we iterated
1655 // backwards.
1656 //
1657 for (proc = 1; (int)proc < nApics; proc++) {
1658 int level;
1659 for (level = 0; level < depth; level++) {
1660 if (retval[proc].first.labels[level] != last[level]) {
1661 int j;
1662 for (j = level + 1; j < depth; j++) {
1663 totals[j]++;
1664 counts[j] = 1;
1665                    // The line below causes incorrect topology information to be
1666                    // printed when the maximum value for some level (maxCt[level]) is
1667                    // encountered earlier than a smaller value while walking the array.
1668                    // For example, if pkg0 has 4 cores and pkg1 has 2 cores, then
1669                    // maxCt[1] == 2 whereas it should be 4.
1670                    // TODO!!! Check if it can be commented out safely
1671 //maxCt[j] = 1;
1672 last[j] = retval[proc].first.labels[j];
1673 }
1674 totals[level]++;
1675 counts[level]++;
1676 if (counts[level] > maxCt[level]) {
1677 maxCt[level] = counts[level];
1678 }
1679 last[level] = retval[proc].first.labels[level];
1680 break;
1681 }
1682 else if (level == depth - 1) {
1683 __kmp_free(last);
1684 __kmp_free(maxCt);
1685 __kmp_free(counts);
1686 __kmp_free(totals);
1687 __kmp_free(retval);
1688 KMP_CPU_FREE(oldMask);
1689 *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
1690 return -1;
1691 }
1692 }
1693 }
1694
1695 //
1696 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00001697 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001698 // nCoresPerPkg, & nPackages. Make sure all these vars are set
1699 // correctly, and return if affinity is not enabled.
1700 //
1701 if (threadLevel >= 0) {
1702 __kmp_nThreadsPerCore = maxCt[threadLevel];
1703 }
1704 else {
1705 __kmp_nThreadsPerCore = 1;
1706 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001707 nPackages = totals[pkgLevel];
1708
1709 if (coreLevel >= 0) {
1710 __kmp_ncores = totals[coreLevel];
1711 nCoresPerPkg = maxCt[coreLevel];
1712 }
1713 else {
1714 __kmp_ncores = nPackages;
1715 nCoresPerPkg = 1;
1716 }
1717
1718 //
1719 // Check to see if the machine topology is uniform
1720 //
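    // The topology is uniform iff the product of the per-level radixes
    // equals the total number of leaf objects: e.g. 2 packages x 4 cores x
    // 2 threads gives prod == 16 == totals[depth - 1], whereas a package
    // with fewer cores would make prod exceed that total.
    //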
1721 unsigned prod = maxCt[0];
1722 for (level = 1; level < depth; level++) {
1723 prod *= maxCt[level];
1724 }
1725 bool uniform = (prod == totals[level - 1]);
1726
1727 //
1728 // Print the machine topology summary.
1729 //
1730 if (__kmp_affinity_verbose) {
1731 char mask[KMP_AFFIN_MASK_PRINT_LEN];
1732 __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1733
1734 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1735 if (__kmp_affinity_respect_mask) {
1736 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
1737 } else {
1738 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
1739 }
1740 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1741 if (uniform) {
1742 KMP_INFORM(Uniform, "KMP_AFFINITY");
1743 } else {
1744 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1745 }
1746
1747 kmp_str_buf_t buf;
1748 __kmp_str_buf_init(&buf);
1749
1750 __kmp_str_buf_print(&buf, "%d", totals[0]);
1751 for (level = 1; level <= pkgLevel; level++) {
1752 __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
1753 }
1754 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
1755 __kmp_nThreadsPerCore, __kmp_ncores);
1756
1757 __kmp_str_buf_free(&buf);
1758 }
1759
1760 if (__kmp_affinity_type == affinity_none) {
1761 __kmp_free(last);
1762 __kmp_free(maxCt);
1763 __kmp_free(counts);
1764 __kmp_free(totals);
1765 __kmp_free(retval);
1766 KMP_CPU_FREE(oldMask);
1767 return 0;
1768 }
1769
1770 //
1771    // Find any levels with radix 1, and remove them from the map
1772 // (except for the package level).
1773 //
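    // For example, if hyper-threading is disabled, maxCt[threadLevel] == 1,
    // so the thread level is dropped and the map reduces to a package/core
    // topology of depth 2.
    //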
1774 int new_depth = 0;
1775 for (level = 0; level < depth; level++) {
1776 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1777 continue;
1778 }
1779 new_depth++;
1780 }
1781
1782 //
1783 // If we are removing any levels, allocate a new vector to return,
1784 // and copy the relevant information to it.
1785 //
1786 if (new_depth != depth) {
1787 AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
1788 sizeof(AddrUnsPair) * nApics);
1789 for (proc = 0; (int)proc < nApics; proc++) {
1790 Address addr(new_depth);
1791 new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
1792 }
1793 int new_level = 0;
Jonathan Peyton62f38402015-08-25 18:44:41 +00001794 int newPkgLevel = -1;
1795 int newCoreLevel = -1;
1796 int newThreadLevel = -1;
1797 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001798 for (level = 0; level < depth; level++) {
Jonathan Peyton62f38402015-08-25 18:44:41 +00001799 if ((maxCt[level] == 1)
1800 && (level != pkgLevel)) {
1801 //
1802 // Remove this level. Never remove the package level
1803 //
1804 continue;
1805 }
1806 if (level == pkgLevel) {
1807 newPkgLevel = level;
1808 }
1809 if (level == coreLevel) {
1810 newCoreLevel = level;
1811 }
1812 if (level == threadLevel) {
1813 newThreadLevel = level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001814 }
1815 for (proc = 0; (int)proc < nApics; proc++) {
1816 new_retval[proc].first.labels[new_level]
1817 = retval[proc].first.labels[level];
1818 }
1819 new_level++;
1820 }
1821
1822 __kmp_free(retval);
1823 retval = new_retval;
1824 depth = new_depth;
Jonathan Peyton62f38402015-08-25 18:44:41 +00001825 pkgLevel = newPkgLevel;
1826 coreLevel = newCoreLevel;
1827 threadLevel = newThreadLevel;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001828 }
1829
1830 if (__kmp_affinity_gran_levels < 0) {
1831 //
1832 // Set the granularity level based on what levels are modeled
1833 // in the machine topology map.
1834 //
1835 __kmp_affinity_gran_levels = 0;
1836 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
1837 __kmp_affinity_gran_levels++;
1838 }
1839 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1840 __kmp_affinity_gran_levels++;
1841 }
1842 if (__kmp_affinity_gran > affinity_gran_package) {
1843 __kmp_affinity_gran_levels++;
1844 }
1845 }
1846
1847 if (__kmp_affinity_verbose) {
1848 __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
1849 coreLevel, threadLevel);
1850 }
1851
1852 __kmp_free(last);
1853 __kmp_free(maxCt);
1854 __kmp_free(counts);
1855 __kmp_free(totals);
1856 KMP_CPU_FREE(oldMask);
1857 *address2os = retval;
1858 return depth;
1859}
1860
1861
1862# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1863
1864
1865#define osIdIndex 0
1866#define threadIdIndex 1
1867#define coreIdIndex 2
1868#define pkgIdIndex 3
1869#define nodeIdIndex 4
1870
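//
// Each /proc/cpuinfo record is stored as an array of unsigneds indexed by
// the constants above, ordered from the least significant field (the OS proc
// id) to the most significant (node ids), so that comparing fields from
// maxIndex down to osIdIndex sorts the records into physical order.
//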
1871typedef unsigned *ProcCpuInfo;
1872static unsigned maxIndex = pkgIdIndex;
1873
1874
1875static int
1876__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
1877{
1878 const unsigned *aa = (const unsigned *)a;
1879 const unsigned *bb = (const unsigned *)b;
1880 if (aa[osIdIndex] < bb[osIdIndex]) return -1;
1881 if (aa[osIdIndex] > bb[osIdIndex]) return 1;
1882 return 0;
1883};
1884
1885
1886static int
1887__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
1888{
1889 unsigned i;
1890 const unsigned *aa = *((const unsigned **)a);
1891 const unsigned *bb = *((const unsigned **)b);
1892 for (i = maxIndex; ; i--) {
1893 if (aa[i] < bb[i]) return -1;
1894 if (aa[i] > bb[i]) return 1;
1895 if (i == osIdIndex) break;
1896 }
1897 return 0;
1898}
1899
1900
1901//
1902// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
1903// affinity map.
1904//
1905static int
1906__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
1907 kmp_i18n_id_t *const msg_id, FILE *f)
1908{
1909 *address2os = NULL;
1910 *msg_id = kmp_i18n_null;
1911
1912 //
1913    // Scan the file and count the number of "processor" (osId) fields,
Alp Toker8f2d3f02014-02-24 10:40:15 +00001914 // and find the highest value of <n> for a node_<n> field.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001915 //
1916 char buf[256];
1917 unsigned num_records = 0;
1918 while (! feof(f)) {
1919 buf[sizeof(buf) - 1] = 1;
1920 if (! fgets(buf, sizeof(buf), f)) {
1921 //
1922 // Read errors presumably because of EOF
1923 //
1924 break;
1925 }
1926
1927 char s1[] = "processor";
1928 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1929 num_records++;
1930 continue;
1931 }
1932
1933 //
1934 // FIXME - this will match "node_<n> <garbage>"
1935 //
1936 unsigned level;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001937 if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001938 if (nodeIdIndex + level >= maxIndex) {
1939 maxIndex = nodeIdIndex + level;
1940 }
1941 continue;
1942 }
1943 }
1944
1945 //
1946 // Check for empty file / no valid processor records, or too many.
1947 // The number of records can't exceed the number of valid bits in the
1948 // affinity mask.
1949 //
1950 if (num_records == 0) {
1951 *line = 0;
1952 *msg_id = kmp_i18n_str_NoProcRecords;
1953 return -1;
1954 }
1955 if (num_records > (unsigned)__kmp_xproc) {
1956 *line = 0;
1957 *msg_id = kmp_i18n_str_TooManyProcRecords;
1958 return -1;
1959 }
1960
1961 //
1962    // Set the file pointer back to the beginning, so that we can scan the
1963 // file again, this time performing a full parse of the data.
1964    // Allocate a vector of ProcCpuInfo objects, where we will place the data.
1965 // Adding an extra element at the end allows us to remove a lot of extra
1966 // checks for termination conditions.
1967 //
1968 if (fseek(f, 0, SEEK_SET) != 0) {
1969 *line = 0;
1970 *msg_id = kmp_i18n_str_CantRewindCpuinfo;
1971 return -1;
1972 }
1973
1974 //
1975 // Allocate the array of records to store the proc info in. The dummy
1976 // element at the end makes the logic in filling them out easier to code.
1977 //
1978 unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
1979 * sizeof(unsigned *));
1980 unsigned i;
1981 for (i = 0; i <= num_records; i++) {
1982 threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
1983 * sizeof(unsigned));
1984 }
1985
1986#define CLEANUP_THREAD_INFO \
1987 for (i = 0; i <= num_records; i++) { \
1988 __kmp_free(threadInfo[i]); \
1989 } \
1990 __kmp_free(threadInfo);
1991
1992 //
1993 // A value of UINT_MAX means that we didn't find the field
1994 //
1995 unsigned __index;
1996
1997#define INIT_PROC_INFO(p) \
1998 for (__index = 0; __index <= maxIndex; __index++) { \
1999 (p)[__index] = UINT_MAX; \
2000 }
2001
2002 for (i = 0; i <= num_records; i++) {
2003 INIT_PROC_INFO(threadInfo[i]);
2004 }
2005
2006 unsigned num_avail = 0;
2007 *line = 0;
2008 while (! feof(f)) {
2009 //
2010 // Create an inner scoping level, so that all the goto targets at the
2011 // end of the loop appear in an outer scoping level. This avoids
2012 // warnings about jumping past an initialization to a target in the
2013 // same block.
2014 //
2015 {
2016 buf[sizeof(buf) - 1] = 1;
2017 bool long_line = false;
2018 if (! fgets(buf, sizeof(buf), f)) {
2019 //
2020 // Read errors presumably because of EOF
2021 //
2022 // If there is valid data in threadInfo[num_avail], then fake
2023                // a blank line to ensure that the last address gets parsed.
2024 //
2025 bool valid = false;
2026 for (i = 0; i <= maxIndex; i++) {
2027 if (threadInfo[num_avail][i] != UINT_MAX) {
2028 valid = true;
2029 }
2030 }
2031 if (! valid) {
2032 break;
2033 }
2034 buf[0] = 0;
2035 } else if (!buf[sizeof(buf) - 1]) {
2036 //
2037 // The line is longer than the buffer. Set a flag and don't
2038                // emit an error if we were going to ignore the line anyway.
2039 //
2040 long_line = true;
2041
2042#define CHECK_LINE \
2043 if (long_line) { \
2044 CLEANUP_THREAD_INFO; \
2045 *msg_id = kmp_i18n_str_LongLineCpuinfo; \
2046 return -1; \
2047 }
2048 }
2049 (*line)++;
2050
2051 char s1[] = "processor";
2052 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
2053 CHECK_LINE;
2054 char *p = strchr(buf + sizeof(s1) - 1, ':');
2055 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002056 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002057 if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
2058 threadInfo[num_avail][osIdIndex] = val;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002059#if KMP_OS_LINUX && USE_SYSFS_INFO
2060 char path[256];
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002061 KMP_SNPRINTF(path, sizeof(path),
Jim Cownie181b4bb2013-12-23 17:28:57 +00002062 "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
2063 threadInfo[num_avail][osIdIndex]);
2064 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
2065
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002066 KMP_SNPRINTF(path, sizeof(path),
Jim Cownie181b4bb2013-12-23 17:28:57 +00002067 "/sys/devices/system/cpu/cpu%u/topology/core_id",
2068 threadInfo[num_avail][osIdIndex]);
2069 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002070 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002071#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002072 }
2073 char s2[] = "physical id";
2074 if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
2075 CHECK_LINE;
2076 char *p = strchr(buf + sizeof(s2) - 1, ':');
2077 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002078 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002079 if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
2080 threadInfo[num_avail][pkgIdIndex] = val;
2081 continue;
2082 }
2083 char s3[] = "core id";
2084 if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
2085 CHECK_LINE;
2086 char *p = strchr(buf + sizeof(s3) - 1, ':');
2087 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002088 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002089 if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
2090 threadInfo[num_avail][coreIdIndex] = val;
2091 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002092#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jim Cownie5e8470a2013-09-27 10:38:44 +00002093 }
2094 char s4[] = "thread id";
2095 if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
2096 CHECK_LINE;
2097 char *p = strchr(buf + sizeof(s4) - 1, ':');
2098 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002099 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002100 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
2101 threadInfo[num_avail][threadIdIndex] = val;
2102 continue;
2103 }
2104 unsigned level;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002105 if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002106 CHECK_LINE;
2107 char *p = strchr(buf + sizeof(s4) - 1, ':');
2108 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002109 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002110 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
2111 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
2112 threadInfo[num_avail][nodeIdIndex + level] = val;
2113 continue;
2114 }
2115
2116 //
2117 // We didn't recognize the leading token on the line.
2118 // There are lots of leading tokens that we don't recognize -
2119 // if the line isn't empty, go on to the next line.
2120 //
2121 if ((*buf != 0) && (*buf != '\n')) {
2122 //
2123 // If the line is longer than the buffer, read characters
2124 // until we find a newline.
2125 //
2126 if (long_line) {
2127 int ch;
2128 while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
2129 }
2130 continue;
2131 }
2132
2133 //
2134 // A newline has signalled the end of the processor record.
2135 // Check that there aren't too many procs specified.
2136 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002137 if ((int)num_avail == __kmp_xproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002138 CLEANUP_THREAD_INFO;
2139 *msg_id = kmp_i18n_str_TooManyEntries;
2140 return -1;
2141 }
2142
2143 //
2144 // Check for missing fields. The osId field must be there, and we
2145 // currently require that the physical id field is specified, also.
2146 //
2147 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
2148 CLEANUP_THREAD_INFO;
2149 *msg_id = kmp_i18n_str_MissingProcField;
2150 return -1;
2151 }
2152 if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
2153 CLEANUP_THREAD_INFO;
2154 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
2155 return -1;
2156 }
2157
2158 //
2159 // Skip this proc if it is not included in the machine model.
2160 //
2161 if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
2162 INIT_PROC_INFO(threadInfo[num_avail]);
2163 continue;
2164 }
2165
2166 //
2167 // We have a successful parse of this proc's info.
2168 // Increment the counter, and prepare for the next proc.
2169 //
2170 num_avail++;
2171 KMP_ASSERT(num_avail <= num_records);
2172 INIT_PROC_INFO(threadInfo[num_avail]);
2173 }
2174 continue;
2175
2176 no_val:
2177 CLEANUP_THREAD_INFO;
2178 *msg_id = kmp_i18n_str_MissingValCpuinfo;
2179 return -1;
2180
2181 dup_field:
2182 CLEANUP_THREAD_INFO;
2183 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
2184 return -1;
2185 }
2186 *line = 0;
2187
2188# if KMP_MIC && REDUCE_TEAM_SIZE
2189 unsigned teamSize = 0;
2190# endif // KMP_MIC && REDUCE_TEAM_SIZE
2191
2192 // check for num_records == __kmp_xproc ???
2193
2194 //
2195 // If there's only one thread context to bind to, form an Address object
2196 // with depth 1 and return immediately (or, if affinity is off, set
2197 // address2os to NULL and return).
2198 //
2199 // If it is configured to omit the package level when there is only a
2200 // single package, the logic at the end of this routine won't work if
2201 // there is only a single thread - it would try to form an Address
2202 // object with depth 0.
2203 //
2204 KMP_ASSERT(num_avail > 0);
2205 KMP_ASSERT(num_avail <= num_records);
2206 if (num_avail == 1) {
2207 __kmp_ncores = 1;
2208 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002209 if (__kmp_affinity_verbose) {
2210 if (! KMP_AFFINITY_CAPABLE()) {
2211 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2212 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2213 KMP_INFORM(Uniform, "KMP_AFFINITY");
2214 }
2215 else {
2216 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2217 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
2218 fullMask);
2219 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2220 if (__kmp_affinity_respect_mask) {
2221 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2222 } else {
2223 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2224 }
2225 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2226 KMP_INFORM(Uniform, "KMP_AFFINITY");
2227 }
2228 int index;
2229 kmp_str_buf_t buf;
2230 __kmp_str_buf_init(&buf);
2231 __kmp_str_buf_print(&buf, "1");
2232 for (index = maxIndex - 1; index > pkgIdIndex; index--) {
2233 __kmp_str_buf_print(&buf, " x 1");
2234 }
2235 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
2236 __kmp_str_buf_free(&buf);
2237 }
2238
2239 if (__kmp_affinity_type == affinity_none) {
2240 CLEANUP_THREAD_INFO;
2241 return 0;
2242 }
2243
2244 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
2245 Address addr(1);
2246 addr.labels[0] = threadInfo[0][pkgIdIndex];
2247 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
2248
2249 if (__kmp_affinity_gran_levels < 0) {
2250 __kmp_affinity_gran_levels = 0;
2251 }
2252
2253 if (__kmp_affinity_verbose) {
2254 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
2255 }
2256
2257 CLEANUP_THREAD_INFO;
2258 return 1;
2259 }
2260
2261 //
2262 // Sort the threadInfo table by physical Id.
2263 //
2264 qsort(threadInfo, num_avail, sizeof(*threadInfo),
2265 __kmp_affinity_cmp_ProcCpuInfo_phys_id);
2266
2267 //
2268 // The table is now sorted by pkgId / coreId / threadId, but we really
2269 // don't know the radix of any of the fields. pkgId's may be sparsely
2270 // assigned among the chips on a system. Although coreId's are usually
2271 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
2272 // [0..threadsPerCore-1], we don't want to make any such assumptions.
2273 //
2274 // For that matter, we don't know what coresPerPkg and threadsPerCore
2275 // (or the total # packages) are at this point - we want to determine
2276 // that now. We only have an upper bound on the first two figures.
2277 //
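    // As in the x2APIC path, counts[] tracks the children of the current
    // parent at each field, maxCt[] the largest such count (the radix),
    // totals[] the number of distinct objects, and lastId[] the previous
    // record's ids, indexed here by field (threadIdIndex .. maxIndex).
    //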
2278 unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
2279 * sizeof(unsigned));
2280 unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
2281 * sizeof(unsigned));
2282 unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
2283 * sizeof(unsigned));
2284 unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
2285 * sizeof(unsigned));
2286
2287 bool assign_thread_ids = false;
2288 unsigned threadIdCt;
2289 unsigned index;
2290
2291 restart_radix_check:
2292 threadIdCt = 0;
2293
2294 //
2295 // Initialize the counter arrays with data from threadInfo[0].
2296 //
2297 if (assign_thread_ids) {
2298 if (threadInfo[0][threadIdIndex] == UINT_MAX) {
2299 threadInfo[0][threadIdIndex] = threadIdCt++;
2300 }
2301 else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
2302 threadIdCt = threadInfo[0][threadIdIndex] + 1;
2303 }
2304 }
2305 for (index = 0; index <= maxIndex; index++) {
2306 counts[index] = 1;
2307 maxCt[index] = 1;
2308 totals[index] = 1;
2309        lastId[index] = threadInfo[0][index];
2310 }
2311
2312 //
2313 // Run through the rest of the OS procs.
2314 //
2315 for (i = 1; i < num_avail; i++) {
2316 //
2317 // Find the most significant index whose id differs
2318 // from the id for the previous OS proc.
2319 //
2320 for (index = maxIndex; index >= threadIdIndex; index--) {
2321 if (assign_thread_ids && (index == threadIdIndex)) {
2322 //
2323 // Auto-assign the thread id field if it wasn't specified.
2324 //
2325 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2326 threadInfo[i][threadIdIndex] = threadIdCt++;
2327 }
2328
2329 //
2330                // Apparently the thread id field was specified for some
2331 // entries and not others. Start the thread id counter
2332 // off at the next higher thread id.
2333 //
2334 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2335 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2336 }
2337 }
2338 if (threadInfo[i][index] != lastId[index]) {
2339 //
2340 // Run through all indices which are less significant,
2341 // and reset the counts to 1.
2342 //
2343 // At all levels up to and including index, we need to
2344 // increment the totals and record the last id.
2345 //
2346 unsigned index2;
2347 for (index2 = threadIdIndex; index2 < index; index2++) {
2348 totals[index2]++;
2349 if (counts[index2] > maxCt[index2]) {
2350 maxCt[index2] = counts[index2];
2351 }
2352 counts[index2] = 1;
2353 lastId[index2] = threadInfo[i][index2];
2354 }
2355 counts[index]++;
2356 totals[index]++;
2357 lastId[index] = threadInfo[i][index];
2358
2359 if (assign_thread_ids && (index > threadIdIndex)) {
2360
2361# if KMP_MIC && REDUCE_TEAM_SIZE
2362 //
2363 // The default team size is the total #threads in the machine
2364 // minus 1 thread for every core that has 3 or more threads.
2365 //
2366 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2367# endif // KMP_MIC && REDUCE_TEAM_SIZE
2368
2369 //
2370 // Restart the thread counter, as we are on a new core.
2371 //
2372 threadIdCt = 0;
2373
2374 //
2375 // Auto-assign the thread id field if it wasn't specified.
2376 //
2377 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2378 threadInfo[i][threadIdIndex] = threadIdCt++;
2379 }
2380
2381 //
2382                    // Apparently the thread id field was specified for some
2383 // entries and not others. Start the thread id counter
2384 // off at the next higher thread id.
2385 //
2386 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2387 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2388 }
2389 }
2390 break;
2391 }
2392 }
2393 if (index < threadIdIndex) {
2394 //
2395 // If thread ids were specified, it is an error if they are not
2396            // unique.  Also, check that we haven't already restarted the
2397 // loop (to be safe - shouldn't need to).
2398 //
2399 if ((threadInfo[i][threadIdIndex] != UINT_MAX)
2400 || assign_thread_ids) {
2401 __kmp_free(lastId);
2402 __kmp_free(totals);
2403 __kmp_free(maxCt);
2404 __kmp_free(counts);
2405 CLEANUP_THREAD_INFO;
2406 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2407 return -1;
2408 }
2409
2410 //
2411            // If the thread ids were not specified and we see entries
2412            // that are duplicates, start the loop over and
2413 // assign the thread ids manually.
2414 //
2415 assign_thread_ids = true;
2416 goto restart_radix_check;
2417 }
2418 }
2419
2420# if KMP_MIC && REDUCE_TEAM_SIZE
2421 //
2422 // The default team size is the total #threads in the machine
2423 // minus 1 thread for every core that has 3 or more threads.
2424 //
2425 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2426# endif // KMP_MIC && REDUCE_TEAM_SIZE
2427
2428 for (index = threadIdIndex; index <= maxIndex; index++) {
2429 if (counts[index] > maxCt[index]) {
2430 maxCt[index] = counts[index];
2431 }
2432 }
2433
2434 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2435 nCoresPerPkg = maxCt[coreIdIndex];
2436 nPackages = totals[pkgIdIndex];
2437
2438 //
2439 // Check to see if the machine topology is uniform
2440 //
2441 unsigned prod = totals[maxIndex];
2442 for (index = threadIdIndex; index < maxIndex; index++) {
2443 prod *= maxCt[index];
2444 }
2445 bool uniform = (prod == totals[threadIdIndex]);
2446
2447 //
2448 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00002449 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002450 // nCoresPerPkg, & nPackages. Make sure all these vars are set
2451 // correctly, and return now if affinity is not enabled.
2452 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00002453 __kmp_ncores = totals[coreIdIndex];
2454
2455 if (__kmp_affinity_verbose) {
2456 if (! KMP_AFFINITY_CAPABLE()) {
2457 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2458 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2459 if (uniform) {
2460 KMP_INFORM(Uniform, "KMP_AFFINITY");
2461 } else {
2462 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2463 }
2464 }
2465 else {
2466 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2467 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
2468 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2469 if (__kmp_affinity_respect_mask) {
2470 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2471 } else {
2472 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2473 }
2474 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2475 if (uniform) {
2476 KMP_INFORM(Uniform, "KMP_AFFINITY");
2477 } else {
2478 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2479 }
2480 }
2481 kmp_str_buf_t buf;
2482 __kmp_str_buf_init(&buf);
2483
2484 __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
2485 for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2486 __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
2487 }
2488 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
2489 maxCt[threadIdIndex], __kmp_ncores);
2490
2491 __kmp_str_buf_free(&buf);
2492 }
2493
2494# if KMP_MIC && REDUCE_TEAM_SIZE
2495 //
2496 // Set the default team size.
2497 //
2498 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2499 __kmp_dflt_team_nth = teamSize;
2500 KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
2501 __kmp_dflt_team_nth));
2502 }
2503# endif // KMP_MIC && REDUCE_TEAM_SIZE
2504
2505 if (__kmp_affinity_type == affinity_none) {
2506 __kmp_free(lastId);
2507 __kmp_free(totals);
2508 __kmp_free(maxCt);
2509 __kmp_free(counts);
2510 CLEANUP_THREAD_INFO;
2511 return 0;
2512 }
2513
2514 //
2515 // Count the number of levels which have more nodes at that level than
2516    // at the parent's level (with an implicit root node above the
2517    // top level).  This is equivalent to saying that there is at least
2518 // one node at this level which has a sibling. These levels are in the
2519 // map, and the package level is always in the map.
2520 //
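    // For example, on a machine with one hardware thread per core,
    // totals[threadIdIndex] == totals[coreIdIndex], so the thread level has
    // no siblings and is left out of the map.
    //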
2521 bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
2522 int level = 0;
2523 for (index = threadIdIndex; index < maxIndex; index++) {
2524 KMP_ASSERT(totals[index] >= totals[index + 1]);
2525 inMap[index] = (totals[index] > totals[index + 1]);
2526 }
2527 inMap[maxIndex] = (totals[maxIndex] > 1);
2528 inMap[pkgIdIndex] = true;
2529
2530 int depth = 0;
2531 for (index = threadIdIndex; index <= maxIndex; index++) {
2532 if (inMap[index]) {
2533 depth++;
2534 }
2535 }
2536 KMP_ASSERT(depth > 0);
2537
2538 //
2539 // Construct the data structure that is to be returned.
2540 //
2541 *address2os = (AddrUnsPair*)
2542 __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
2543 int pkgLevel = -1;
2544 int coreLevel = -1;
2545 int threadLevel = -1;
2546
2547 for (i = 0; i < num_avail; ++i) {
2548 Address addr(depth);
2549 unsigned os = threadInfo[i][osIdIndex];
2550 int src_index;
2551 int dst_index = 0;
2552
2553 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
2554 if (! inMap[src_index]) {
2555 continue;
2556 }
2557 addr.labels[dst_index] = threadInfo[i][src_index];
2558 if (src_index == pkgIdIndex) {
2559 pkgLevel = dst_index;
2560 }
2561 else if (src_index == coreIdIndex) {
2562 coreLevel = dst_index;
2563 }
2564 else if (src_index == threadIdIndex) {
2565 threadLevel = dst_index;
2566 }
2567 dst_index++;
2568 }
2569 (*address2os)[i] = AddrUnsPair(addr, os);
2570 }
2571
2572 if (__kmp_affinity_gran_levels < 0) {
2573 //
2574 // Set the granularity level based on what levels are modeled
2575 // in the machine topology map.
2576 //
2577 unsigned src_index;
2578 __kmp_affinity_gran_levels = 0;
2579 for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2580 if (! inMap[src_index]) {
2581 continue;
2582 }
2583 switch (src_index) {
2584 case threadIdIndex:
2585 if (__kmp_affinity_gran > affinity_gran_thread) {
2586 __kmp_affinity_gran_levels++;
2587 }
2588
2589 break;
2590 case coreIdIndex:
2591 if (__kmp_affinity_gran > affinity_gran_core) {
2592 __kmp_affinity_gran_levels++;
2593 }
2594 break;
2595
2596 case pkgIdIndex:
2597 if (__kmp_affinity_gran > affinity_gran_package) {
2598 __kmp_affinity_gran_levels++;
2599 }
2600 break;
2601 }
2602 }
2603 }
2604
2605 if (__kmp_affinity_verbose) {
2606 __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2607 coreLevel, threadLevel);
2608 }
2609
2610 __kmp_free(inMap);
2611 __kmp_free(lastId);
2612 __kmp_free(totals);
2613 __kmp_free(maxCt);
2614 __kmp_free(counts);
2615 CLEANUP_THREAD_INFO;
2616 return depth;
2617}
2618
2619
2620//
2621// Create and return a table of affinity masks, indexed by OS thread ID.
2622// This routine handles OR'ing together all the affinity masks of threads
2623// that are sufficiently close, if granularity > fine.
2624//
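// For example, with a granularity of "core" on a machine with two hardware
// threads per core, the two OS procs sharing a core compare as "close"
// within __kmp_affinity_gran_levels, so both of their entries in the table
// receive the same two-bit mask covering that core.
//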
2625static kmp_affin_mask_t *
2626__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
2627 AddrUnsPair *address2os, unsigned numAddrs)
2628{
2629 //
2630 // First form a table of affinity masks in order of OS thread id.
2631 //
2632 unsigned depth;
2633 unsigned maxOsId;
2634 unsigned i;
2635
2636 KMP_ASSERT(numAddrs > 0);
2637 depth = address2os[0].first.depth;
2638
2639 maxOsId = 0;
2640 for (i = 0; i < numAddrs; i++) {
2641 unsigned osId = address2os[i].second;
2642 if (osId > maxOsId) {
2643 maxOsId = osId;
2644 }
2645 }
2646 kmp_affin_mask_t *osId2Mask = (kmp_affin_mask_t *)__kmp_allocate(
2647 (maxOsId + 1) * __kmp_affin_mask_size);
2648
2649 //
2650 // Sort the address2os table according to physical order. Doing so
2651 // will put all threads on the same core/package/node in consecutive
2652 // locations.
2653 //
2654 qsort(address2os, numAddrs, sizeof(*address2os),
2655 __kmp_affinity_cmp_Address_labels);
2656
2657 KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2658 if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
2659 KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
2660 }
2661 if (__kmp_affinity_gran_levels >= (int)depth) {
2662 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2663 && (__kmp_affinity_type != affinity_none))) {
2664 KMP_WARNING(AffThreadsMayMigrate);
2665 }
2666 }
2667
2668 //
2669 // Run through the table, forming the masks for all threads on each
2670 // core. Threads on the same core will have identical "Address"
2671 // objects, not considering the last level, which must be the thread
2672 // id. All threads on a core will appear consecutively.
2673 //
2674 unsigned unique = 0;
2675 unsigned j = 0; // index of 1st thread on core
2676 unsigned leader = 0;
2677 Address *leaderAddr = &(address2os[0].first);
2678 kmp_affin_mask_t *sum
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002679 = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002680 KMP_CPU_ZERO(sum);
2681 KMP_CPU_SET(address2os[0].second, sum);
2682 for (i = 1; i < numAddrs; i++) {
2683 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002684 // If this thread is sufficiently close to the leader (within the
Jim Cownie5e8470a2013-09-27 10:38:44 +00002685 // granularity setting), then set the bit for this os thread in the
2686 // affinity mask for this group, and go on to the next thread.
2687 //
2688 if (leaderAddr->isClose(address2os[i].first,
2689 __kmp_affinity_gran_levels)) {
2690 KMP_CPU_SET(address2os[i].second, sum);
2691 continue;
2692 }
2693
2694 //
2695 // For every thread in this group, copy the mask to the thread's
2696 // entry in the osId2Mask table. Mark the first address as a
2697 // leader.
2698 //
2699 for (; j < i; j++) {
2700 unsigned osId = address2os[j].second;
2701 KMP_DEBUG_ASSERT(osId <= maxOsId);
2702 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2703 KMP_CPU_COPY(mask, sum);
2704 address2os[j].first.leader = (j == leader);
2705 }
2706 unique++;
2707
2708 //
2709 // Start a new mask.
2710 //
2711 leader = i;
2712 leaderAddr = &(address2os[i].first);
2713 KMP_CPU_ZERO(sum);
2714 KMP_CPU_SET(address2os[i].second, sum);
2715 }
2716
2717 //
2718 // For every thread in last group, copy the mask to the thread's
2719 // entry in the osId2Mask table.
2720 //
2721 for (; j < i; j++) {
2722 unsigned osId = address2os[j].second;
2723 KMP_DEBUG_ASSERT(osId <= maxOsId);
2724 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2725 KMP_CPU_COPY(mask, sum);
2726 address2os[j].first.leader = (j == leader);
2727 }
2728 unique++;
2729
2730 *maxIndex = maxOsId;
2731 *numUnique = unique;
2732 return osId2Mask;
2733}
2734
2735
2736//
2737// Stuff for the affinity proclist parsers. It's easier to declare these vars
2738// as file-static than to try and pass them through the calling sequence of
2739// the recursive-descent OMP_PLACES parser.
2740//
2741static kmp_affin_mask_t *newMasks;
2742static int numNewMasks;
2743static int nextNewMask;
2744
2745#define ADD_MASK(_mask) \
2746 { \
2747 if (nextNewMask >= numNewMasks) { \
2748 numNewMasks *= 2; \
2749 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
2750 numNewMasks * __kmp_affin_mask_size); \
2751 } \
2752 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
2753 nextNewMask++; \
2754 }
2755
2756#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
2757 { \
2758 if (((_osId) > _maxOsId) || \
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002759 (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002760 if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
2761 && (__kmp_affinity_type != affinity_none))) { \
2762 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
2763 } \
2764 } \
2765 else { \
2766 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
2767 } \
2768 }
2769
2770
2771//
2772// Re-parse the proclist (for the explicit affinity type), and form the list
2773// of affinity newMasks indexed by gtid.
2774//
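// For example, a proclist of "0,3-5,{8,9}" produces five masks: {0}, {3},
// {4}, {5}, and the combined set {8,9} (assuming all of those OS procs are
// present in the machine model).
//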
2775static void
2776__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
2777 unsigned int *out_numMasks, const char *proclist,
2778 kmp_affin_mask_t *osId2Mask, int maxOsId)
2779{
2780 const char *scan = proclist;
2781 const char *next = proclist;
2782
2783 //
2784 // We use malloc() for the temporary mask vector,
2785 // so that we can use realloc() to extend it.
2786 //
2787 numNewMasks = 2;
2788 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
2789 * __kmp_affin_mask_size);
2790 nextNewMask = 0;
2791 kmp_affin_mask_t *sumMask = (kmp_affin_mask_t *)__kmp_allocate(
2792 __kmp_affin_mask_size);
2793 int setSize = 0;
2794
2795 for (;;) {
2796 int start, end, stride;
2797
2798 SKIP_WS(scan);
2799 next = scan;
2800 if (*next == '\0') {
2801 break;
2802 }
2803
2804 if (*next == '{') {
2805 int num;
2806 setSize = 0;
2807 next++; // skip '{'
2808 SKIP_WS(next);
2809 scan = next;
2810
2811 //
2812 // Read the first integer in the set.
2813 //
2814 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2815 "bad proclist");
2816 SKIP_DIGITS(next);
2817 num = __kmp_str_to_int(scan, *next);
2818 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2819
2820 //
2821 // Copy the mask for that osId to the sum (union) mask.
2822 //
2823 if ((num > maxOsId) ||
2824 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2825 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2826 && (__kmp_affinity_type != affinity_none))) {
2827 KMP_WARNING(AffIgnoreInvalidProcID, num);
2828 }
2829 KMP_CPU_ZERO(sumMask);
2830 }
2831 else {
2832 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2833 setSize = 1;
2834 }
2835
2836 for (;;) {
2837 //
2838 // Check for end of set.
2839 //
2840 SKIP_WS(next);
2841 if (*next == '}') {
2842 next++; // skip '}'
2843 break;
2844 }
2845
2846 //
2847 // Skip optional comma.
2848 //
2849 if (*next == ',') {
2850 next++;
2851 }
2852 SKIP_WS(next);
2853
2854 //
2855 // Read the next integer in the set.
2856 //
2857 scan = next;
2858 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2859 "bad explicit proc list");
2860
2861 SKIP_DIGITS(next);
2862 num = __kmp_str_to_int(scan, *next);
2863 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2864
2865 //
2866 // Add the mask for that osId to the sum mask.
2867 //
2868 if ((num > maxOsId) ||
2869 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2870 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2871 && (__kmp_affinity_type != affinity_none))) {
2872 KMP_WARNING(AffIgnoreInvalidProcID, num);
2873 }
2874 }
2875 else {
2876 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2877 setSize++;
2878 }
2879 }
2880 if (setSize > 0) {
2881 ADD_MASK(sumMask);
2882 }
2883
2884 SKIP_WS(next);
2885 if (*next == ',') {
2886 next++;
2887 }
2888 scan = next;
2889 continue;
2890 }
2891
2892 //
2893 // Read the first integer.
2894 //
2895 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2896 SKIP_DIGITS(next);
2897 start = __kmp_str_to_int(scan, *next);
2898 KMP_ASSERT2(start >= 0, "bad explicit proc list");
2899 SKIP_WS(next);
2900
2901 //
2902 // If this isn't a range, then add a mask to the list and go on.
2903 //
2904 if (*next != '-') {
2905 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2906
2907 //
2908 // Skip optional comma.
2909 //
2910 if (*next == ',') {
2911 next++;
2912 }
2913 scan = next;
2914 continue;
2915 }
2916
2917 //
2918 // This is a range. Skip over the '-' and read in the 2nd int.
2919 //
2920 next++; // skip '-'
2921 SKIP_WS(next);
2922 scan = next;
2923 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2924 SKIP_DIGITS(next);
2925 end = __kmp_str_to_int(scan, *next);
2926 KMP_ASSERT2(end >= 0, "bad explicit proc list");
2927
2928 //
2929 // Check for a stride parameter
2930 //
2931 stride = 1;
2932 SKIP_WS(next);
2933 if (*next == ':') {
2934 //
2935            // A stride is specified.  Skip over the ':' and read the 3rd int.
2936 //
2937 int sign = +1;
2938 next++; // skip ':'
2939 SKIP_WS(next);
2940 scan = next;
2941 if (*next == '-') {
2942 sign = -1;
2943 next++;
2944 SKIP_WS(next);
2945 scan = next;
2946 }
2947 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2948 "bad explicit proc list");
2949 SKIP_DIGITS(next);
2950 stride = __kmp_str_to_int(scan, *next);
2951 KMP_ASSERT2(stride >= 0, "bad explicit proc list");
2952 stride *= sign;
2953 }
2954
2955 //
2956 // Do some range checks.
2957 //
2958 KMP_ASSERT2(stride != 0, "bad explicit proc list");
2959 if (stride > 0) {
2960 KMP_ASSERT2(start <= end, "bad explicit proc list");
2961 }
2962 else {
2963 KMP_ASSERT2(start >= end, "bad explicit proc list");
2964 }
2965 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
2966
2967 //
2968 // Add the mask for each OS proc # to the list.
2969 //
2970 if (stride > 0) {
2971 do {
2972 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2973 start += stride;
2974 } while (start <= end);
2975 }
2976 else {
2977 do {
2978 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2979 start += stride;
2980 } while (start >= end);
2981 }
2982
2983 //
2984 // Skip optional comma.
2985 //
2986 SKIP_WS(next);
2987 if (*next == ',') {
2988 next++;
2989 }
2990 scan = next;
2991 }
2992
2993 *out_numMasks = nextNewMask;
2994 if (nextNewMask == 0) {
2995 *out_masks = NULL;
2996 KMP_INTERNAL_FREE(newMasks);
2997 return;
2998 }
2999 *out_masks
3000 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003001 KMP_MEMCPY(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003002 __kmp_free(sumMask);
3003 KMP_INTERNAL_FREE(newMasks);
3004}
3005
3006
3007# if OMP_40_ENABLED
3008
3009/*-----------------------------------------------------------------------------
3010
3011Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
3012places.  Again, here is the grammar:
3013
3014place_list := place
3015place_list := place , place_list
3016place := num
3017place := place : num
3018place := place : num : signed
3019place := { subplacelist }
3020place := ! place // (lowest priority)
3021subplace_list := subplace
3022subplace_list := subplace , subplace_list
3023subplace := num
3024subplace := num : num
3025subplace := num : num : signed
3026signed := num
3027signed := + signed
3028signed := - signed
3029
3030-----------------------------------------------------------------------------*/
3031
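//
// For example, "{0,1},{2,3}" describes two places of two procs each, and
// "{0:2}:4:2" expands to the four places {0,1}, {2,3}, {4,5}, {6,7}
// (assuming those OS procs are all present in the machine model).
//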
3032static void
3033__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
3034 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3035{
3036 const char *next;
3037
3038 for (;;) {
3039 int start, count, stride, i;
3040
3041 //
3042 // Read in the starting proc id
3043 //
3044 SKIP_WS(*scan);
3045 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3046 "bad explicit places list");
3047 next = *scan;
3048 SKIP_DIGITS(next);
3049 start = __kmp_str_to_int(*scan, *next);
3050 KMP_ASSERT(start >= 0);
3051 *scan = next;
3052
3053 //
3054 // valid follow sets are ',' ':' and '}'
3055 //
3056 SKIP_WS(*scan);
3057 if (**scan == '}' || **scan == ',') {
3058 if ((start > maxOsId) ||
3059 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3060 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3061 && (__kmp_affinity_type != affinity_none))) {
3062 KMP_WARNING(AffIgnoreInvalidProcID, start);
3063 }
3064 }
3065 else {
3066 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3067 (*setSize)++;
3068 }
3069 if (**scan == '}') {
3070 break;
3071 }
3072 (*scan)++; // skip ','
3073 continue;
3074 }
3075 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3076 (*scan)++; // skip ':'
3077
3078 //
3079 // Read count parameter
3080 //
3081 SKIP_WS(*scan);
3082 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3083 "bad explicit places list");
3084 next = *scan;
3085 SKIP_DIGITS(next);
3086 count = __kmp_str_to_int(*scan, *next);
3087 KMP_ASSERT(count >= 0);
3088 *scan = next;
3089
3090 //
3091 // valid follow sets are ',' ':' and '}'
3092 //
3093 SKIP_WS(*scan);
3094 if (**scan == '}' || **scan == ',') {
3095 for (i = 0; i < count; i++) {
3096 if ((start > maxOsId) ||
3097 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3098 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3099 && (__kmp_affinity_type != affinity_none))) {
3100 KMP_WARNING(AffIgnoreInvalidProcID, start);
3101 }
3102 break; // don't proliferate warnings for large count
3103 }
3104 else {
3105 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3106 start++;
3107 (*setSize)++;
3108 }
3109 }
3110 if (**scan == '}') {
3111 break;
3112 }
3113 (*scan)++; // skip ','
3114 continue;
3115 }
3116 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3117 (*scan)++; // skip ':'
3118
3119 //
3120 // Read stride parameter
3121 //
3122 int sign = +1;
3123 for (;;) {
3124 SKIP_WS(*scan);
3125 if (**scan == '+') {
3126 (*scan)++; // skip '+'
3127 continue;
3128 }
3129 if (**scan == '-') {
3130 sign *= -1;
3131 (*scan)++; // skip '-'
3132 continue;
3133 }
3134 break;
3135 }
3136 SKIP_WS(*scan);
3137 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3138 "bad explicit places list");
3139 next = *scan;
3140 SKIP_DIGITS(next);
3141 stride = __kmp_str_to_int(*scan, *next);
3142 KMP_ASSERT(stride >= 0);
3143 *scan = next;
3144 stride *= sign;
3145
3146 //
3147 // valid follow sets are ',' and '}'
3148 //
3149 SKIP_WS(*scan);
3150 if (**scan == '}' || **scan == ',') {
3151 for (i = 0; i < count; i++) {
3152 if ((start > maxOsId) ||
3153 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3154 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3155 && (__kmp_affinity_type != affinity_none))) {
3156 KMP_WARNING(AffIgnoreInvalidProcID, start);
3157 }
3158 break; // don't proliferate warnings for large count
3159 }
3160 else {
3161 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3162 start += stride;
3163 (*setSize)++;
3164 }
3165 }
3166 if (**scan == '}') {
3167 break;
3168 }
3169 (*scan)++; // skip ','
3170 continue;
3171 }
3172
3173 KMP_ASSERT2(0, "bad explicit places list");
3174 }
3175}
3176
3177
3178static void
3179__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
3180 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3181{
3182 const char *next;
3183
3184 //
3185 // valid follow sets are '{' '!' and num
3186 //
3187 SKIP_WS(*scan);
3188 if (**scan == '{') {
3189 (*scan)++; // skip '{'
3190 __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
3191 setSize);
3192 KMP_ASSERT2(**scan == '}', "bad explicit places list");
3193 (*scan)++; // skip '}'
3194 }
3195 else if (**scan == '!') {
3196 __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
3197 KMP_CPU_COMPLEMENT(tempMask);
3198 (*scan)++; // skip '!'
3199 }
3200 else if ((**scan >= '0') && (**scan <= '9')) {
3201 next = *scan;
3202 SKIP_DIGITS(next);
3203 int num = __kmp_str_to_int(*scan, *next);
3204 KMP_ASSERT(num >= 0);
3205 if ((num > maxOsId) ||
3206 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3207 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3208 && (__kmp_affinity_type != affinity_none))) {
3209 KMP_WARNING(AffIgnoreInvalidProcID, num);
3210 }
3211 }
3212 else {
3213 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
3214 (*setSize)++;
3215 }
3216 *scan = next; // skip num
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003217 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003218 else {
3219 KMP_ASSERT2(0, "bad explicit places list");
3220 }
3221}
3222
3223
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003224//static void
3225void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003226__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
3227 unsigned int *out_numMasks, const char *placelist,
3228 kmp_affin_mask_t *osId2Mask, int maxOsId)
3229{
3230 const char *scan = placelist;
3231 const char *next = placelist;
3232
3233 numNewMasks = 2;
3234 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
3235 * __kmp_affin_mask_size);
3236 nextNewMask = 0;
3237
3238 kmp_affin_mask_t *tempMask = (kmp_affin_mask_t *)__kmp_allocate(
3239 __kmp_affin_mask_size);
3240 KMP_CPU_ZERO(tempMask);
3241 int setSize = 0;
3242
3243 for (;;) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003244 __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
3245
3246 //
3247 // valid follow sets are ',' ':' and EOL
3248 //
3249 SKIP_WS(scan);
3250 if (*scan == '\0' || *scan == ',') {
3251 if (setSize > 0) {
3252 ADD_MASK(tempMask);
3253 }
3254 KMP_CPU_ZERO(tempMask);
3255 setSize = 0;
3256 if (*scan == '\0') {
3257 break;
3258 }
3259 scan++; // skip ','
3260 continue;
3261 }
3262
3263 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3264 scan++; // skip ':'
3265
3266 //
3267 // Read count parameter
3268 //
3269 SKIP_WS(scan);
3270 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3271 "bad explicit places list");
3272 next = scan;
3273 SKIP_DIGITS(next);
Jim Cownie181b4bb2013-12-23 17:28:57 +00003274 int count = __kmp_str_to_int(scan, *next);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003275 KMP_ASSERT(count >= 0);
3276 scan = next;
3277
3278 //
3279 // valid follow sets are ',' ':' and EOL
3280 //
3281 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003282 int stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003283 if (*scan == '\0' || *scan == ',') {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003284 stride = +1;
3285 }
3286 else {
3287 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3288 scan++; // skip ':'
Jim Cownie5e8470a2013-09-27 10:38:44 +00003289
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003290 //
3291 // Read stride parameter
3292 //
3293 int sign = +1;
3294 for (;;) {
3295 SKIP_WS(scan);
3296 if (*scan == '+') {
3297 scan++; // skip '+'
3298 continue;
3299 }
3300 if (*scan == '-') {
3301 sign *= -1;
3302 scan++; // skip '-'
3303 continue;
3304 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003305 break;
3306 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003307 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003308 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3309 "bad explicit places list");
3310 next = scan;
3311 SKIP_DIGITS(next);
3312 stride = __kmp_str_to_int(scan, *next);
3313 KMP_DEBUG_ASSERT(stride >= 0);
3314 scan = next;
3315 stride *= sign;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003316 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003317
3318 if (stride > 0) {
3319 int i;
3320 for (i = 0; i < count; i++) {
3321 int j;
3322 if (setSize == 0) {
3323 break;
3324 }
3325 ADD_MASK(tempMask);
3326 setSize = 0;
3327 for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003328 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3329 KMP_CPU_CLR(j, tempMask);
3330 }
3331 else if ((j > maxOsId) ||
3332 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
Andrey Churbanov16a14322015-03-10 09:34:38 +00003333 if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
3334 && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003335 KMP_WARNING(AffIgnoreInvalidProcID, j);
3336 }
3337 KMP_CPU_CLR(j, tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003338 }
3339 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003340 KMP_CPU_SET(j, tempMask);
3341 setSize++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003342 }
3343 }
3344 for (; j >= 0; j--) {
3345 KMP_CPU_CLR(j, tempMask);
3346 }
3347 }
3348 }
3349 else {
3350 int i;
3351 for (i = 0; i < count; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003352 int j;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003353 if (setSize == 0) {
3354 break;
3355 }
3356 ADD_MASK(tempMask);
3357 setSize = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003358 for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003359 j++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003360 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3361 KMP_CPU_CLR(j, tempMask);
3362 }
3363 else if ((j > maxOsId) ||
3364 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
Andrey Churbanov16a14322015-03-10 09:34:38 +00003365 if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
3366 && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003367 KMP_WARNING(AffIgnoreInvalidProcID, j);
3368 }
3369 KMP_CPU_CLR(j, tempMask);
3370 }
3371 else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003372 KMP_CPU_SET(j, tempMask);
3373 setSize++;
3374 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003375 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003376 for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003377 KMP_CPU_CLR(j, tempMask);
3378 }
3379 }
3380 }
3381 KMP_CPU_ZERO(tempMask);
3382 setSize = 0;
3383
3384 //
3385 // valid follow sets are ',' and EOL
3386 //
3387 SKIP_WS(scan);
3388 if (*scan == '\0') {
3389 break;
3390 }
3391 if (*scan == ',') {
3392 scan++; // skip ','
3393 continue;
3394 }
3395
3396 KMP_ASSERT2(0, "bad explicit places list");
3397 }
3398
3399 *out_numMasks = nextNewMask;
3400 if (nextNewMask == 0) {
3401 *out_masks = NULL;
3402 KMP_INTERNAL_FREE(newMasks);
3403 return;
3404 }
3405 *out_masks
3406 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003407 KMP_MEMCPY(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003408 __kmp_free(tempMask);
3409 KMP_INTERNAL_FREE(newMasks);
3410}
3411
3412# endif /* OMP_40_ENABLED */
3413
3414#undef ADD_MASK
3415#undef ADD_MASK_OSID
3416
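//
// Trim the topology map so that at most __kmp_place_num_cores cores per
// package (starting at __kmp_place_core_offset) and
// __kmp_place_num_threads_per_core threads per core remain (presumably the
// limits requested via the KMP_PLACE_THREADS setting), and adjust
// nCoresPerPkg, __kmp_nThreadsPerCore, __kmp_avail_proc and __kmp_ncores to
// match the reduced machine.
//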
Jim Cownie5e8470a2013-09-27 10:38:44 +00003417static void
3418__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
3419{
3420 if ( __kmp_place_num_cores == 0 ) {
3421 if ( __kmp_place_num_threads_per_core == 0 ) {
3422 return; // no cores limiting actions requested, exit
3423 }
3424 __kmp_place_num_cores = nCoresPerPkg; // use all available cores
3425 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003426 if ( !__kmp_affinity_uniform_topology() ) {
3427 KMP_WARNING( AffThrPlaceNonUniform );
3428 return; // don't support non-uniform topology
3429 }
3430 if ( depth != 3 ) {
3431 KMP_WARNING( AffThrPlaceNonThreeLevel );
3432        return; // don't support non-3-level topology
Jim Cownie5e8470a2013-09-27 10:38:44 +00003433 }
3434 if ( __kmp_place_num_threads_per_core == 0 ) {
3435 __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
3436 }
Andrey Churbanov12875572015-03-10 09:00:36 +00003437 if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003438 KMP_WARNING( AffThrPlaceManyCores );
3439 return;
3440 }
3441
3442 AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
3443 nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
3444 int i, j, k, n_old = 0, n_new = 0;
3445 for ( i = 0; i < nPackages; ++i ) {
3446 for ( j = 0; j < nCoresPerPkg; ++j ) {
Andrey Churbanov12875572015-03-10 09:00:36 +00003447 if ( j < __kmp_place_core_offset || j >= __kmp_place_core_offset + __kmp_place_num_cores ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003448 n_old += __kmp_nThreadsPerCore; // skip not-requested core
3449 } else {
3450 for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) {
Andrey Churbanov12875572015-03-10 09:00:36 +00003451 if ( k < __kmp_place_num_threads_per_core ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003452 newAddr[n_new] = (*pAddr)[n_old]; // copy requested core' data to new location
3453 n_new++;
3454 }
3455 n_old++;
3456 }
3457 }
3458 }
3459 }
3460 nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
3461 __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
3462 __kmp_avail_proc = n_new; // correct avail_proc
3463 __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
3464
3465 __kmp_free( *pAddr );
3466 *pAddr = newAddr; // replace old topology with new one
3467}
3468
Jim Cownie5e8470a2013-09-27 10:38:44 +00003469
3470static AddrUnsPair *address2os = NULL;
3471static int * procarr = NULL;
3472static int __kmp_aff_depth = 0;
3473
3474static void
3475__kmp_aux_affinity_initialize(void)
3476{
3477 if (__kmp_affinity_masks != NULL) {
3478 KMP_ASSERT(fullMask != NULL);
3479 return;
3480 }
3481
3482 //
3483 // Create the "full" mask - this defines all of the processors that we
3484 // consider to be in the machine model. If respect is set, then it is
3485 // the initialization thread's affinity mask. Otherwise, it is all
3486 // processors that we know about on the machine.
3487 //
3488 if (fullMask == NULL) {
3489 fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
3490 }
3491 if (KMP_AFFINITY_CAPABLE()) {
3492 if (__kmp_affinity_respect_mask) {
3493 __kmp_get_system_affinity(fullMask, TRUE);
3494
3495 //
3496 // Count the number of available processors.
3497 //
3498 unsigned i;
3499 __kmp_avail_proc = 0;
3500 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
3501 if (! KMP_CPU_ISSET(i, fullMask)) {
3502 continue;
3503 }
3504 __kmp_avail_proc++;
3505 }
3506 if (__kmp_avail_proc > __kmp_xproc) {
3507 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3508 && (__kmp_affinity_type != affinity_none))) {
3509 KMP_WARNING(ErrorInitializeAffinity);
3510 }
3511 __kmp_affinity_type = affinity_none;
Andrey Churbanov1f037e42015-03-10 09:15:26 +00003512 KMP_AFFINITY_DISABLE();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003513 return;
3514 }
3515 }
3516 else {
3517 __kmp_affinity_entire_machine_mask(fullMask);
3518 __kmp_avail_proc = __kmp_xproc;
3519 }
3520 }
3521
3522 int depth = -1;
3523 kmp_i18n_id_t msg_id = kmp_i18n_null;
3524
3525 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00003526 // For backward compatibility, setting KMP_CPUINFO_FILE =>
Jim Cownie5e8470a2013-09-27 10:38:44 +00003527 // KMP_TOPOLOGY_METHOD=cpuinfo
3528 //
3529 if ((__kmp_cpuinfo_file != NULL) &&
3530 (__kmp_affinity_top_method == affinity_top_method_all)) {
3531 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
3532 }
3533
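// In the default ("all") method the discovery mechanisms below are tried in
// order until one succeeds: x2APIC ids, then legacy APIC ids (x86/x86_64
// only), then parsing /proc/cpuinfo (Linux), then Windows processor groups,
// and finally the flat OS-proc map as a last resort.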
3534 if (__kmp_affinity_top_method == affinity_top_method_all) {
3535 //
3536 // In the default code path, errors are not fatal - we just try using
3537 // another method. We only emit a warning message if affinity is on,
3538 // or the verbose flag is set, and the nowarnings flag was not set.
3539 //
3540 const char *file_name = NULL;
3541 int line = 0;
3542
3543# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3544
3545 if (__kmp_affinity_verbose) {
3546 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
3547 }
3548
3549 file_name = NULL;
3550 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3551 if (depth == 0) {
3552 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3553 KMP_ASSERT(address2os == NULL);
3554 return;
3555 }
3556
3557 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003558 if (__kmp_affinity_verbose) {
3559 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003560 KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
3561 KMP_I18N_STR(DecodingLegacyAPIC));
3562 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003563 else {
3564 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
3565 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003566 }
3567
3568 file_name = NULL;
3569 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3570 if (depth == 0) {
3571 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3572 KMP_ASSERT(address2os == NULL);
3573 return;
3574 }
3575 }
3576
3577# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3578
3579# if KMP_OS_LINUX
3580
3581 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003582 if (__kmp_affinity_verbose) {
3583 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003584 KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
3585 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003586 else {
3587 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
3588 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003589 }
3590
3591 FILE *f = fopen("/proc/cpuinfo", "r");
3592 if (f == NULL) {
3593 msg_id = kmp_i18n_str_CantOpenCpuinfo;
3594 }
3595 else {
3596 file_name = "/proc/cpuinfo";
3597 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3598 fclose(f);
3599 if (depth == 0) {
3600 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3601 KMP_ASSERT(address2os == NULL);
3602 return;
3603 }
3604 }
3605 }
3606
3607# endif /* KMP_OS_LINUX */
3608
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003609# if KMP_GROUP_AFFINITY
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003610
3611 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
3612 if (__kmp_affinity_verbose) {
3613 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3614 }
3615
3616 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3617 KMP_ASSERT(depth != 0);
3618 }
3619
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003620# endif /* KMP_GROUP_AFFINITY */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003621
Jim Cownie5e8470a2013-09-27 10:38:44 +00003622 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003623 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003624 if (file_name == NULL) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003625 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003626 }
3627 else if (line == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003628 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003629 }
3630 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003631 KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003632 }
3633 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003634 // FIXME - print msg if msg_id = kmp_i18n_null ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00003635
3636 file_name = "";
3637 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3638 if (depth == 0) {
3639 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3640 KMP_ASSERT(address2os == NULL);
3641 return;
3642 }
3643 KMP_ASSERT(depth > 0);
3644 KMP_ASSERT(address2os != NULL);
3645 }
3646 }
3647
3648 //
3649 // If the user has specified that a particular topology discovery method
3650 // is to be used, then we abort if that method fails. The exception is
3651 // group affinity, which might have been implicitly set.
3652 //
3653
3654# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3655
3656 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
3657 if (__kmp_affinity_verbose) {
3658 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3659 KMP_I18N_STR(Decodingx2APIC));
3660 }
3661
3662 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3663 if (depth == 0) {
3664 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3665 KMP_ASSERT(address2os == NULL);
3666 return;
3667 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003668 if (depth < 0) {
3669 KMP_ASSERT(msg_id != kmp_i18n_null);
3670 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3671 }
3672 }
3673 else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
3674 if (__kmp_affinity_verbose) {
3675 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3676 KMP_I18N_STR(DecodingLegacyAPIC));
3677 }
3678
3679 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3680 if (depth == 0) {
3681 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3682 KMP_ASSERT(address2os == NULL);
3683 return;
3684 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003685 if (depth < 0) {
3686 KMP_ASSERT(msg_id != kmp_i18n_null);
3687 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3688 }
3689 }
3690
3691# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3692
3693 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
3694 const char *filename;
3695 if (__kmp_cpuinfo_file != NULL) {
3696 filename = __kmp_cpuinfo_file;
3697 }
3698 else {
3699 filename = "/proc/cpuinfo";
3700 }
3701
3702 if (__kmp_affinity_verbose) {
3703 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
3704 }
3705
3706 FILE *f = fopen(filename, "r");
3707 if (f == NULL) {
3708 int code = errno;
3709 if (__kmp_cpuinfo_file != NULL) {
3710 __kmp_msg(
3711 kmp_ms_fatal,
3712 KMP_MSG(CantOpenFileForReading, filename),
3713 KMP_ERR(code),
3714 KMP_HNT(NameComesFrom_CPUINFO_FILE),
3715 __kmp_msg_null
3716 );
3717 }
3718 else {
3719 __kmp_msg(
3720 kmp_ms_fatal,
3721 KMP_MSG(CantOpenFileForReading, filename),
3722 KMP_ERR(code),
3723 __kmp_msg_null
3724 );
3725 }
3726 }
3727 int line = 0;
3728 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3729 fclose(f);
3730 if (depth < 0) {
3731 KMP_ASSERT(msg_id != kmp_i18n_null);
3732 if (line > 0) {
3733 KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
3734 }
3735 else {
3736 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
3737 }
3738 }
3739 if (__kmp_affinity_type == affinity_none) {
3740 KMP_ASSERT(depth == 0);
3741 KMP_ASSERT(address2os == NULL);
3742 return;
3743 }
3744 }
3745
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003746# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00003747
3748 else if (__kmp_affinity_top_method == affinity_top_method_group) {
3749 if (__kmp_affinity_verbose) {
3750 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3751 }
3752
3753 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3754 KMP_ASSERT(depth != 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003755 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003756 KMP_ASSERT(msg_id != kmp_i18n_null);
3757 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003758 }
3759 }
3760
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003761# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003762
3763 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
3764 if (__kmp_affinity_verbose) {
3765 KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
3766 }
3767
3768 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3769 if (depth == 0) {
3770 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3771 KMP_ASSERT(address2os == NULL);
3772 return;
3773 }
3774 // should not fail
3775 KMP_ASSERT(depth > 0);
3776 KMP_ASSERT(address2os != NULL);
3777 }
3778
3779 if (address2os == NULL) {
3780 if (KMP_AFFINITY_CAPABLE()
3781 && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3782 && (__kmp_affinity_type != affinity_none)))) {
3783 KMP_WARNING(ErrorInitializeAffinity);
3784 }
3785 __kmp_affinity_type = affinity_none;
Andrey Churbanov1f037e42015-03-10 09:15:26 +00003786 KMP_AFFINITY_DISABLE();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003787 return;
3788 }
3789
Jim Cownie5e8470a2013-09-27 10:38:44 +00003790 __kmp_apply_thread_places(&address2os, depth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003791
3792 //
3793 // Create the table of masks, indexed by thread Id.
3794 //
3795 unsigned maxIndex;
3796 unsigned numUnique;
3797 kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
3798 address2os, __kmp_avail_proc);
3799 if (__kmp_affinity_gran_levels == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003800 KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003801 }
3802
3803 //
3804 // Set the childNums vector in all Address objects. This must be done
3805 // before we can sort using __kmp_affinity_cmp_Address_child_num(),
3806 // which takes into account the setting of __kmp_affinity_compact.
3807 //
3808 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
3809
3810 switch (__kmp_affinity_type) {
3811
3812 case affinity_explicit:
3813 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
3814# if OMP_40_ENABLED
3815 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3816# endif
3817 {
3818 __kmp_affinity_process_proclist(&__kmp_affinity_masks,
3819 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3820 maxIndex);
3821 }
3822# if OMP_40_ENABLED
3823 else {
3824 __kmp_affinity_process_placelist(&__kmp_affinity_masks,
3825 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3826 maxIndex);
3827 }
3828# endif
3829 if (__kmp_affinity_num_masks == 0) {
3830 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3831 && (__kmp_affinity_type != affinity_none))) {
3832 KMP_WARNING(AffNoValidProcID);
3833 }
3834 __kmp_affinity_type = affinity_none;
3835 return;
3836 }
3837 break;
3838
3839 //
3840 // The other affinity types rely on sorting the Addresses according
3841 // to some permutation of the machine topology tree. Set
3842 // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
3843 // then jump to a common code fragment to do the sort and create
3844 // the array of affinity masks.
3845 //
3846
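// The cases below only differ in how __kmp_affinity_compact and
// __kmp_affinity_offset are chosen before the common sort at sortAddresses:
// "compact" keeps topological neighbors in consecutive masks, while "scatter"
// inverts the level significance (depth - 1 - compact) so that consecutive
// masks are spread across packages first.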
3847 case affinity_logical:
3848 __kmp_affinity_compact = 0;
3849 if (__kmp_affinity_offset) {
3850 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3851 % __kmp_avail_proc;
3852 }
3853 goto sortAddresses;
3854
3855 case affinity_physical:
3856 if (__kmp_nThreadsPerCore > 1) {
3857 __kmp_affinity_compact = 1;
3858 if (__kmp_affinity_compact >= depth) {
3859 __kmp_affinity_compact = 0;
3860 }
3861 } else {
3862 __kmp_affinity_compact = 0;
3863 }
3864 if (__kmp_affinity_offset) {
3865 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3866 % __kmp_avail_proc;
3867 }
3868 goto sortAddresses;
3869
3870 case affinity_scatter:
3871 if (__kmp_affinity_compact >= depth) {
3872 __kmp_affinity_compact = 0;
3873 }
3874 else {
3875 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
3876 }
3877 goto sortAddresses;
3878
3879 case affinity_compact:
3880 if (__kmp_affinity_compact >= depth) {
3881 __kmp_affinity_compact = depth - 1;
3882 }
3883 goto sortAddresses;
3884
Jim Cownie5e8470a2013-09-27 10:38:44 +00003885 case affinity_balanced:
Jonathan Peytoncaf09fe2015-05-27 23:27:33 +00003886 // Balanced works only for the case of a single package
Jim Cownie5e8470a2013-09-27 10:38:44 +00003887 if( nPackages > 1 ) {
3888 if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
3889 KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
3890 }
3891 __kmp_affinity_type = affinity_none;
3892 return;
3893 } else if( __kmp_affinity_uniform_topology() ) {
3894 break;
3895 } else { // Non-uniform topology
3896
3897 // Save the depth for further usage
3898 __kmp_aff_depth = depth;
3899
3900 // Number of hyper threads per core in HT machine
3901 int nth_per_core = __kmp_nThreadsPerCore;
3902
3903 int core_level;
3904 if( nth_per_core > 1 ) {
3905 core_level = depth - 2;
3906 } else {
3907 core_level = depth - 1;
3908 }
3909 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
3910 int nproc = nth_per_core * ncores;
3911
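// procarr[] is a dense core-major table: slot core * nth_per_core + thread
// holds the OS proc id bound to that hardware context, or -1 if the context
// does not exist (possible on a non-uniform machine).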
3912 procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
3913 for( int i = 0; i < nproc; i++ ) {
3914 procarr[ i ] = -1;
3915 }
3916
3917 for( int i = 0; i < __kmp_avail_proc; i++ ) {
3918 int proc = address2os[ i ].second;
3919 // If depth == 3 then level=0 - package, level=1 - core, level=2 - thread.
3920 // If there is only one thread per core then depth == 2: level 0 - package,
3921 // level 1 - core.
3922 int level = depth - 1;
3923
3924 // Defaults for the single-context case (nth_per_core == 1)
3925 int thread = 0;
3926 int core = address2os[ i ].first.labels[ level ];
3927 // If the thread level exists, that is we have more than one thread context per core
3928 if( nth_per_core > 1 ) {
3929 thread = address2os[ i ].first.labels[ level ] % nth_per_core;
3930 core = address2os[ i ].first.labels[ level - 1 ];
3931 }
3932 procarr[ core * nth_per_core + thread ] = proc;
3933 }
3934
3935 break;
3936 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003937
3938 sortAddresses:
3939 //
3940 // Allocate the gtid->affinity mask table.
3941 //
3942 if (__kmp_affinity_dups) {
3943 __kmp_affinity_num_masks = __kmp_avail_proc;
3944 }
3945 else {
3946 __kmp_affinity_num_masks = numUnique;
3947 }
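// With duplicates allowed there is one mask per available OS proc (several
// gtids may receive identical masks); otherwise only the numUnique distinct
// masks computed by __kmp_create_masks are used.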
3948
3949# if OMP_40_ENABLED
3950 if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
3951 && ( __kmp_affinity_num_places > 0 )
3952 && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
3953 __kmp_affinity_num_masks = __kmp_affinity_num_places;
3954 }
3955# endif
3956
3957 __kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
3958 __kmp_affinity_num_masks * __kmp_affin_mask_size);
3959
3960 //
3961 // Sort the address2os table according to the current setting of
3962 // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
3963 //
3964 qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
3965 __kmp_affinity_cmp_Address_child_num);
3966 {
3967 int i;
3968 unsigned j;
3969 for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
3970 if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
3971 continue;
3972 }
3973 unsigned osId = address2os[i].second;
3974 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
3975 kmp_affin_mask_t *dest
3976 = KMP_CPU_INDEX(__kmp_affinity_masks, j);
3977 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
3978 KMP_CPU_COPY(dest, src);
3979 if (++j >= __kmp_affinity_num_masks) {
3980 break;
3981 }
3982 }
3983 KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
3984 }
3985 break;
3986
3987 default:
3988 KMP_ASSERT2(0, "Unexpected affinity setting");
3989 }
3990
3991 __kmp_free(osId2Mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003992 machine_hierarchy.init(address2os, __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003993}
3994
3995
3996void
3997__kmp_affinity_initialize(void)
3998{
3999 //
4000 // Much of the code above was written assuming that if a machine was not
4001 // affinity capable, then __kmp_affinity_type == affinity_none. We now
4002 // explicitly represent this as __kmp_affinity_type == affinity_disabled.
4003 //
4004 // There are too many checks for __kmp_affinity_type == affinity_none
4005 // in this code. Instead of trying to change them all, check if
4006 // __kmp_affinity_type == affinity_disabled, and if so, slam it with
4007 // affinity_none, call the real initialization routine, then restore
4008 // __kmp_affinity_type to affinity_disabled.
4009 //
4010 int disabled = (__kmp_affinity_type == affinity_disabled);
4011 if (! KMP_AFFINITY_CAPABLE()) {
4012 KMP_ASSERT(disabled);
4013 }
4014 if (disabled) {
4015 __kmp_affinity_type = affinity_none;
4016 }
4017 __kmp_aux_affinity_initialize();
4018 if (disabled) {
4019 __kmp_affinity_type = affinity_disabled;
4020 }
4021}
4022
4023
4024void
4025__kmp_affinity_uninitialize(void)
4026{
4027 if (__kmp_affinity_masks != NULL) {
4028 __kmp_free(__kmp_affinity_masks);
4029 __kmp_affinity_masks = NULL;
4030 }
4031 if (fullMask != NULL) {
4032 KMP_CPU_FREE(fullMask);
4033 fullMask = NULL;
4034 }
4035 __kmp_affinity_num_masks = 0;
4036# if OMP_40_ENABLED
4037 __kmp_affinity_num_places = 0;
4038# endif
4039 if (__kmp_affinity_proclist != NULL) {
4040 __kmp_free(__kmp_affinity_proclist);
4041 __kmp_affinity_proclist = NULL;
4042 }
4043 if( address2os != NULL ) {
4044 __kmp_free( address2os );
4045 address2os = NULL;
4046 }
4047 if( procarr != NULL ) {
4048 __kmp_free( procarr );
4049 procarr = NULL;
4050 }
4051}
4052
4053
4054void
4055__kmp_affinity_set_init_mask(int gtid, int isa_root)
4056{
4057 if (! KMP_AFFINITY_CAPABLE()) {
4058 return;
4059 }
4060
4061 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4062 if (th->th.th_affin_mask == NULL) {
4063 KMP_CPU_ALLOC(th->th.th_affin_mask);
4064 }
4065 else {
4066 KMP_CPU_ZERO(th->th.th_affin_mask);
4067 }
4068
4069 //
4070 // Copy the thread mask to the kmp_info_t structure.
4071 // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
4072 // that has all of the OS proc ids set, or if __kmp_affinity_respect_mask
4073 // is set, then the full mask is the same as the mask of the initialization
4074 // thread.
4075 //
4076 kmp_affin_mask_t *mask;
4077 int i;
4078
4079# if OMP_40_ENABLED
4080 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
4081# endif
4082 {
Andrey Churbanovf28f6132015-01-13 14:54:00 +00004083 if ((__kmp_affinity_type == affinity_none) || (__kmp_affinity_type == affinity_balanced)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004084 ) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004085# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004086 if (__kmp_num_proc_groups > 1) {
4087 return;
4088 }
4089# endif
4090 KMP_ASSERT(fullMask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004091 i = KMP_PLACE_ALL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004092 mask = fullMask;
4093 }
4094 else {
4095 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
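// Masks are handed out round-robin by gtid, rotated by the user-requested
// __kmp_affinity_offset.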
4096 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4097 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4098 }
4099 }
4100# if OMP_40_ENABLED
4101 else {
4102 if ((! isa_root)
4103 || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004104# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004105 if (__kmp_num_proc_groups > 1) {
4106 return;
4107 }
4108# endif
4109 KMP_ASSERT(fullMask != NULL);
4110 i = KMP_PLACE_ALL;
4111 mask = fullMask;
4112 }
4113 else {
4114 //
4115 // int i = some hash function or just a counter that doesn't
4116 // always start at 0. Use gtid for now.
4117 //
4118 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4119 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4120 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4121 }
4122 }
4123# endif
4124
4125# if OMP_40_ENABLED
4126 th->th.th_current_place = i;
4127 if (isa_root) {
4128 th->th.th_new_place = i;
4129 th->th.th_first_place = 0;
4130 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4131 }
4132
4133 if (i == KMP_PLACE_ALL) {
4134 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
4135 gtid));
4136 }
4137 else {
4138 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
4139 gtid, i));
4140 }
4141# else
4142 if (i == -1) {
4143 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
4144 gtid));
4145 }
4146 else {
4147 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
4148 gtid, i));
4149 }
4150# endif /* OMP_40_ENABLED */
4151
4152 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4153
4154 if (__kmp_affinity_verbose) {
4155 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4156 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4157 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004158 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
4159 buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004160 }
4161
4162# if KMP_OS_WINDOWS
4163 //
4164 // On Windows* OS, the process affinity mask might have changed.
4165 // If the user didn't request affinity and this call fails,
4166 // just continue silently. See CQ171393.
4167 //
4168 if ( __kmp_affinity_type == affinity_none ) {
4169 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
4170 }
4171 else
4172# endif
4173 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4174}
4175
4176
4177# if OMP_40_ENABLED
4178
4179void
4180__kmp_affinity_set_place(int gtid)
4181{
4182 int retval;
4183
4184 if (! KMP_AFFINITY_CAPABLE()) {
4185 return;
4186 }
4187
4188 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4189
4190 KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
4191 gtid, th->th.th_new_place, th->th.th_current_place));
4192
4193 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00004194 // Check that the new place is within this thread's partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004195 //
4196 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004197 KMP_ASSERT(th->th.th_new_place >= 0);
4198 KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004199 if (th->th.th_first_place <= th->th.th_last_place) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004200 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004201 && (th->th.th_new_place <= th->th.th_last_place));
4202 }
4203 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004204 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004205 || (th->th.th_new_place >= th->th.th_last_place));
4206 }
4207
4208 //
4209 // Copy the thread mask to the kmp_info_t structure,
4210 // and set this thread's affinity.
4211 //
4212 kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
4213 th->th.th_new_place);
4214 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4215 th->th.th_current_place = th->th.th_new_place;
4216
4217 if (__kmp_affinity_verbose) {
4218 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4219 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4220 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004221 KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
4222 gtid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004223 }
4224 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4225}
4226
4227# endif /* OMP_40_ENABLED */
4228
4229
4230int
4231__kmp_aux_set_affinity(void **mask)
4232{
4233 int gtid;
4234 kmp_info_t *th;
4235 int retval;
4236
4237 if (! KMP_AFFINITY_CAPABLE()) {
4238 return -1;
4239 }
4240
4241 gtid = __kmp_entry_gtid();
4242 KA_TRACE(1000, ;{
4243 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4244 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4245 (kmp_affin_mask_t *)(*mask));
4246 __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
4247 gtid, buf);
4248 });
4249
4250 if (__kmp_env_consistency_check) {
4251 if ((mask == NULL) || (*mask == NULL)) {
4252 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4253 }
4254 else {
4255 unsigned proc;
4256 int num_procs = 0;
4257
4258 for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
4259 if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
4260 continue;
4261 }
4262 num_procs++;
4263 if (! KMP_CPU_ISSET(proc, fullMask)) {
4264 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4265 break;
4266 }
4267 }
4268 if (num_procs == 0) {
4269 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4270 }
4271
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004272# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004273 if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
4274 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4275 }
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004276# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004277
4278 }
4279 }
4280
4281 th = __kmp_threads[gtid];
4282 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4283 retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4284 if (retval == 0) {
4285 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
4286 }
4287
4288# if OMP_40_ENABLED
4289 th->th.th_current_place = KMP_PLACE_UNDEFINED;
4290 th->th.th_new_place = KMP_PLACE_UNDEFINED;
4291 th->th.th_first_place = 0;
4292 th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004293
4294 //
4295 // Turn off 4.0 affinity for the current tread at this parallel level.
4296 // Turn off 4.0 affinity for the current thread at this parallel level.
4297 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004298# endif
4299
4300 return retval;
4301}
4302
4303
4304int
4305__kmp_aux_get_affinity(void **mask)
4306{
4307 int gtid;
4308 int retval;
4309 kmp_info_t *th;
4310
4311 if (! KMP_AFFINITY_CAPABLE()) {
4312 return -1;
4313 }
4314
4315 gtid = __kmp_entry_gtid();
4316 th = __kmp_threads[gtid];
4317 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4318
4319 KA_TRACE(1000, ;{
4320 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4321 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4322 th->th.th_affin_mask);
4323 __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
4324 });
4325
4326 if (__kmp_env_consistency_check) {
4327 if ((mask == NULL) || (*mask == NULL)) {
4328 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
4329 }
4330 }
4331
4332# if !KMP_OS_WINDOWS
4333
4334 retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4335 KA_TRACE(1000, ;{
4336 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4337 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4338 (kmp_affin_mask_t *)(*mask));
4339 __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
4340 });
4341 return retval;
4342
4343# else
4344
4345 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
4346 return 0;
4347
4348# endif /* KMP_OS_WINDOWS */
4349
4350}
4351
Jim Cownie5e8470a2013-09-27 10:38:44 +00004352int
4353__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
4354{
4355 int retval;
4356
4357 if (! KMP_AFFINITY_CAPABLE()) {
4358 return -1;
4359 }
4360
4361 KA_TRACE(1000, ;{
4362 int gtid = __kmp_entry_gtid();
4363 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4364 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4365 (kmp_affin_mask_t *)(*mask));
4366 __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
4367 proc, gtid, buf);
4368 });
4369
4370 if (__kmp_env_consistency_check) {
4371 if ((mask == NULL) || (*mask == NULL)) {
4372 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
4373 }
4374 }
4375
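// Out-of-range procs yield -1, and procs outside the machine's full mask
// yield -2; otherwise the bit is simply set in the user-supplied mask.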
4376 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4377 return -1;
4378 }
4379 if (! KMP_CPU_ISSET(proc, fullMask)) {
4380 return -2;
4381 }
4382
4383 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
4384 return 0;
4385}
4386
4387
4388int
4389__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
4390{
4391 int retval;
4392
4393 if (! KMP_AFFINITY_CAPABLE()) {
4394 return -1;
4395 }
4396
4397 KA_TRACE(1000, ;{
4398 int gtid = __kmp_entry_gtid();
4399 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4400 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4401 (kmp_affin_mask_t *)(*mask));
4402 __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
4403 proc, gtid, buf);
4404 });
4405
4406 if (__kmp_env_consistency_check) {
4407 if ((mask == NULL) || (*mask == NULL)) {
4408 KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
4409 }
4410 }
4411
4412 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4413 return -1;
4414 }
4415 if (! KMP_CPU_ISSET(proc, fullMask)) {
4416 return -2;
4417 }
4418
4419 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
4420 return 0;
4421}
4422
4423
4424int
4425__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
4426{
4427 int retval;
4428
4429 if (! KMP_AFFINITY_CAPABLE()) {
4430 return -1;
4431 }
4432
4433 KA_TRACE(1000, ;{
4434 int gtid = __kmp_entry_gtid();
4435 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4436 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4437 (kmp_affin_mask_t *)(*mask));
4438 __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
4439 proc, gtid, buf);
4440 });
4441
4442 if (__kmp_env_consistency_check) {
4443 if ((mask == NULL) || (*mask == NULL)) {
Andrey Churbanov4b2f17a2015-01-29 15:49:22 +00004444 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
Jim Cownie5e8470a2013-09-27 10:38:44 +00004445 }
4446 }
4447
4448 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4449 return 0;
4450 }
4451 if (! KMP_CPU_ISSET(proc, fullMask)) {
4452 return 0;
4453 }
4454
4455 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
4456}
4457
Jim Cownie5e8470a2013-09-27 10:38:44 +00004458
4459// Dynamic affinity settings - Affinity balanced
4460void __kmp_balanced_affinity( int tid, int nthreads )
4461{
4462 if( __kmp_affinity_uniform_topology() ) {
4463 int coreID;
4464 int threadID;
4465 // Number of hyper threads per core in HT machine
4466 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
4467 // Number of cores
4468 int ncores = __kmp_ncores;
4469 // How many threads will be bound to each core
4470 int chunk = nthreads / ncores;
4471 // How many cores will have an additional thread bound to them - "big" cores
4472 int big_cores = nthreads % ncores;
4473 // Number of threads on the big cores
4474 int big_nth = ( chunk + 1 ) * big_cores;
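// Illustrative example (numbers not from the source): nthreads=10 on ncores=4
// gives chunk=2, big_cores=2, big_nth=6, so threads 0-5 land on the two "big"
// cores (3 threads each) and threads 6-9 on the remaining two cores (2 each).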
4475 if( tid < big_nth ) {
4476 coreID = tid / (chunk + 1 );
4477 threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
4478 } else { //tid >= big_nth
4479 coreID = ( tid - big_cores ) / chunk;
4480 threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
4481 }
4482
4483 KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
4484 "Illegal set affinity operation when not capable");
4485
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00004486 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004487 KMP_CPU_ZERO(mask);
4488
4489 // Granularity == thread
4490 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4491 int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
4492 KMP_CPU_SET( osID, mask);
4493 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4494 for( int i = 0; i < __kmp_nth_per_core; i++ ) {
4495 int osID;
4496 osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
4497 KMP_CPU_SET( osID, mask);
4498 }
4499 }
4500 if (__kmp_affinity_verbose) {
4501 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4502 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004503 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4504 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004505 }
4506 __kmp_set_system_affinity( mask, TRUE );
4507 } else { // Non-uniform topology
4508
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00004509 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004510 KMP_CPU_ZERO(mask);
4511
4512 // Number of hyper threads per core in HT machine
4513 int nth_per_core = __kmp_nThreadsPerCore;
4514 int core_level;
4515 if( nth_per_core > 1 ) {
4516 core_level = __kmp_aff_depth - 2;
4517 } else {
4518 core_level = __kmp_aff_depth - 1;
4519 }
4520
4521 // Number of cores - maximum value; it does not count trailing cores with 0 processors
4522 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
4523
4524 // For performance gain consider the special case nthreads == __kmp_avail_proc
4525 if( nthreads == __kmp_avail_proc ) {
4526 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4527 int osID = address2os[ tid ].second;
4528 KMP_CPU_SET( osID, mask);
4529 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4530 int coreID = address2os[ tid ].first.labels[ core_level ];
4531 // Count the osIDs found for the current core; there can be no more than nth_per_core of them;
4532 // since address2os is sorted we can break once cnt == nth_per_core
4533 int cnt = 0;
4534 for( int i = 0; i < __kmp_avail_proc; i++ ) {
4535 int osID = address2os[ i ].second;
4536 int core = address2os[ i ].first.labels[ core_level ];
4537 if( core == coreID ) {
4538 KMP_CPU_SET( osID, mask);
4539 cnt++;
4540 if( cnt == nth_per_core ) {
4541 break;
4542 }
4543 }
4544 }
4545 }
4546 } else if( nthreads <= __kmp_ncores ) {
4547
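// At most one thread per occupied core: the tid-th core that has at least one
// OS proc in procarr[] gets this thread, bound either to the whole core or
// (for granularity=thread/fine) to the first available context on it.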
4548 int core = 0;
4549 for( int i = 0; i < ncores; i++ ) {
4550 // Check if this core from procarr[] is in the mask
4551 int in_mask = 0;
4552 for( int j = 0; j < nth_per_core; j++ ) {
4553 if( procarr[ i * nth_per_core + j ] != - 1 ) {
4554 in_mask = 1;
4555 break;
4556 }
4557 }
4558 if( in_mask ) {
4559 if( tid == core ) {
4560 for( int j = 0; j < nth_per_core; j++ ) {
4561 int osID = procarr[ i * nth_per_core + j ];
4562 if( osID != -1 ) {
4563 KMP_CPU_SET( osID, mask );
4564 // For granularity=thread it is enough to set the first available osID for this core
4565 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4566 break;
4567 }
4568 }
4569 }
4570 break;
4571 } else {
4572 core++;
4573 }
4574 }
4575 }
4576
4577 } else { // nthreads > __kmp_ncores
4578
4579 // Array to save the number of processors at each core
Jonathan Peyton7be075332015-06-22 15:53:50 +00004580 int* nproc_at_core = (int*)KMP_ALLOCA(sizeof(int)*ncores);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004581 // Array to save the number of cores with "x" available processors;
Jonathan Peyton7be075332015-06-22 15:53:50 +00004582 int* ncores_with_x_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004583 // Array to save the number of cores with # procs from x to nth_per_core
Jonathan Peyton7be075332015-06-22 15:53:50 +00004584 int* ncores_with_x_to_max_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004585
4586 for( int i = 0; i <= nth_per_core; i++ ) {
4587 ncores_with_x_procs[ i ] = 0;
4588 ncores_with_x_to_max_procs[ i ] = 0;
4589 }
4590
4591 for( int i = 0; i < ncores; i++ ) {
4592 int cnt = 0;
4593 for( int j = 0; j < nth_per_core; j++ ) {
4594 if( procarr[ i * nth_per_core + j ] != -1 ) {
4595 cnt++;
4596 }
4597 }
4598 nproc_at_core[ i ] = cnt;
4599 ncores_with_x_procs[ cnt ]++;
4600 }
4601
4602 for( int i = 0; i <= nth_per_core; i++ ) {
4603 for( int j = i; j <= nth_per_core; j++ ) {
4604 ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
4605 }
4606 }
4607
4608 // Max number of processors
4609 int nproc = nth_per_core * ncores;
4610 // An array to keep the number of threads assigned to each context
4611 int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
4612 for( int i = 0; i < nproc; i++ ) {
4613 newarr[ i ] = 0;
4614 }
4615
4616 int nth = nthreads;
4617 int flag = 0;
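// Spread the threads over the contexts recorded in procarr[]: on the first
// sweep (flag == 0) every available context receives at most one thread, so
// all contexts are used before any is doubled up; subsequent sweeps
// (flag == 1) keep adding one more thread per context until nth reaches 0.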
4618 while( nth > 0 ) {
4619 for( int j = 1; j <= nth_per_core; j++ ) {
4620 int cnt = ncores_with_x_to_max_procs[ j ];
4621 for( int i = 0; i < ncores; i++ ) {
4622 // Skip the core with 0 processors
4623 if( nproc_at_core[ i ] == 0 ) {
4624 continue;
4625 }
4626 for( int k = 0; k < nth_per_core; k++ ) {
4627 if( procarr[ i * nth_per_core + k ] != -1 ) {
4628 if( newarr[ i * nth_per_core + k ] == 0 ) {
4629 newarr[ i * nth_per_core + k ] = 1;
4630 cnt--;
4631 nth--;
4632 break;
4633 } else {
4634 if( flag != 0 ) {
4635 newarr[ i * nth_per_core + k ] ++;
4636 cnt--;
4637 nth--;
4638 break;
4639 }
4640 }
4641 }
4642 }
4643 if( cnt == 0 || nth == 0 ) {
4644 break;
4645 }
4646 }
4647 if( nth == 0 ) {
4648 break;
4649 }
4650 }
4651 flag = 1;
4652 }
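// Now pick this thread's slot: walk newarr[] accumulating the per-context
// thread counts until the running sum exceeds tid; that context (or its whole
// core when granularity=core) is the one this thread binds to.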
4653 int sum = 0;
4654 for( int i = 0; i < nproc; i++ ) {
4655 sum += newarr[ i ];
4656 if( sum > tid ) {
4657 // Granularity == thread
4658 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4659 int osID = procarr[ i ];
4660 KMP_CPU_SET( osID, mask);
4661 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4662 int coreID = i / nth_per_core;
4663 for( int ii = 0; ii < nth_per_core; ii++ ) {
4664 int osID = procarr[ coreID * nth_per_core + ii ];
4665 if( osID != -1 ) {
4666 KMP_CPU_SET( osID, mask);
4667 }
4668 }
4669 }
4670 break;
4671 }
4672 }
4673 __kmp_free( newarr );
4674 }
4675
4676 if (__kmp_affinity_verbose) {
4677 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4678 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004679 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4680 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004681 }
4682 __kmp_set_system_affinity( mask, TRUE );
4683 }
4684}
4685
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004686#else
4687 // affinity not supported
4688
Andrey Churbanovaa1f2b62015-04-13 18:51:59 +00004689static const kmp_uint32 noaff_maxLevels=7;
4690kmp_uint32 noaff_skipPerLevel[noaff_maxLevels];
4691kmp_uint32 noaff_depth;
4692kmp_uint8 noaff_leaf_kids;
4693kmp_int8 noaff_uninitialized=1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004694
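// With affinity unsupported, the hierarchical barrier still needs a machine
// hierarchy, so noaff_init() fabricates one: a leaf branching factor of 4,
// nprocs/4 (rounded up) nodes above it, then level widths are repeatedly
// halved (and pushed up a level) until no level exceeds the branching factor.
// __kmp_get_hierarchy() copies the result into the caller's barrier state.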
Andrey Churbanovaa1f2b62015-04-13 18:51:59 +00004695void noaff_init(int nprocs)
4696{
4697 kmp_int8 result = KMP_COMPARE_AND_STORE_ACQ8(&noaff_uninitialized, 1, 2);
4698 if (result == 0) return; // Already initialized
4699 else if (result == 2) { // Someone else is initializing
4700 while (TCR_1(noaff_uninitialized) != 0) KMP_CPU_PAUSE();
4701 return;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004702 }
Andrey Churbanovaa1f2b62015-04-13 18:51:59 +00004703 KMP_DEBUG_ASSERT(result==1);
4704
4705 kmp_uint32 numPerLevel[noaff_maxLevels];
4706 noaff_depth = 1;
4707 for (kmp_uint32 i=0; i<noaff_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
4708 numPerLevel[i] = 1;
4709 noaff_skipPerLevel[i] = 1;
4710 }
4711
4712 numPerLevel[0] = 4;
4713 numPerLevel[1] = nprocs/4;
4714 if (nprocs%4) numPerLevel[1]++;
4715
4716 for (int i=noaff_maxLevels-1; i>=0; --i) // count non-empty levels to get depth
4717 if (numPerLevel[i] != 1 || noaff_depth > 1) // only count one top-level '1'
4718 noaff_depth++;
4719
4720 kmp_uint32 branch = 4;
4721 if (numPerLevel[0] == 1) branch = nprocs/4;
4722 if (branch<4) branch=4;
4723 for (kmp_uint32 d=0; d<noaff_depth-1; ++d) { // optimize hierarchy width
4724 while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
4725 if (numPerLevel[d] & 1) numPerLevel[d]++;
4726 numPerLevel[d] = numPerLevel[d] >> 1;
4727 if (numPerLevel[d+1] == 1) noaff_depth++;
4728 numPerLevel[d+1] = numPerLevel[d+1] << 1;
4729 }
4730 if(numPerLevel[0] == 1) {
4731 branch = branch >> 1;
4732 if (branch<4) branch = 4;
4733 }
4734 }
4735
4736 for (kmp_uint32 i=1; i<noaff_depth; ++i)
4737 noaff_skipPerLevel[i] = numPerLevel[i-1] * noaff_skipPerLevel[i-1];
4738 // Fill in hierarchy in the case of oversubscription
4739 for (kmp_uint32 i=noaff_depth; i<noaff_maxLevels; ++i)
4740 noaff_skipPerLevel[i] = 2*noaff_skipPerLevel[i-1];
4741 noaff_leaf_kids = (kmp_uint8)numPerLevel[0]-1;
4742 noaff_uninitialized = 0; // One writer
4743
4744}
4745
4746void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
4747 if (noaff_uninitialized)
4748 noaff_init(nproc);
4749
4750 thr_bar->depth = noaff_depth;
4751 thr_bar->base_leaf_kids = noaff_leaf_kids;
4752 thr_bar->skip_per_level = noaff_skipPerLevel;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004753}
4754
Alp Toker763b9392014-02-28 09:42:41 +00004755#endif // KMP_AFFINITY_SUPPORTED