/*
 * kmp_affinity.cpp -- affinity management
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_str.h"
#include "kmp_wrapper_getpid.h"

#if KMP_AFFINITY_SUPPORTED

//
// Print the affinity mask to the character array in a pretty format.
//
char *
__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
{
    KMP_ASSERT(buf_len >= 40);
    char *scan = buf;
    char *end = buf + buf_len - 1;

    //
    // Find first element / check for empty set.
    //
    size_t i;
    for (i = 0; i < KMP_CPU_SETSIZE; i++) {
        if (KMP_CPU_ISSET(i, mask)) {
            break;
        }
    }
    if (i == KMP_CPU_SETSIZE) {
        KMP_SNPRINTF(scan, buf_len, "{<empty>}");
        while (*scan != '\0') scan++;
        KMP_ASSERT(scan <= end);
        return buf;
    }

    KMP_SNPRINTF(scan, buf_len, "{%ld", (long)i);
    while (*scan != '\0') scan++;
    i++;
    for (; i < KMP_CPU_SETSIZE; i++) {
        if (! KMP_CPU_ISSET(i, mask)) {
            continue;
        }

        //
        // Check for buffer overflow. A string of the form ",<n>" will have
        // at most 10 characters, plus we want to leave room to print ",...}"
        // if the set is too large to print for a total of 15 characters.
        // We already left room for '\0' in setting end.
        //
        if (end - scan < 15) {
            break;
        }
        KMP_SNPRINTF(scan, buf_len, ",%-ld", (long)i);
        while (*scan != '\0') scan++;
    }
    if (i < KMP_CPU_SETSIZE) {
        KMP_SNPRINTF(scan, buf_len, ",...");
        while (*scan != '\0') scan++;
    }
    KMP_SNPRINTF(scan, buf_len, "}");
    while (*scan != '\0') scan++;
    KMP_ASSERT(scan <= end);
    return buf;
}


void
__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
{
    KMP_CPU_ZERO(mask);

# if KMP_GROUP_AFFINITY

    if (__kmp_num_proc_groups > 1) {
        int group;
        KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
        for (group = 0; group < __kmp_num_proc_groups; group++) {
            int i;
            int num = __kmp_GetActiveProcessorCount(group);
            for (i = 0; i < num; i++) {
                KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
            }
        }
    }
    else

# endif /* KMP_GROUP_AFFINITY */

    {
        int proc;
        for (proc = 0; proc < __kmp_xproc; proc++) {
            KMP_CPU_SET(proc, mask);
        }
    }
}


//
// In Linux* OS debug & cover (-O0) builds, we need to avoid inline member
// functions.
//
// The icc codegen emits sections with extremely long names, of the form
// ".gnu.linkonce.<mangled_name>". There seems to have been a linker bug
// introduced between GNU ld version 2.14.90.0.4 and 2.15.92.0.2 involving
// some sort of memory corruption or table overflow that is triggered by
// these long strings. I checked the latest version of the linker -
// GNU ld (Linux* OS/GNU Binutils) 2.18.50.0.7.20080422 - and the bug is not
// fixed.
//
// Unfortunately, my attempts to reproduce it in a smaller example have
// failed - I'm not sure what the prospects are of getting it fixed
// properly - but we need a reproducer smaller than all of libomp.
//
// Work around the problem by avoiding inline constructors in such builds.
// We do this for all platforms, not just Linux* OS - non-inline functions are
// more debuggable and provide better coverage than inline functions.
// Use inline functions in shipping libs, for performance.
//

# if !defined(KMP_DEBUG) && !defined(COVER)

class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth)
      : depth(_depth), leader(FALSE) {
    }
    Address &operator=(const Address &b) {
        depth = b.depth;
        for (unsigned i = 0; i < depth; i++) {
            labels[i] = b.labels[i];
            childNums[i] = b.childNums[i];
        }
        leader = FALSE;
        return *this;
    }
    bool operator==(const Address &b) const {
        if (depth != b.depth)
            return false;
        for (unsigned i = 0; i < depth; i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool isClose(const Address &b, int level) const {
        if (depth != b.depth)
            return false;
        if ((unsigned)level >= depth)
            return true;
        for (unsigned i = 0; i < (depth - level); i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool operator!=(const Address &b) const {
        return !operator==(b);
    }
};

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {
    }
    AddrUnsPair &operator=(const AddrUnsPair &b)
    {
        first = b.first;
        second = b.second;
        return *this;
    }
};

# else

class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth);
    Address &operator=(const Address &b);
    bool operator==(const Address &b) const;
    bool isClose(const Address &b, int level) const;
    bool operator!=(const Address &b) const;
};

Address::Address(unsigned _depth)
{
    depth = _depth;
    leader = FALSE;
}

Address &Address::operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
        labels[i] = b.labels[i];
        childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
}

bool Address::operator==(const Address &b) const {
    if (depth != b.depth)
        return false;
    for (unsigned i = 0; i < depth; i++)
        if (labels[i] != b.labels[i])
            return false;
    return true;
}

bool Address::isClose(const Address &b, int level) const {
    if (depth != b.depth)
        return false;
    if ((unsigned)level >= depth)
        return true;
    for (unsigned i = 0; i < (depth - level); i++)
        if (labels[i] != b.labels[i])
            return false;
    return true;
}

bool Address::operator!=(const Address &b) const {
    return !operator==(b);
}

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second);
    AddrUnsPair &operator=(const AddrUnsPair &b);
};

AddrUnsPair::AddrUnsPair(Address _first, unsigned _second)
  : first(_first), second(_second)
{
}

AddrUnsPair &AddrUnsPair::operator=(const AddrUnsPair &b)
{
    first = b.first;
    second = b.second;
    return *this;
}

# endif /* !defined(KMP_DEBUG) && !defined(COVER) */


static int
__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)
      ->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)
      ->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    for (i = 0; i < depth; i++) {
        if (aa->labels[i] < bb->labels[i]) return -1;
        if (aa->labels[i] > bb->labels[i]) return 1;
    }
    return 0;
}


static int
__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)
      ->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)
      ->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
    KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
    for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
        int j = depth - i - 1;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    for (; i < depth; i++) {
        int j = i - __kmp_affinity_compact;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    return 0;
}
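
// Illustration: with __kmp_affinity_compact == 1 and a depth-3 hierarchy
// (package, core, thread), the comparator above builds its sort key from the
// innermost level first, so addresses are ordered by (thread, package, core)
// child numbers rather than (package, core, thread).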

/** A structure for holding machine-specific hierarchy info to be computed once at init.
    This structure represents a mapping of threads to the actual machine hierarchy, or to
    our best guess at what the hierarchy might be, for the purpose of performing an
    efficient barrier. In the worst case, when there is no machine hierarchy information,
    it produces a tree suitable for a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
    /** Number of levels in the hierarchy. Typical levels are threads/core, cores/package
        or socket, packages/node, nodes/machine, etc. We don't want to get specific with
        nomenclature. When the machine is oversubscribed we add levels to duplicate the
        hierarchy, doubling the thread capacity of the hierarchy each time we add a level. */
    kmp_uint32 maxLevels;

    /** This is specifically the depth of the machine configuration hierarchy, in terms of the
        number of levels along the longest path from root to any leaf. It corresponds to the
        number of entries in numPerLevel if we exclude all but one trailing 1. */
    kmp_uint32 depth;
    kmp_uint32 base_num_threads;
    volatile kmp_int8 uninitialized; // 0=initialized, 1=uninitialized, 2=initialization in progress
    volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

    /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
        node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
        and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
    kmp_uint32 *numPerLevel;
    kmp_uint32 *skipPerLevel;

    void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
        int hier_depth = adr2os[0].first.depth;
        int level = 0;
        for (int i=hier_depth-1; i>=0; --i) {
            int max = -1;
            for (int j=0; j<num_addrs; ++j) {
                int next = adr2os[j].first.childNums[i];
                if (next > max) max = next;
            }
            numPerLevel[level] = max+1;
            ++level;
        }
    }

    hierarchy_info() : maxLevels(7), depth(1), uninitialized(1), resizing(0) {}

    // TO FIX: This destructor causes a segfault in the library at shutdown.
    //~hierarchy_info() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); }

    void init(AddrUnsPair *adr2os, int num_addrs)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, 1, 2);
        if (bool_result == 0) { // Wait for initialization
            while (TCR_1(uninitialized) != 0) KMP_CPU_PAUSE();
            return;
        }
        KMP_DEBUG_ASSERT(bool_result==1);

        /* Added explicit initialization of the data fields here to prevent usage of dirty value
           observed when static library is re-initialized multiple times (e.g. when
           non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
        depth = 1;
        resizing = 0;
        maxLevels = 7;
        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
        skipPerLevel = &(numPerLevel[maxLevels]);
        for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        // Sort table by physical ID
        if (adr2os) {
            qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
            deriveLevels(adr2os, num_addrs);
        }
        else {
            numPerLevel[0] = 4;
            numPerLevel[1] = num_addrs/4;
            if (num_addrs%4) numPerLevel[1]++;
        }

        base_num_threads = num_addrs;
        for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
            if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
                depth++;

        kmp_uint32 branch = 4;
        if (numPerLevel[0] == 1) branch = num_addrs/4;
        if (branch<4) branch=4;
        for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
            while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
                if (numPerLevel[d] & 1) numPerLevel[d]++;
                numPerLevel[d] = numPerLevel[d] >> 1;
                if (numPerLevel[d+1] == 1) depth++;
                numPerLevel[d+1] = numPerLevel[d+1] << 1;
            }
            if (numPerLevel[0] == 1) {
                branch = branch >> 1;
                if (branch<4) branch = 4;
            }
        }

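        // skipPerLevel[i] is the cumulative product numPerLevel[0]*...*numPerLevel[i-1],
        // i.e. the number of leaves spanned by a single node at level i. For example,
        // with numPerLevel = {2, 4, 4, 1, ...}, skipPerLevel becomes {1, 2, 8, 32, ...}.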
        for (kmp_uint32 i=1; i<depth; ++i)
            skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
        // Fill in hierarchy in the case of oversubscription
        for (kmp_uint32 i=depth; i<maxLevels; ++i)
            skipPerLevel[i] = 2*skipPerLevel[i-1];

        uninitialized = 0; // One writer

    }

    void resize(kmp_uint32 nproc)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
        if (bool_result == 0) { // Someone else is resizing
            while (TCR_1(resizing) != 0) KMP_CPU_PAUSE();
            return;
        }
        KMP_DEBUG_ASSERT(bool_result!=0);
        KMP_DEBUG_ASSERT(nproc > base_num_threads);

        // Calculate new maxLevels
        kmp_uint32 old_sz = skipPerLevel[depth-1];
        kmp_uint32 incs = 0, old_maxLevels = maxLevels;
        while (nproc > old_sz) {
            old_sz *= 2;
            incs++;
        }
        maxLevels += incs;

        // Resize arrays
        kmp_uint32 *old_numPerLevel = numPerLevel;
        kmp_uint32 *old_skipPerLevel = skipPerLevel;
        numPerLevel = skipPerLevel = NULL;
        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
        skipPerLevel = &(numPerLevel[maxLevels]);

        // Copy old elements from old arrays
        for (kmp_uint32 i=0; i<old_maxLevels; ++i) {
            numPerLevel[i] = old_numPerLevel[i];
            skipPerLevel[i] = old_skipPerLevel[i];
        }

        // Init new elements in arrays to 1
        for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) {
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        // Free old arrays
        __kmp_free(old_numPerLevel);

        // Fill in oversubscription levels of hierarchy
        for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
            skipPerLevel[i] = 2*skipPerLevel[i-1];

        base_num_threads = nproc;
        resizing = 0; // One writer

    }
};

static hierarchy_info machine_hierarchy;

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
    kmp_uint32 depth;
    // The test below is true if affinity is available, but set to "none". Need to init
    // on first use of hierarchical barrier.
    if (TCR_1(machine_hierarchy.uninitialized))
        machine_hierarchy.init(NULL, nproc);
    // Adjust the hierarchy in case num threads exceeds original
    if (nproc > machine_hierarchy.base_num_threads)
        machine_hierarchy.resize(nproc);

    depth = machine_hierarchy.depth;
    KMP_DEBUG_ASSERT(depth > 0);
    // The loop below adjusts the depth in the case of a resize
    while (nproc > machine_hierarchy.skipPerLevel[depth-1])
        depth++;

    thr_bar->depth = depth;
    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}

//
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the child_num
// vector of the address object. This is done in case the labels used for
// the children at one node of the hierarchy differ from those used for
// another node at the same level. Example: suppose the machine has 2 nodes
// with 2 packages each. The first node contains packages 601 and 602, and
// the second node contains packages 603 and 604. If we try to sort the table
// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
// because we are paying attention to the labels themselves, not the ordinal
// child numbers. By using the child numbers in the sort, the result is
// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
//
static void
__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
  int numAddrs)
{
    KMP_DEBUG_ASSERT(numAddrs > 0);
    int depth = address2os->first.depth;
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
      * sizeof(unsigned));
    int labCt;
    for (labCt = 0; labCt < depth; labCt++) {
        address2os[0].first.childNums[labCt] = counts[labCt] = 0;
        lastLabel[labCt] = address2os[0].first.labels[labCt];
    }
    int i;
    for (i = 1; i < numAddrs; i++) {
        for (labCt = 0; labCt < depth; labCt++) {
            if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
                int labCt2;
                for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
                    counts[labCt2] = 0;
                    lastLabel[labCt2] = address2os[i].first.labels[labCt2];
                }
                counts[labCt]++;
                lastLabel[labCt] = address2os[i].first.labels[labCt];
                break;
            }
        }
        for (labCt = 0; labCt < depth; labCt++) {
            address2os[i].first.childNums[labCt] = counts[labCt];
        }
        for (; labCt < (int)Address::maxDepth; labCt++) {
            address2os[i].first.childNums[labCt] = 0;
        }
    }
}


//
// All of the __kmp_affinity_create_*_map() routines should set
// __kmp_affinity_masks to a vector of affinity mask objects of length
// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
// return the number of levels in the machine topology tree (zero if
// __kmp_affinity_type == affinity_none).
//
// All of the __kmp_affinity_create_*_map() routines should set *fullMask
// to the affinity mask for the initialization thread. They need to save and
// restore the mask, and it could be needed later, so saving it is just an
// optimization to avoid calling kmp_get_system_affinity() again.
//
static kmp_affin_mask_t *fullMask = NULL;

kmp_affin_mask_t *
__kmp_affinity_get_fullMask() { return fullMask; }


static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif

//
// __kmp_affinity_uniform_topology() doesn't work when called from
// places which support arbitrarily many levels in the machine topology
// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
// or __kmp_affinity_create_x2apicid_map().
//
inline static bool
__kmp_affinity_uniform_topology()
{
    return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}


//
// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc.
//
static void
__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
  int pkgLevel, int coreLevel, int threadLevel)
{
    int proc;

    KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
    for (proc = 0; proc < len; proc++) {
        int level;
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);
        for (level = 0; level < depth; level++) {
            if (level == threadLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
            }
            else if (level == coreLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
            }
            else if (level == pkgLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
            }
            else if (level > pkgLevel) {
                __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                  level - pkgLevel - 1);
            }
            else {
                __kmp_str_buf_print(&buf, "L%d ", level);
            }
            __kmp_str_buf_print(&buf, "%d ",
              address2os[proc].first.labels[level]);
        }
        KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
          buf.str);
        __kmp_str_buf_free(&buf);
    }
}


//
// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread id's <-> processor id's.
//
static int
__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Even if __kmp_affinity_type == affinity_none, this routine might still
    // be called to set __kmp_ncores, as well as
    // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(__kmp_affinity_type == affinity_none);
        __kmp_ncores = nPackages = __kmp_xproc;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages. Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nPackages = __kmp_avail_proc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);

        KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }
    if (__kmp_affinity_type == affinity_none) {
        return 0;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
        __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    unsigned int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(1);
        addr.labels[0] = i;
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
    }
    if (__kmp_affinity_verbose) {
        KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Only the package level is modeled in the machine topology map,
        // so the #levels of granularity is either 0 or 1.
        //
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels = 1;
        }
        else {
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 1;
}


# if KMP_GROUP_AFFINITY

//
// If multiple Windows* OS processor groups exist, we can create a 2-level
// topology map with the groups at level 0 and the individual procs at
// level 1.
//
// This facilitates letting the threads float among all procs in a group,
// if granularity=group (the default when there are multiple groups).
//
static int
__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // If we don't have multiple processor groups, return now.
    // The flat mapping will be used.
    //
    if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
        // FIXME set *msg_id
        return -1;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
        __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(2);
        addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
        addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
              addr.labels[1]);
        }
    }

    if (__kmp_affinity_gran_levels < 0) {
        if (__kmp_affinity_gran == affinity_gran_group) {
            __kmp_affinity_gran_levels = 1;
        }
        else if ((__kmp_affinity_gran == affinity_gran_fine)
          || (__kmp_affinity_gran == affinity_gran_thread)) {
            __kmp_affinity_gran_levels = 0;
        }
        else {
            const char *gran_str = NULL;
            if (__kmp_affinity_gran == affinity_gran_core) {
                gran_str = "core";
            }
            else if (__kmp_affinity_gran == affinity_gran_package) {
                gran_str = "package";
            }
            else if (__kmp_affinity_gran == affinity_gran_node) {
                gran_str = "node";
            }
            else {
                KMP_ASSERT(0);
            }

            // Warning: can't use affinity granularity \"gran\" with group topology method, using "thread"
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 2;
}

# endif /* KMP_GROUP_AFFINITY */


# if KMP_ARCH_X86 || KMP_ARCH_X86_64

static int
__kmp_cpuid_mask_width(int count) {
    int r = 0;

    while ((1<<r) < count)
        ++r;
    return r;
}


class apicThreadInfo {
public:
    unsigned osId;              // param to __kmp_affinity_bind_thread
    unsigned apicId;            // from cpuid after binding
    unsigned maxCoresPerPkg;    //      ""
    unsigned maxThreadsPerPkg;  //      ""
    unsigned pkgId;             // inferred from above values
    unsigned coreId;            //      ""
    unsigned threadId;          //      ""
};


static int
__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->osId < bb->osId) return -1;
    if (aa->osId > bb->osId) return 1;
    return 0;
}


static int
__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->pkgId < bb->pkgId) return -1;
    if (aa->pkgId > bb->pkgId) return 1;
    if (aa->coreId < bb->coreId) return -1;
    if (aa->coreId > bb->coreId) return 1;
    if (aa->threadId < bb->threadId) return -1;
    if (aa->threadId > bb->threadId) return 1;
    return 0;
}


//
// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
// an algorithm which cycles through the available os threads, setting
// the current thread's affinity mask to that thread, and then retrieves
// the Apic Id for each thread context using the cpuid instruction.
//
static int
__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;
    int rc;
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check if cpuid leaf 4 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 4) {
        *msg_id = kmp_i18n_str_NoLeaf4Support;
        return -1;
    }

    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
    // the defaults that we calculated from issuing cpuid without binding
    // to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        //
        // Get an upper bound on the number of threads per package using
        // cpuid(1).
        //
        // On some OS/chip combinations where HT is supported by the chip
        // but is disabled, this value will be 2 on a single core chip.
        // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
        //
        __kmp_x86_cpuid(1, 0, &buf);
        int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (maxThreadsPerPkg == 0) {
            maxThreadsPerPkg = 1;
        }

        //
        // The num cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // The author of cpu_count.cpp treated this as only an upper bound
        // on the number of cores, but I haven't seen any cases where it
        // was greater than the actual number of cores, so we will treat
        // it as exact in this block of code.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            nCoresPerPkg = 1;
        }

        //
        // There is no way to reliably tell if HT is enabled without issuing
        // the cpuid instruction from every thread and correlating the cpuid
        // info, so if the machine is not affinity capable, we assume that HT
        // is off. We have seen quite a few machines where maxThreadsPerPkg
        // is 2, yet the machine does not support HT.
        //
        // - Older OSes are usually found on machines with older chips, which
        //   do not support HT.
        //
        // - The performance penalty for mistakenly identifying a machine as
        //   HT when it isn't (which results in blocktime being incorrectly set
        //   to 0) is greater than the penalty for mistakenly identifying
        //   a machine as being 1 thread/core when it is really HT enabled
        //   (which results in blocktime being incorrectly set to a positive
        //   value).
        //
        __kmp_ncores = __kmp_xproc;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        __kmp_nThreadsPerCore = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    KMP_ASSERT(oldMask != NULL);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    // The relevant information is:
    //
    // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
    //    has a unique Apic Id, which is of the form pkg# : core# : thread#.
    //
    // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The
    //    value of this field determines the width of the core# + thread#
    //    fields in the Apic Id. It is also an upper bound on the number
    //    of threads per package, but it has been verified that situations
    //    happen where it is not exact. In particular, on certain OS/chip
    //    combinations where Intel(R) Hyper-Threading Technology is supported
    //    by the chip but has been disabled, the value of this field will be 2
    //    (for a single core chip). On other OS/chip combinations supporting
    //    Intel(R) Hyper-Threading Technology, the value of this field will be
    //    1 when Intel(R) Hyper-Threading Technology is disabled and 2 when it
    //    is enabled.
    //
    // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The
    //    value of this field (+1) determines the width of the core# field in
    //    the Apic Id. The comments in "cpucount.cpp" say that this value is
    //    an upper bound, but the IA-32 architecture manual says that it is
    //    exactly the number of cores per package, and I haven't seen any
    //    case where it wasn't.
    //
    // From this information, deduce the package Id, core Id, and thread Id,
    // and set the corresponding fields in the apicThreadInfo struct.
    //
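    // Illustrative example (hypothetical values, not from any particular machine):
    // with maxThreadsPerPkg == 8 and maxCoresPerPkg == 4, the mask widths computed
    // below come out as widthCT == 3, widthC == 2 and widthT == 1, so an Apic Id of
    // 0b1101 decodes to pkgId == 1, coreId == 2 and threadId == 1.
    //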
    unsigned i;
    apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
    unsigned nApics = 0;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(i);
        threadInfo[nApics].osId = i;

        //
        // The apic id and max threads per pkg come from cpuid(1).
        //
        __kmp_x86_cpuid(1, 0, &buf);
        if (! (buf.edx >> 9) & 1) {
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_ApicNotPresent;
            return -1;
        }
        threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
        threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (threadInfo[nApics].maxThreadsPerPkg == 0) {
            threadInfo[nApics].maxThreadsPerPkg = 1;
        }

        //
        // Max cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            threadInfo[nApics].maxCoresPerPkg = 1;
        }

        //
        // Infer the pkgId / coreId / threadId using only the info
        // obtained locally.
        //
        int widthCT = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxThreadsPerPkg);
        threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

        int widthC = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxCoresPerPkg);
        int widthT = widthCT - widthC;
        if (widthT < 0) {
            //
            // I've never seen this one happen, but I suppose it could, if
            // the cpuid instruction on a chip was really screwed up.
            // Make sure to restore the affinity mask before the tail call.
            //
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }

        int maskC = (1 << widthC) - 1;
        threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
          & maskC;

        int maskT = (1 << widthT) - 1;
        threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

        nApics++;
    }

    //
    // We've collected all the info we need.
    // Restore the old affinity mask for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, form an Address object
    // with depth 1 and return immediately (or, if affinity is off, set
    // address2os to NULL and return).
    //
    // If it is configured to omit the package level when there is only a
    // single package, the logic at the end of this routine won't work if
    // there is only a single thread - it would try to form an Address
    // object with depth 0.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
        Address addr(1);
        addr.labels[0] = threadInfo[0].pkgId;
        (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
        }

        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the threadInfo table by physical Id.
    //
    qsort(threadInfo, nApics, sizeof(*threadInfo),
      __kmp_affinity_cmp_apicThreadInfo_phys_id);

    //
    // The table is now sorted by pkgId / coreId / threadId, but we really
    // don't know the radix of any of the fields. pkgId's may be sparsely
    // assigned among the chips on a system. Although coreId's are usually
    // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
    // [0..threadsPerCore-1], we don't want to make any such assumptions.
    //
    // For that matter, we don't know what coresPerPkg and threadsPerCore
    // (or the total # packages) are at this point - we want to determine
    // that now. We only have an upper bound on the first two figures.
    //
    // We also perform a consistency check at this point: the values returned
    // by the cpuid instruction for any thread bound to a given package had
    // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
    //
    nPackages = 1;
    nCoresPerPkg = 1;
    __kmp_nThreadsPerCore = 1;
    unsigned nCores = 1;

    unsigned pkgCt = 1; // to determine radii
    unsigned lastPkgId = threadInfo[0].pkgId;
    unsigned coreCt = 1;
    unsigned lastCoreId = threadInfo[0].coreId;
    unsigned threadCt = 1;
    unsigned lastThreadId = threadInfo[0].threadId;

    // intra-pkg consist checks
    unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
    unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

    for (i = 1; i < nApics; i++) {
        if (threadInfo[i].pkgId != lastPkgId) {
            nCores++;
            pkgCt++;
            lastPkgId = threadInfo[i].pkgId;
            if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
            coreCt = 1;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;

            //
            // This is a different package, so go on to the next iteration
            // without doing any consistency checks. Reset the consistency
            // check vars, though.
            //
            prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
            prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
            continue;
        }

        if (threadInfo[i].coreId != lastCoreId) {
            nCores++;
            coreCt++;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;
        }
        else if (threadInfo[i].threadId != lastThreadId) {
            threadCt++;
            lastThreadId = threadInfo[i].threadId;
        }
        else {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
            return -1;
        }

        //
        // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
        // fields agree between all the threads bound to a given package.
        //
        if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
          || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }
    }
    nPackages = pkgCt;
    if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
    if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages. Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nCores;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (__kmp_affinity_uniform_topology()) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);

    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Now that we've determined the number of packages, the number of cores
    // per package, and the number of threads per core, we can construct the
    // data structure that is to be returned.
    //
    int pkgLevel = 0;
    int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
    int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
    unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

    KMP_ASSERT(depth > 0);
    *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

    for (i = 0; i < nApics; ++i) {
        Address addr(depth);
        unsigned os = threadInfo[i].osId;
        int d = 0;

        if (pkgLevel >= 0) {
            addr.labels[d++] = threadInfo[i].pkgId;
        }
        if (coreLevel >= 0) {
            addr.labels[d++] = threadInfo[i].coreId;
        }
        if (threadLevel >= 0) {
            addr.labels[d++] = threadInfo[i].threadId;
        }
        (*address2os)[i] = AddrUnsPair(addr, os);
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0)
          && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return depth;
}


//
// Intel(R) microarchitecture code name Nehalem, Dunnington and later
// architectures support a newer interface for specifying the x2APIC Ids,
// based on cpuid leaf 11.
//
static int
__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;

    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check to see if cpuid leaf 11 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 11) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }
    __kmp_x86_cpuid(11, 0, &buf);
    if (buf.ebx == 0) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }

    //
    // Find the number of levels in the machine topology. While we're at it,
    // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will
    // try to get more accurate values later by explicitly counting them,
    // but get reasonable defaults now, in case we return early.
    //
    int level;
    int threadLevel = -1;
    int coreLevel = -1;
    int pkgLevel = -1;
    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

    for (level = 0;; level++) {
        if (level > 31) {
            //
            // FIXME: Hack for DPD200163180
            //
            // If level is big then something went wrong -> exiting
            //
            // There could actually be 32 valid levels in the machine topology,
            // but so far, the only machine we have seen which does not exit
            // this loop before iteration 32 has fubar x2APIC settings.
            //
            // For now, just reject this case based upon loop trip count.
            //
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }
        __kmp_x86_cpuid(11, level, &buf);
        if (buf.ebx == 0) {
            if (pkgLevel < 0) {
                //
                // Will infer nPackages from __kmp_xproc
                //
                pkgLevel = level;
                level++;
            }
            break;
        }
        int kind = (buf.ecx >> 8) & 0xff;
        if (kind == 1) {
            //
            // SMT level
            //
            threadLevel = level;
            coreLevel = -1;
            pkgLevel = -1;
            __kmp_nThreadsPerCore = buf.ebx & 0xff;
            if (__kmp_nThreadsPerCore == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else if (kind == 2) {
            //
            // core level
            //
            coreLevel = level;
            pkgLevel = -1;
            nCoresPerPkg = buf.ebx & 0xff;
            if (nCoresPerPkg == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else {
            if (level <= 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
            if (pkgLevel >= 0) {
                continue;
            }
            pkgLevel = level;
            nPackages = buf.ebx & 0xff;
            if (nPackages == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
    }
    int depth = level;

    //
    // In the above loop, "level" was counted from the finest level (usually
    // thread) to the coarsest. The caller expects that we will place the
    // labels in (*address2os)[].first.labels[] in the inverse order, so
    // we need to invert the vars saying which level means what.
    //
    if (threadLevel >= 0) {
        threadLevel = depth - threadLevel - 1;
    }
    if (coreLevel >= 0) {
        coreLevel = depth - coreLevel - 1;
    }
    KMP_DEBUG_ASSERT(pkgLevel >= 0);
    pkgLevel = depth - pkgLevel - 1;

    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
    // the defaults that we calculated from issuing cpuid without binding
    // to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE())
    {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Allocate the data structure to be returned.
    //
    AddrUnsPair *retval = (AddrUnsPair *)
        __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    unsigned int proc;
    int nApics = 0;
    for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(proc, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(proc);

        //
        // Extract the labels for each level in the machine topology map
        // from the Apic ID.
        //
        Address addr(depth);
        int prev_shift = 0;

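        // Illustration (hypothetical values): cpuid leaf 11 reports, for each level,
        // the width in bits ("shift") of the x2APIC Id sub-field up to and including
        // that level. If level 0 (SMT) reports shift 1 and level 1 (core) reports
        // shift 4, then for x2APIC Id 0b10110 the thread label is bit [0] = 0, the
        // core label is bits [3:1] = 0b011, and the package label is the remaining
        // bits 0b1 - which is what the masking and shifting below compute.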
1549 for (level = 0; level < depth; level++) {
1550 __kmp_x86_cpuid(11, level, &buf);
1551 unsigned apicId = buf.edx;
1552 if (buf.ebx == 0) {
1553 if (level != depth - 1) {
1554 KMP_CPU_FREE(oldMask);
1555 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1556 return -1;
1557 }
1558 addr.labels[depth - level - 1] = apicId >> prev_shift;
1559 level++;
1560 break;
1561 }
1562 int shift = buf.eax & 0x1f;
1563 int mask = (1 << shift) - 1;
1564 addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
1565 prev_shift = shift;
1566 }
1567 if (level != depth) {
1568 KMP_CPU_FREE(oldMask);
1569 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1570 return -1;
1571 }
1572
1573 retval[nApics] = AddrUnsPair(addr, proc);
1574 nApics++;
1575 }
1576
1577 //
1578 // We've collected all the info we need.
1579 // Restore the old affinity mask for this thread.
1580 //
1581 __kmp_set_system_affinity(oldMask, TRUE);
1582
1583 //
1584 // If there's only one thread context to bind to, return now.
1585 //
1586 KMP_ASSERT(nApics > 0);
1587 if (nApics == 1) {
1588 __kmp_ncores = nPackages = 1;
1589 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001590 if (__kmp_affinity_verbose) {
1591 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1592 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1593
1594 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1595 if (__kmp_affinity_respect_mask) {
1596 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1597 } else {
1598 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1599 }
1600 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1601 KMP_INFORM(Uniform, "KMP_AFFINITY");
1602 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1603 __kmp_nThreadsPerCore, __kmp_ncores);
1604 }
1605
1606 if (__kmp_affinity_type == affinity_none) {
1607 __kmp_free(retval);
1608 KMP_CPU_FREE(oldMask);
1609 return 0;
1610 }
1611
1612 //
1613 // Form an Address object which only includes the package level.
1614 //
1615 Address addr(1);
1616 addr.labels[0] = retval[0].first.labels[pkgLevel];
1617 retval[0].first = addr;
1618
1619 if (__kmp_affinity_gran_levels < 0) {
1620 __kmp_affinity_gran_levels = 0;
1621 }
1622
1623 if (__kmp_affinity_verbose) {
1624 __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
1625 }
1626
1627 *address2os = retval;
1628 KMP_CPU_FREE(oldMask);
1629 return 1;
1630 }
1631
1632 //
1633 // Sort the table by physical Id.
1634 //
1635 qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
1636
1637 //
1638 // Find the radix at each of the levels.
1639 //
1640 unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1641 unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1642 unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1643 unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1644 for (level = 0; level < depth; level++) {
1645 totals[level] = 1;
1646 maxCt[level] = 1;
1647 counts[level] = 1;
1648 last[level] = retval[0].first.labels[level];
1649 }
1650
1651 //
1652 // From here on, the iteration variable "level" runs from the finest
1653 // level to the coarsest, i.e. we iterate forward through
1654 // (*address2os)[].first.labels[] - in the previous loops, we iterated
1655 // backwards.
1656 //
1657 for (proc = 1; (int)proc < nApics; proc++) {
1658 int level;
1659 for (level = 0; level < depth; level++) {
1660 if (retval[proc].first.labels[level] != last[level]) {
1661 int j;
1662 for (j = level + 1; j < depth; j++) {
1663 totals[j]++;
1664 counts[j] = 1;
1665                    // If enabled, the commented-out line below would cause incorrect
1666                    // topology information to be printed whenever the maximum count for
1667                    // a level (maxCt[level]) appears earlier in the array than a smaller count.
1668                    // For example, if pkg0 has 4 cores and pkg1 has 2 cores, maxCt[1]
1669                    // would end up as 2 when it should be 4.
1670                    // TODO: check whether it is safe to leave the line commented out.
1671 //maxCt[j] = 1;
1672 last[j] = retval[proc].first.labels[j];
1673 }
1674 totals[level]++;
1675 counts[level]++;
1676 if (counts[level] > maxCt[level]) {
1677 maxCt[level] = counts[level];
1678 }
1679 last[level] = retval[proc].first.labels[level];
1680 break;
1681 }
1682 else if (level == depth - 1) {
1683 __kmp_free(last);
1684 __kmp_free(maxCt);
1685 __kmp_free(counts);
1686 __kmp_free(totals);
1687 __kmp_free(retval);
1688 KMP_CPU_FREE(oldMask);
1689 *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
1690 return -1;
1691 }
1692 }
1693 }
1694
1695 //
1696 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00001697 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001698 // nCoresPerPkg, & nPackages. Make sure all these vars are set
1699 // correctly, and return if affinity is not enabled.
1700 //
1701 if (threadLevel >= 0) {
1702 __kmp_nThreadsPerCore = maxCt[threadLevel];
1703 }
1704 else {
1705 __kmp_nThreadsPerCore = 1;
1706 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001707 nPackages = totals[pkgLevel];
1708
1709 if (coreLevel >= 0) {
1710 __kmp_ncores = totals[coreLevel];
1711 nCoresPerPkg = maxCt[coreLevel];
1712 }
1713 else {
1714 __kmp_ncores = nPackages;
1715 nCoresPerPkg = 1;
1716 }
1717
1718 //
1719 // Check to see if the machine topology is uniform
1720 //
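    //
    // For example (hypothetical machine): if one package has 4 cores and the
    // other has 2, with 1 thread per core, then prod = 2 x 4 x 1 = 8 but only
    // 6 leaf entries exist, so the topology is reported as non-uniform.
    //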
1721 unsigned prod = maxCt[0];
1722 for (level = 1; level < depth; level++) {
1723 prod *= maxCt[level];
1724 }
1725 bool uniform = (prod == totals[level - 1]);
1726
1727 //
1728 // Print the machine topology summary.
1729 //
1730 if (__kmp_affinity_verbose) {
1731 char mask[KMP_AFFIN_MASK_PRINT_LEN];
1732 __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1733
1734 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1735 if (__kmp_affinity_respect_mask) {
1736 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
1737 } else {
1738 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
1739 }
1740 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1741 if (uniform) {
1742 KMP_INFORM(Uniform, "KMP_AFFINITY");
1743 } else {
1744 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1745 }
1746
1747 kmp_str_buf_t buf;
1748 __kmp_str_buf_init(&buf);
1749
1750 __kmp_str_buf_print(&buf, "%d", totals[0]);
1751 for (level = 1; level <= pkgLevel; level++) {
1752 __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
1753 }
1754 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
1755 __kmp_nThreadsPerCore, __kmp_ncores);
1756
1757 __kmp_str_buf_free(&buf);
1758 }
1759
1760 if (__kmp_affinity_type == affinity_none) {
1761 __kmp_free(last);
1762 __kmp_free(maxCt);
1763 __kmp_free(counts);
1764 __kmp_free(totals);
1765 __kmp_free(retval);
1766 KMP_CPU_FREE(oldMask);
1767 return 0;
1768 }
1769
1770 //
1771    // Find any levels with radix 1, and remove them from the map
1772 // (except for the package level).
1773 //
1774 int new_depth = 0;
1775 for (level = 0; level < depth; level++) {
1776 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1777 continue;
1778 }
1779 new_depth++;
1780 }
1781
1782 //
1783 // If we are removing any levels, allocate a new vector to return,
1784 // and copy the relevant information to it.
1785 //
1786 if (new_depth != depth) {
1787 AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
1788 sizeof(AddrUnsPair) * nApics);
1789 for (proc = 0; (int)proc < nApics; proc++) {
1790 Address addr(new_depth);
1791 new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
1792 }
1793 int new_level = 0;
1794 for (level = 0; level < depth; level++) {
1795 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1796 if (level == threadLevel) {
1797 threadLevel = -1;
1798 }
1799 else if ((threadLevel >= 0) && (level < threadLevel)) {
1800 threadLevel--;
1801 }
1802 if (level == coreLevel) {
1803 coreLevel = -1;
1804 }
1805 else if ((coreLevel >= 0) && (level < coreLevel)) {
1806 coreLevel--;
1807 }
1808 if (level < pkgLevel) {
1809 pkgLevel--;
1810 }
1811 continue;
1812 }
1813 for (proc = 0; (int)proc < nApics; proc++) {
1814 new_retval[proc].first.labels[new_level]
1815 = retval[proc].first.labels[level];
1816 }
1817 new_level++;
1818 }
1819
1820 __kmp_free(retval);
1821 retval = new_retval;
1822 depth = new_depth;
1823 }
1824
1825 if (__kmp_affinity_gran_levels < 0) {
1826 //
1827 // Set the granularity level based on what levels are modeled
1828 // in the machine topology map.
1829 //
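        //
        // For example, if the requested granularity is coarser than the HW
        // thread level but not coarser than the core level, one level is
        // ignored, so all HW threads on a core later share one affinity mask.
        //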
1830 __kmp_affinity_gran_levels = 0;
1831 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
1832 __kmp_affinity_gran_levels++;
1833 }
1834 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1835 __kmp_affinity_gran_levels++;
1836 }
1837 if (__kmp_affinity_gran > affinity_gran_package) {
1838 __kmp_affinity_gran_levels++;
1839 }
1840 }
1841
1842 if (__kmp_affinity_verbose) {
1843 __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
1844 coreLevel, threadLevel);
1845 }
1846
1847 __kmp_free(last);
1848 __kmp_free(maxCt);
1849 __kmp_free(counts);
1850 __kmp_free(totals);
1851 KMP_CPU_FREE(oldMask);
1852 *address2os = retval;
1853 return depth;
1854}
1855
1856
1857# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1858
1859
1860#define osIdIndex 0
1861#define threadIdIndex 1
1862#define coreIdIndex 2
1863#define pkgIdIndex 3
1864#define nodeIdIndex 4
1865
1866typedef unsigned *ProcCpuInfo;
1867static unsigned maxIndex = pkgIdIndex;
1868
1869
1870static int
1871__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
1872{
1873 const unsigned *aa = (const unsigned *)a;
1874 const unsigned *bb = (const unsigned *)b;
1875 if (aa[osIdIndex] < bb[osIdIndex]) return -1;
1876 if (aa[osIdIndex] > bb[osIdIndex]) return 1;
1877 return 0;
1878}
1879
1880
1881static int
1882__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
1883{
1884 unsigned i;
1885 const unsigned *aa = *((const unsigned **)a);
1886 const unsigned *bb = *((const unsigned **)b);
1887 for (i = maxIndex; ; i--) {
1888 if (aa[i] < bb[i]) return -1;
1889 if (aa[i] > bb[i]) return 1;
1890 if (i == osIdIndex) break;
1891 }
1892 return 0;
1893}
1894
1895
1896//
1897// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
1898// affinity map.
1899//
1900static int
1901__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
1902 kmp_i18n_id_t *const msg_id, FILE *f)
1903{
1904 *address2os = NULL;
1905 *msg_id = kmp_i18n_null;
1906
1907 //
1908    // Scan the file, and count the number of "processor" (osId) fields,
Alp Toker8f2d3f02014-02-24 10:40:15 +00001909 // and find the highest value of <n> for a node_<n> field.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001910 //
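    //
    // For illustration, a record in the file is expected to look roughly like
    // (only the fields parsed below matter; anything else is ignored):
    //
    //     processor       : 3
    //     physical id     : 0
    //     core id         : 3
    //
    // with optional "thread id" and "node_<n> id" fields.
    //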
1911 char buf[256];
1912 unsigned num_records = 0;
1913 while (! feof(f)) {
1914 buf[sizeof(buf) - 1] = 1;
1915 if (! fgets(buf, sizeof(buf), f)) {
1916 //
1917            // Read errors are presumably due to EOF
1918 //
1919 break;
1920 }
1921
1922 char s1[] = "processor";
1923 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1924 num_records++;
1925 continue;
1926 }
1927
1928 //
1929 // FIXME - this will match "node_<n> <garbage>"
1930 //
1931 unsigned level;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001932 if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001933 if (nodeIdIndex + level >= maxIndex) {
1934 maxIndex = nodeIdIndex + level;
1935 }
1936 continue;
1937 }
1938 }
1939
1940 //
1941 // Check for empty file / no valid processor records, or too many.
1942 // The number of records can't exceed the number of valid bits in the
1943 // affinity mask.
1944 //
1945 if (num_records == 0) {
1946 *line = 0;
1947 *msg_id = kmp_i18n_str_NoProcRecords;
1948 return -1;
1949 }
1950 if (num_records > (unsigned)__kmp_xproc) {
1951 *line = 0;
1952 *msg_id = kmp_i18n_str_TooManyProcRecords;
1953 return -1;
1954 }
1955
1956 //
1957    // Set the file pointer back to the beginning, so that we can scan the
1958    // file again, this time performing a full parse of the data.
1959    // Allocate a vector of ProcCpuInfo objects, where we will place the data.
1960 // Adding an extra element at the end allows us to remove a lot of extra
1961 // checks for termination conditions.
1962 //
1963 if (fseek(f, 0, SEEK_SET) != 0) {
1964 *line = 0;
1965 *msg_id = kmp_i18n_str_CantRewindCpuinfo;
1966 return -1;
1967 }
1968
1969 //
1970 // Allocate the array of records to store the proc info in. The dummy
1971 // element at the end makes the logic in filling them out easier to code.
1972 //
1973 unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
1974 * sizeof(unsigned *));
1975 unsigned i;
1976 for (i = 0; i <= num_records; i++) {
1977 threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
1978 * sizeof(unsigned));
1979 }
1980
1981#define CLEANUP_THREAD_INFO \
1982 for (i = 0; i <= num_records; i++) { \
1983 __kmp_free(threadInfo[i]); \
1984 } \
1985 __kmp_free(threadInfo);
1986
1987 //
1988 // A value of UINT_MAX means that we didn't find the field
1989 //
1990 unsigned __index;
1991
1992#define INIT_PROC_INFO(p) \
1993 for (__index = 0; __index <= maxIndex; __index++) { \
1994 (p)[__index] = UINT_MAX; \
1995 }
1996
1997 for (i = 0; i <= num_records; i++) {
1998 INIT_PROC_INFO(threadInfo[i]);
1999 }
2000
2001 unsigned num_avail = 0;
2002 *line = 0;
2003 while (! feof(f)) {
2004 //
2005 // Create an inner scoping level, so that all the goto targets at the
2006 // end of the loop appear in an outer scoping level. This avoids
2007 // warnings about jumping past an initialization to a target in the
2008 // same block.
2009 //
2010 {
2011 buf[sizeof(buf) - 1] = 1;
2012 bool long_line = false;
2013 if (! fgets(buf, sizeof(buf), f)) {
2014 //
2015 // Read errors presumably because of EOF
2016                // Read errors are presumably due to EOF
2017 // If there is valid data in threadInfo[num_avail], then fake
2018                // a blank line to ensure that the last address gets parsed.
2019 //
2020 bool valid = false;
2021 for (i = 0; i <= maxIndex; i++) {
2022 if (threadInfo[num_avail][i] != UINT_MAX) {
2023 valid = true;
2024 }
2025 }
2026 if (! valid) {
2027 break;
2028 }
2029 buf[0] = 0;
2030 } else if (!buf[sizeof(buf) - 1]) {
2031 //
2032                // The line is longer than the buffer.  Set a flag; the error is
2033                // reported later only if the line was one we needed to parse anyway.
2034 //
2035 long_line = true;
2036
2037#define CHECK_LINE \
2038 if (long_line) { \
2039 CLEANUP_THREAD_INFO; \
2040 *msg_id = kmp_i18n_str_LongLineCpuinfo; \
2041 return -1; \
2042 }
2043 }
2044 (*line)++;
2045
2046 char s1[] = "processor";
2047 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
2048 CHECK_LINE;
2049 char *p = strchr(buf + sizeof(s1) - 1, ':');
2050 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002051 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002052 if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
2053 threadInfo[num_avail][osIdIndex] = val;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002054#if KMP_OS_LINUX && USE_SYSFS_INFO
2055 char path[256];
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002056 KMP_SNPRINTF(path, sizeof(path),
Jim Cownie181b4bb2013-12-23 17:28:57 +00002057 "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
2058 threadInfo[num_avail][osIdIndex]);
2059 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
2060
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002061 KMP_SNPRINTF(path, sizeof(path),
Jim Cownie181b4bb2013-12-23 17:28:57 +00002062 "/sys/devices/system/cpu/cpu%u/topology/core_id",
2063 threadInfo[num_avail][osIdIndex]);
2064 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002065 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002066#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002067 }
2068 char s2[] = "physical id";
2069 if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
2070 CHECK_LINE;
2071 char *p = strchr(buf + sizeof(s2) - 1, ':');
2072 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002073 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002074 if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
2075 threadInfo[num_avail][pkgIdIndex] = val;
2076 continue;
2077 }
2078 char s3[] = "core id";
2079 if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
2080 CHECK_LINE;
2081 char *p = strchr(buf + sizeof(s3) - 1, ':');
2082 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002083 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002084 if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
2085 threadInfo[num_avail][coreIdIndex] = val;
2086 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002087#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jim Cownie5e8470a2013-09-27 10:38:44 +00002088 }
2089 char s4[] = "thread id";
2090 if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
2091 CHECK_LINE;
2092 char *p = strchr(buf + sizeof(s4) - 1, ':');
2093 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002094 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002095 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
2096 threadInfo[num_avail][threadIdIndex] = val;
2097 continue;
2098 }
2099 unsigned level;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002100 if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002101 CHECK_LINE;
2102 char *p = strchr(buf + sizeof(s4) - 1, ':');
2103 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002104 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002105 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
2106 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
2107 threadInfo[num_avail][nodeIdIndex + level] = val;
2108 continue;
2109 }
2110
2111 //
2112 // We didn't recognize the leading token on the line.
2113 // There are lots of leading tokens that we don't recognize -
2114 // if the line isn't empty, go on to the next line.
2115 //
2116 if ((*buf != 0) && (*buf != '\n')) {
2117 //
2118 // If the line is longer than the buffer, read characters
2119 // until we find a newline.
2120 //
2121 if (long_line) {
2122 int ch;
2123 while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
2124 }
2125 continue;
2126 }
2127
2128 //
2129 // A newline has signalled the end of the processor record.
2130 // Check that there aren't too many procs specified.
2131 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002132 if ((int)num_avail == __kmp_xproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002133 CLEANUP_THREAD_INFO;
2134 *msg_id = kmp_i18n_str_TooManyEntries;
2135 return -1;
2136 }
2137
2138 //
2139 // Check for missing fields. The osId field must be there, and we
2140 // currently require that the physical id field is specified, also.
2141 //
2142 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
2143 CLEANUP_THREAD_INFO;
2144 *msg_id = kmp_i18n_str_MissingProcField;
2145 return -1;
2146 }
2147 if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
2148 CLEANUP_THREAD_INFO;
2149 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
2150 return -1;
2151 }
2152
2153 //
2154 // Skip this proc if it is not included in the machine model.
2155 //
2156 if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
2157 INIT_PROC_INFO(threadInfo[num_avail]);
2158 continue;
2159 }
2160
2161 //
2162 // We have a successful parse of this proc's info.
2163 // Increment the counter, and prepare for the next proc.
2164 //
2165 num_avail++;
2166 KMP_ASSERT(num_avail <= num_records);
2167 INIT_PROC_INFO(threadInfo[num_avail]);
2168 }
2169 continue;
2170
2171 no_val:
2172 CLEANUP_THREAD_INFO;
2173 *msg_id = kmp_i18n_str_MissingValCpuinfo;
2174 return -1;
2175
2176 dup_field:
2177 CLEANUP_THREAD_INFO;
2178 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
2179 return -1;
2180 }
2181 *line = 0;
2182
2183# if KMP_MIC && REDUCE_TEAM_SIZE
2184 unsigned teamSize = 0;
2185# endif // KMP_MIC && REDUCE_TEAM_SIZE
2186
2187 // check for num_records == __kmp_xproc ???
2188
2189 //
2190 // If there's only one thread context to bind to, form an Address object
2191 // with depth 1 and return immediately (or, if affinity is off, set
2192 // address2os to NULL and return).
2193 //
2194 // If it is configured to omit the package level when there is only a
2195 // single package, the logic at the end of this routine won't work if
2196 // there is only a single thread - it would try to form an Address
2197 // object with depth 0.
2198 //
2199 KMP_ASSERT(num_avail > 0);
2200 KMP_ASSERT(num_avail <= num_records);
2201 if (num_avail == 1) {
2202 __kmp_ncores = 1;
2203 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002204 if (__kmp_affinity_verbose) {
2205 if (! KMP_AFFINITY_CAPABLE()) {
2206 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2207 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2208 KMP_INFORM(Uniform, "KMP_AFFINITY");
2209 }
2210 else {
2211 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2212 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
2213 fullMask);
2214 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2215 if (__kmp_affinity_respect_mask) {
2216 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2217 } else {
2218 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2219 }
2220 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2221 KMP_INFORM(Uniform, "KMP_AFFINITY");
2222 }
2223 int index;
2224 kmp_str_buf_t buf;
2225 __kmp_str_buf_init(&buf);
2226 __kmp_str_buf_print(&buf, "1");
2227 for (index = maxIndex - 1; index > pkgIdIndex; index--) {
2228 __kmp_str_buf_print(&buf, " x 1");
2229 }
2230 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
2231 __kmp_str_buf_free(&buf);
2232 }
2233
2234 if (__kmp_affinity_type == affinity_none) {
2235 CLEANUP_THREAD_INFO;
2236 return 0;
2237 }
2238
2239 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
2240 Address addr(1);
2241 addr.labels[0] = threadInfo[0][pkgIdIndex];
2242 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
2243
2244 if (__kmp_affinity_gran_levels < 0) {
2245 __kmp_affinity_gran_levels = 0;
2246 }
2247
2248 if (__kmp_affinity_verbose) {
2249 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
2250 }
2251
2252 CLEANUP_THREAD_INFO;
2253 return 1;
2254 }
2255
2256 //
2257 // Sort the threadInfo table by physical Id.
2258 //
2259 qsort(threadInfo, num_avail, sizeof(*threadInfo),
2260 __kmp_affinity_cmp_ProcCpuInfo_phys_id);
2261
2262 //
2263 // The table is now sorted by pkgId / coreId / threadId, but we really
2264 // don't know the radix of any of the fields. pkgId's may be sparsely
2265 // assigned among the chips on a system. Although coreId's are usually
2266 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
2267 // [0..threadsPerCore-1], we don't want to make any such assumptions.
2268 //
2269 // For that matter, we don't know what coresPerPkg and threadsPerCore
2270 // (or the total # packages) are at this point - we want to determine
2271 // that now. We only have an upper bound on the first two figures.
2272 //
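    //
    // For illustration (hypothetical values): a sorted table of
    //
    //     pkgId = 0, coreId = 0    pkgId = 0, coreId = 1
    //     pkgId = 3, coreId = 0    pkgId = 3, coreId = 1
    //
    // yields nPackages = 2 and nCoresPerPkg = 2, even though the pkgId values
    // (0 and 3) are not contiguous.
    //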
2273 unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
2274 * sizeof(unsigned));
2275 unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
2276 * sizeof(unsigned));
2277 unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
2278 * sizeof(unsigned));
2279 unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
2280 * sizeof(unsigned));
2281
2282 bool assign_thread_ids = false;
2283 unsigned threadIdCt;
2284 unsigned index;
2285
2286 restart_radix_check:
2287 threadIdCt = 0;
2288
2289 //
2290 // Initialize the counter arrays with data from threadInfo[0].
2291 //
2292 if (assign_thread_ids) {
2293 if (threadInfo[0][threadIdIndex] == UINT_MAX) {
2294 threadInfo[0][threadIdIndex] = threadIdCt++;
2295 }
2296 else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
2297 threadIdCt = threadInfo[0][threadIdIndex] + 1;
2298 }
2299 }
2300 for (index = 0; index <= maxIndex; index++) {
2301 counts[index] = 1;
2302 maxCt[index] = 1;
2303 totals[index] = 1;
2304        lastId[index] = threadInfo[0][index];
2305 }
2306
2307 //
2308 // Run through the rest of the OS procs.
2309 //
2310 for (i = 1; i < num_avail; i++) {
2311 //
2312 // Find the most significant index whose id differs
2313 // from the id for the previous OS proc.
2314 //
2315 for (index = maxIndex; index >= threadIdIndex; index--) {
2316 if (assign_thread_ids && (index == threadIdIndex)) {
2317 //
2318 // Auto-assign the thread id field if it wasn't specified.
2319 //
2320 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2321 threadInfo[i][threadIdIndex] = threadIdCt++;
2322 }
2323
2324 //
2325                // Apparently the thread id field was specified for some
2326 // entries and not others. Start the thread id counter
2327 // off at the next higher thread id.
2328 //
2329 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2330 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2331 }
2332 }
2333 if (threadInfo[i][index] != lastId[index]) {
2334 //
2335 // Run through all indices which are less significant,
2336 // and reset the counts to 1.
2337 //
2338 // At all levels up to and including index, we need to
2339 // increment the totals and record the last id.
2340 //
2341 unsigned index2;
2342 for (index2 = threadIdIndex; index2 < index; index2++) {
2343 totals[index2]++;
2344 if (counts[index2] > maxCt[index2]) {
2345 maxCt[index2] = counts[index2];
2346 }
2347 counts[index2] = 1;
2348 lastId[index2] = threadInfo[i][index2];
2349 }
2350 counts[index]++;
2351 totals[index]++;
2352 lastId[index] = threadInfo[i][index];
2353
2354 if (assign_thread_ids && (index > threadIdIndex)) {
2355
2356# if KMP_MIC && REDUCE_TEAM_SIZE
2357 //
2358 // The default team size is the total #threads in the machine
2359 // minus 1 thread for every core that has 3 or more threads.
2360 //
2361 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2362# endif // KMP_MIC && REDUCE_TEAM_SIZE
2363
2364 //
2365 // Restart the thread counter, as we are on a new core.
2366 //
2367 threadIdCt = 0;
2368
2369 //
2370 // Auto-assign the thread id field if it wasn't specified.
2371 //
2372 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2373 threadInfo[i][threadIdIndex] = threadIdCt++;
2374 }
2375
2376 //
2377                    // Apparently the thread id field was specified for some
2378 // entries and not others. Start the thread id counter
2379 // off at the next higher thread id.
2380 //
2381 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2382 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2383 }
2384 }
2385 break;
2386 }
2387 }
2388 if (index < threadIdIndex) {
2389 //
2390 // If thread ids were specified, it is an error if they are not
2391            // unique. Also, check that we haven't already restarted the
2392 // loop (to be safe - shouldn't need to).
2393 //
2394 if ((threadInfo[i][threadIdIndex] != UINT_MAX)
2395 || assign_thread_ids) {
2396 __kmp_free(lastId);
2397 __kmp_free(totals);
2398 __kmp_free(maxCt);
2399 __kmp_free(counts);
2400 CLEANUP_THREAD_INFO;
2401 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2402 return -1;
2403 }
2404
2405 //
2406            // If the thread ids were not specified and we see duplicate
2407            // entries, start the loop over and
2408 // assign the thread ids manually.
2409 //
2410 assign_thread_ids = true;
2411 goto restart_radix_check;
2412 }
2413 }
2414
2415# if KMP_MIC && REDUCE_TEAM_SIZE
2416 //
2417 // The default team size is the total #threads in the machine
2418 // minus 1 thread for every core that has 3 or more threads.
2419 //
2420 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2421# endif // KMP_MIC && REDUCE_TEAM_SIZE
2422
2423 for (index = threadIdIndex; index <= maxIndex; index++) {
2424 if (counts[index] > maxCt[index]) {
2425 maxCt[index] = counts[index];
2426 }
2427 }
2428
2429 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2430 nCoresPerPkg = maxCt[coreIdIndex];
2431 nPackages = totals[pkgIdIndex];
2432
2433 //
2434 // Check to see if the machine topology is uniform
2435 //
2436 unsigned prod = totals[maxIndex];
2437 for (index = threadIdIndex; index < maxIndex; index++) {
2438 prod *= maxCt[index];
2439 }
2440 bool uniform = (prod == totals[threadIdIndex]);
2441
2442 //
2443 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00002444 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002445 // nCoresPerPkg, & nPackages. Make sure all these vars are set
2446 // correctly, and return now if affinity is not enabled.
2447 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00002448 __kmp_ncores = totals[coreIdIndex];
2449
2450 if (__kmp_affinity_verbose) {
2451 if (! KMP_AFFINITY_CAPABLE()) {
2452 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2453 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2454 if (uniform) {
2455 KMP_INFORM(Uniform, "KMP_AFFINITY");
2456 } else {
2457 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2458 }
2459 }
2460 else {
2461 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2462 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
2463 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2464 if (__kmp_affinity_respect_mask) {
2465 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2466 } else {
2467 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2468 }
2469 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2470 if (uniform) {
2471 KMP_INFORM(Uniform, "KMP_AFFINITY");
2472 } else {
2473 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2474 }
2475 }
2476 kmp_str_buf_t buf;
2477 __kmp_str_buf_init(&buf);
2478
2479 __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
2480 for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2481 __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
2482 }
2483 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
2484 maxCt[threadIdIndex], __kmp_ncores);
2485
2486 __kmp_str_buf_free(&buf);
2487 }
2488
2489# if KMP_MIC && REDUCE_TEAM_SIZE
2490 //
2491 // Set the default team size.
2492 //
2493 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2494 __kmp_dflt_team_nth = teamSize;
2495 KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
2496 __kmp_dflt_team_nth));
2497 }
2498# endif // KMP_MIC && REDUCE_TEAM_SIZE
2499
2500 if (__kmp_affinity_type == affinity_none) {
2501 __kmp_free(lastId);
2502 __kmp_free(totals);
2503 __kmp_free(maxCt);
2504 __kmp_free(counts);
2505 CLEANUP_THREAD_INFO;
2506 return 0;
2507 }
2508
2509 //
2510 // Count the number of levels which have more nodes at that level than
2511 // at the parent's level (with there being an implicit root node of
2512    // at the parent's level (with an implicit root node above the
2513    // top level). This is equivalent to saying that there is at least
2514 // map, and the package level is always in the map.
2515 //
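    //
    // For example (hypothetical machine): with 2 packages x 4 cores x 1 HW
    // thread, totals[threadIdIndex] == totals[coreIdIndex] == 8 and
    // totals[pkgIdIndex] == 2, so the thread level is omitted and only the
    // core and package levels appear in the map (depth == 2).
    //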
2516 bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
2517 int level = 0;
2518 for (index = threadIdIndex; index < maxIndex; index++) {
2519 KMP_ASSERT(totals[index] >= totals[index + 1]);
2520 inMap[index] = (totals[index] > totals[index + 1]);
2521 }
2522 inMap[maxIndex] = (totals[maxIndex] > 1);
2523 inMap[pkgIdIndex] = true;
2524
2525 int depth = 0;
2526 for (index = threadIdIndex; index <= maxIndex; index++) {
2527 if (inMap[index]) {
2528 depth++;
2529 }
2530 }
2531 KMP_ASSERT(depth > 0);
2532
2533 //
2534 // Construct the data structure that is to be returned.
2535 //
2536 *address2os = (AddrUnsPair*)
2537 __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
2538 int pkgLevel = -1;
2539 int coreLevel = -1;
2540 int threadLevel = -1;
2541
2542 for (i = 0; i < num_avail; ++i) {
2543 Address addr(depth);
2544 unsigned os = threadInfo[i][osIdIndex];
2545 int src_index;
2546 int dst_index = 0;
2547
2548 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
2549 if (! inMap[src_index]) {
2550 continue;
2551 }
2552 addr.labels[dst_index] = threadInfo[i][src_index];
2553 if (src_index == pkgIdIndex) {
2554 pkgLevel = dst_index;
2555 }
2556 else if (src_index == coreIdIndex) {
2557 coreLevel = dst_index;
2558 }
2559 else if (src_index == threadIdIndex) {
2560 threadLevel = dst_index;
2561 }
2562 dst_index++;
2563 }
2564 (*address2os)[i] = AddrUnsPair(addr, os);
2565 }
2566
2567 if (__kmp_affinity_gran_levels < 0) {
2568 //
2569 // Set the granularity level based on what levels are modeled
2570 // in the machine topology map.
2571 //
2572 unsigned src_index;
2573 __kmp_affinity_gran_levels = 0;
2574 for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2575 if (! inMap[src_index]) {
2576 continue;
2577 }
2578 switch (src_index) {
2579 case threadIdIndex:
2580 if (__kmp_affinity_gran > affinity_gran_thread) {
2581 __kmp_affinity_gran_levels++;
2582 }
2583
2584 break;
2585 case coreIdIndex:
2586 if (__kmp_affinity_gran > affinity_gran_core) {
2587 __kmp_affinity_gran_levels++;
2588 }
2589 break;
2590
2591 case pkgIdIndex:
2592 if (__kmp_affinity_gran > affinity_gran_package) {
2593 __kmp_affinity_gran_levels++;
2594 }
2595 break;
2596 }
2597 }
2598 }
2599
2600 if (__kmp_affinity_verbose) {
2601 __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2602 coreLevel, threadLevel);
2603 }
2604
2605 __kmp_free(inMap);
2606 __kmp_free(lastId);
2607 __kmp_free(totals);
2608 __kmp_free(maxCt);
2609 __kmp_free(counts);
2610 CLEANUP_THREAD_INFO;
2611 return depth;
2612}
2613
2614
2615//
2616// Create and return a table of affinity masks, indexed by OS thread ID.
2617// This routine handles OR'ing together all the affinity masks of threads
2618// that are sufficiently close, if granularity > fine.
2619//
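//
// For example, with a granularity of "core" on a machine with 2 HW threads
// per core, the two OS procs on each core end up with identical masks (both
// bits set), and each such pair counts as one unique entry.
//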
2620static kmp_affin_mask_t *
2621__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
2622 AddrUnsPair *address2os, unsigned numAddrs)
2623{
2624 //
2625 // First form a table of affinity masks in order of OS thread id.
2626 //
2627 unsigned depth;
2628 unsigned maxOsId;
2629 unsigned i;
2630
2631 KMP_ASSERT(numAddrs > 0);
2632 depth = address2os[0].first.depth;
2633
2634 maxOsId = 0;
2635 for (i = 0; i < numAddrs; i++) {
2636 unsigned osId = address2os[i].second;
2637 if (osId > maxOsId) {
2638 maxOsId = osId;
2639 }
2640 }
2641 kmp_affin_mask_t *osId2Mask = (kmp_affin_mask_t *)__kmp_allocate(
2642 (maxOsId + 1) * __kmp_affin_mask_size);
2643
2644 //
2645 // Sort the address2os table according to physical order. Doing so
2646 // will put all threads on the same core/package/node in consecutive
2647 // locations.
2648 //
2649 qsort(address2os, numAddrs, sizeof(*address2os),
2650 __kmp_affinity_cmp_Address_labels);
2651
2652 KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2653 if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
2654 KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
2655 }
2656 if (__kmp_affinity_gran_levels >= (int)depth) {
2657 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2658 && (__kmp_affinity_type != affinity_none))) {
2659 KMP_WARNING(AffThreadsMayMigrate);
2660 }
2661 }
2662
2663 //
2664 // Run through the table, forming the masks for all threads on each
2665 // core. Threads on the same core will have identical "Address"
2666 // objects, not considering the last level, which must be the thread
2667 // id. All threads on a core will appear consecutively.
2668 //
2669 unsigned unique = 0;
2670 unsigned j = 0; // index of 1st thread on core
2671 unsigned leader = 0;
2672 Address *leaderAddr = &(address2os[0].first);
2673 kmp_affin_mask_t *sum
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002674 = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002675 KMP_CPU_ZERO(sum);
2676 KMP_CPU_SET(address2os[0].second, sum);
2677 for (i = 1; i < numAddrs; i++) {
2678 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002679 // If this thread is sufficiently close to the leader (within the
Jim Cownie5e8470a2013-09-27 10:38:44 +00002680 // granularity setting), then set the bit for this os thread in the
2681 // affinity mask for this group, and go on to the next thread.
2682 //
2683 if (leaderAddr->isClose(address2os[i].first,
2684 __kmp_affinity_gran_levels)) {
2685 KMP_CPU_SET(address2os[i].second, sum);
2686 continue;
2687 }
2688
2689 //
2690 // For every thread in this group, copy the mask to the thread's
2691 // entry in the osId2Mask table. Mark the first address as a
2692 // leader.
2693 //
2694 for (; j < i; j++) {
2695 unsigned osId = address2os[j].second;
2696 KMP_DEBUG_ASSERT(osId <= maxOsId);
2697 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2698 KMP_CPU_COPY(mask, sum);
2699 address2os[j].first.leader = (j == leader);
2700 }
2701 unique++;
2702
2703 //
2704 // Start a new mask.
2705 //
2706 leader = i;
2707 leaderAddr = &(address2os[i].first);
2708 KMP_CPU_ZERO(sum);
2709 KMP_CPU_SET(address2os[i].second, sum);
2710 }
2711
2712 //
2713 // For every thread in last group, copy the mask to the thread's
2714 // entry in the osId2Mask table.
2715 //
2716 for (; j < i; j++) {
2717 unsigned osId = address2os[j].second;
2718 KMP_DEBUG_ASSERT(osId <= maxOsId);
2719 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2720 KMP_CPU_COPY(mask, sum);
2721 address2os[j].first.leader = (j == leader);
2722 }
2723 unique++;
2724
2725 *maxIndex = maxOsId;
2726 *numUnique = unique;
2727 return osId2Mask;
2728}
2729
2730
2731//
2732// Shared state for the affinity proclist parsers. It's easier to declare these
2733// vars as file-static than to try to pass them through the calling sequence of
2734// the recursive-descent OMP_PLACES parser.
2735//
2736static kmp_affin_mask_t *newMasks;
2737static int numNewMasks;
2738static int nextNewMask;
2739
2740#define ADD_MASK(_mask) \
2741 { \
2742 if (nextNewMask >= numNewMasks) { \
2743 numNewMasks *= 2; \
2744 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
2745 numNewMasks * __kmp_affin_mask_size); \
2746 } \
2747 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
2748 nextNewMask++; \
2749 }
2750
2751#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
2752 { \
2753 if (((_osId) > _maxOsId) || \
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002754 (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002755 if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
2756 && (__kmp_affinity_type != affinity_none))) { \
2757 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
2758 } \
2759 } \
2760 else { \
2761 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
2762 } \
2763 }
2764
2765
2766//
2767// Re-parse the proclist (for the explicit affinity type), and form the list
2768// of affinity newMasks indexed by gtid.
2769//
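//
// For illustration, a proclist of "0,3-5,{6,7},8-16:4" would produce the
// masks {0}, {3}, {4}, {5}, {6,7}, {8}, {12}, {16}, in that order.
//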
2770static void
2771__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
2772 unsigned int *out_numMasks, const char *proclist,
2773 kmp_affin_mask_t *osId2Mask, int maxOsId)
2774{
2775 const char *scan = proclist;
2776 const char *next = proclist;
2777
2778 //
2779 // We use malloc() for the temporary mask vector,
2780 // so that we can use realloc() to extend it.
2781 //
2782 numNewMasks = 2;
2783 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
2784 * __kmp_affin_mask_size);
2785 nextNewMask = 0;
2786 kmp_affin_mask_t *sumMask = (kmp_affin_mask_t *)__kmp_allocate(
2787 __kmp_affin_mask_size);
2788 int setSize = 0;
2789
2790 for (;;) {
2791 int start, end, stride;
2792
2793 SKIP_WS(scan);
2794 next = scan;
2795 if (*next == '\0') {
2796 break;
2797 }
2798
2799 if (*next == '{') {
2800 int num;
2801 setSize = 0;
2802 next++; // skip '{'
2803 SKIP_WS(next);
2804 scan = next;
2805
2806 //
2807 // Read the first integer in the set.
2808 //
2809 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2810 "bad proclist");
2811 SKIP_DIGITS(next);
2812 num = __kmp_str_to_int(scan, *next);
2813 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2814
2815 //
2816 // Copy the mask for that osId to the sum (union) mask.
2817 //
2818 if ((num > maxOsId) ||
2819 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2820 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2821 && (__kmp_affinity_type != affinity_none))) {
2822 KMP_WARNING(AffIgnoreInvalidProcID, num);
2823 }
2824 KMP_CPU_ZERO(sumMask);
2825 }
2826 else {
2827 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2828 setSize = 1;
2829 }
2830
2831 for (;;) {
2832 //
2833 // Check for end of set.
2834 //
2835 SKIP_WS(next);
2836 if (*next == '}') {
2837 next++; // skip '}'
2838 break;
2839 }
2840
2841 //
2842 // Skip optional comma.
2843 //
2844 if (*next == ',') {
2845 next++;
2846 }
2847 SKIP_WS(next);
2848
2849 //
2850 // Read the next integer in the set.
2851 //
2852 scan = next;
2853 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2854 "bad explicit proc list");
2855
2856 SKIP_DIGITS(next);
2857 num = __kmp_str_to_int(scan, *next);
2858 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2859
2860 //
2861 // Add the mask for that osId to the sum mask.
2862 //
2863 if ((num > maxOsId) ||
2864 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2865 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2866 && (__kmp_affinity_type != affinity_none))) {
2867 KMP_WARNING(AffIgnoreInvalidProcID, num);
2868 }
2869 }
2870 else {
2871 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2872 setSize++;
2873 }
2874 }
2875 if (setSize > 0) {
2876 ADD_MASK(sumMask);
2877 }
2878
2879 SKIP_WS(next);
2880 if (*next == ',') {
2881 next++;
2882 }
2883 scan = next;
2884 continue;
2885 }
2886
2887 //
2888 // Read the first integer.
2889 //
2890 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2891 SKIP_DIGITS(next);
2892 start = __kmp_str_to_int(scan, *next);
2893 KMP_ASSERT2(start >= 0, "bad explicit proc list");
2894 SKIP_WS(next);
2895
2896 //
2897 // If this isn't a range, then add a mask to the list and go on.
2898 //
2899 if (*next != '-') {
2900 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2901
2902 //
2903 // Skip optional comma.
2904 //
2905 if (*next == ',') {
2906 next++;
2907 }
2908 scan = next;
2909 continue;
2910 }
2911
2912 //
2913 // This is a range. Skip over the '-' and read in the 2nd int.
2914 //
2915 next++; // skip '-'
2916 SKIP_WS(next);
2917 scan = next;
2918 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2919 SKIP_DIGITS(next);
2920 end = __kmp_str_to_int(scan, *next);
2921 KMP_ASSERT2(end >= 0, "bad explicit proc list");
2922
2923 //
2924 // Check for a stride parameter
2925 //
2926 stride = 1;
2927 SKIP_WS(next);
2928 if (*next == ':') {
2929 //
2930            // A stride is specified.  Skip over the ':' and read the 3rd int.
2931 //
2932 int sign = +1;
2933 next++; // skip ':'
2934 SKIP_WS(next);
2935 scan = next;
2936 if (*next == '-') {
2937 sign = -1;
2938 next++;
2939 SKIP_WS(next);
2940 scan = next;
2941 }
2942 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2943 "bad explicit proc list");
2944 SKIP_DIGITS(next);
2945 stride = __kmp_str_to_int(scan, *next);
2946 KMP_ASSERT2(stride >= 0, "bad explicit proc list");
2947 stride *= sign;
2948 }
2949
2950 //
2951 // Do some range checks.
2952 //
2953 KMP_ASSERT2(stride != 0, "bad explicit proc list");
2954 if (stride > 0) {
2955 KMP_ASSERT2(start <= end, "bad explicit proc list");
2956 }
2957 else {
2958 KMP_ASSERT2(start >= end, "bad explicit proc list");
2959 }
2960 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
2961
2962 //
2963 // Add the mask for each OS proc # to the list.
2964 //
2965 if (stride > 0) {
2966 do {
2967 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2968 start += stride;
2969 } while (start <= end);
2970 }
2971 else {
2972 do {
2973 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2974 start += stride;
2975 } while (start >= end);
2976 }
2977
2978 //
2979 // Skip optional comma.
2980 //
2981 SKIP_WS(next);
2982 if (*next == ',') {
2983 next++;
2984 }
2985 scan = next;
2986 }
2987
2988 *out_numMasks = nextNewMask;
2989 if (nextNewMask == 0) {
2990 *out_masks = NULL;
2991 KMP_INTERNAL_FREE(newMasks);
2992 return;
2993 }
2994 *out_masks
2995 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002996 KMP_MEMCPY(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002997 __kmp_free(sumMask);
2998 KMP_INTERNAL_FREE(newMasks);
2999}
3000
3001
3002# if OMP_40_ENABLED
3003
3004/*-----------------------------------------------------------------------------
3005
3006Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
3007places. Again, here is the grammar:
3008
3009place_list := place
3010place_list := place , place_list
3011place := num
3012place := place : num
3013place := place : num : signed
3014place := { subplacelist }
3015place := { subplace_list }
3016subplace_list := subplace
3017subplace_list := subplace , subplace_list
3018subplace := num
3019subplace := num : num
3020subplace := num : num : signed
3021signed := num
3022signed := + signed
3023signed := - signed
3024
3025-----------------------------------------------------------------------------*/
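//
// For illustration, the place list "{0:4},{4:4}" describes two places, OS
// procs {0,1,2,3} and {4,5,6,7}; the equivalent form "{0:4}:2:4" uses the
// count and stride syntax to derive the second place from the first.
//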
3026
3027static void
3028__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
3029 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3030{
3031 const char *next;
3032
3033 for (;;) {
3034 int start, count, stride, i;
3035
3036 //
3037 // Read in the starting proc id
3038 //
3039 SKIP_WS(*scan);
3040 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3041 "bad explicit places list");
3042 next = *scan;
3043 SKIP_DIGITS(next);
3044 start = __kmp_str_to_int(*scan, *next);
3045 KMP_ASSERT(start >= 0);
3046 *scan = next;
3047
3048 //
3049 // valid follow sets are ',' ':' and '}'
3050 //
3051 SKIP_WS(*scan);
3052 if (**scan == '}' || **scan == ',') {
3053 if ((start > maxOsId) ||
3054 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3055 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3056 && (__kmp_affinity_type != affinity_none))) {
3057 KMP_WARNING(AffIgnoreInvalidProcID, start);
3058 }
3059 }
3060 else {
3061 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3062 (*setSize)++;
3063 }
3064 if (**scan == '}') {
3065 break;
3066 }
3067 (*scan)++; // skip ','
3068 continue;
3069 }
3070 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3071 (*scan)++; // skip ':'
3072
3073 //
3074 // Read count parameter
3075 //
3076 SKIP_WS(*scan);
3077 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3078 "bad explicit places list");
3079 next = *scan;
3080 SKIP_DIGITS(next);
3081 count = __kmp_str_to_int(*scan, *next);
3082 KMP_ASSERT(count >= 0);
3083 *scan = next;
3084
3085 //
3086 // valid follow sets are ',' ':' and '}'
3087 //
3088 SKIP_WS(*scan);
3089 if (**scan == '}' || **scan == ',') {
3090 for (i = 0; i < count; i++) {
3091 if ((start > maxOsId) ||
3092 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3093 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3094 && (__kmp_affinity_type != affinity_none))) {
3095 KMP_WARNING(AffIgnoreInvalidProcID, start);
3096 }
3097 break; // don't proliferate warnings for large count
3098 }
3099 else {
3100 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3101 start++;
3102 (*setSize)++;
3103 }
3104 }
3105 if (**scan == '}') {
3106 break;
3107 }
3108 (*scan)++; // skip ','
3109 continue;
3110 }
3111 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3112 (*scan)++; // skip ':'
3113
3114 //
3115 // Read stride parameter
3116 //
3117 int sign = +1;
3118 for (;;) {
3119 SKIP_WS(*scan);
3120 if (**scan == '+') {
3121 (*scan)++; // skip '+'
3122 continue;
3123 }
3124 if (**scan == '-') {
3125 sign *= -1;
3126 (*scan)++; // skip '-'
3127 continue;
3128 }
3129 break;
3130 }
3131 SKIP_WS(*scan);
3132 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3133 "bad explicit places list");
3134 next = *scan;
3135 SKIP_DIGITS(next);
3136 stride = __kmp_str_to_int(*scan, *next);
3137 KMP_ASSERT(stride >= 0);
3138 *scan = next;
3139 stride *= sign;
3140
3141 //
3142 // valid follow sets are ',' and '}'
3143 //
3144 SKIP_WS(*scan);
3145 if (**scan == '}' || **scan == ',') {
3146 for (i = 0; i < count; i++) {
3147 if ((start > maxOsId) ||
3148 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3149 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3150 && (__kmp_affinity_type != affinity_none))) {
3151 KMP_WARNING(AffIgnoreInvalidProcID, start);
3152 }
3153 break; // don't proliferate warnings for large count
3154 }
3155 else {
3156 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3157 start += stride;
3158 (*setSize)++;
3159 }
3160 }
3161 if (**scan == '}') {
3162 break;
3163 }
3164 (*scan)++; // skip ','
3165 continue;
3166 }
3167
3168 KMP_ASSERT2(0, "bad explicit places list");
3169 }
3170}
3171
3172
3173static void
3174__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
3175 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3176{
3177 const char *next;
3178
3179 //
3180 // valid follow sets are '{' '!' and num
3181 //
3182 SKIP_WS(*scan);
3183 if (**scan == '{') {
3184 (*scan)++; // skip '{'
3185 __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
3186 setSize);
3187 KMP_ASSERT2(**scan == '}', "bad explicit places list");
3188 (*scan)++; // skip '}'
3189 }
3190 else if (**scan == '!') {
3191 __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
3192 KMP_CPU_COMPLEMENT(tempMask);
3193 (*scan)++; // skip '!'
3194 }
3195 else if ((**scan >= '0') && (**scan <= '9')) {
3196 next = *scan;
3197 SKIP_DIGITS(next);
3198 int num = __kmp_str_to_int(*scan, *next);
3199 KMP_ASSERT(num >= 0);
3200 if ((num > maxOsId) ||
3201 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3202 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3203 && (__kmp_affinity_type != affinity_none))) {
3204 KMP_WARNING(AffIgnoreInvalidProcID, num);
3205 }
3206 }
3207 else {
3208 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
3209 (*setSize)++;
3210 }
3211 *scan = next; // skip num
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003212 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003213 else {
3214 KMP_ASSERT2(0, "bad explicit places list");
3215 }
3216}
3217
3218
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003219//static void
3220void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003221__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
3222 unsigned int *out_numMasks, const char *placelist,
3223 kmp_affin_mask_t *osId2Mask, int maxOsId)
3224{
3225 const char *scan = placelist;
3226 const char *next = placelist;
3227
3228 numNewMasks = 2;
3229 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
3230 * __kmp_affin_mask_size);
3231 nextNewMask = 0;
3232
3233 kmp_affin_mask_t *tempMask = (kmp_affin_mask_t *)__kmp_allocate(
3234 __kmp_affin_mask_size);
3235 KMP_CPU_ZERO(tempMask);
3236 int setSize = 0;
3237
3238 for (;;) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003239 __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
3240
3241 //
3242 // valid follow sets are ',' ':' and EOL
3243 //
3244 SKIP_WS(scan);
3245 if (*scan == '\0' || *scan == ',') {
3246 if (setSize > 0) {
3247 ADD_MASK(tempMask);
3248 }
3249 KMP_CPU_ZERO(tempMask);
3250 setSize = 0;
3251 if (*scan == '\0') {
3252 break;
3253 }
3254 scan++; // skip ','
3255 continue;
3256 }
3257
3258 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3259 scan++; // skip ':'
3260
3261 //
3262 // Read count parameter
3263 //
3264 SKIP_WS(scan);
3265 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3266 "bad explicit places list");
3267 next = scan;
3268 SKIP_DIGITS(next);
Jim Cownie181b4bb2013-12-23 17:28:57 +00003269 int count = __kmp_str_to_int(scan, *next);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003270 KMP_ASSERT(count >= 0);
3271 scan = next;
3272
3273 //
3274 // valid follow sets are ',' ':' and EOL
3275 //
3276 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003277 int stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003278 if (*scan == '\0' || *scan == ',') {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003279 stride = +1;
3280 }
3281 else {
3282 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3283 scan++; // skip ':'
Jim Cownie5e8470a2013-09-27 10:38:44 +00003284
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003285 //
3286 // Read stride parameter
3287 //
3288 int sign = +1;
3289 for (;;) {
3290 SKIP_WS(scan);
3291 if (*scan == '+') {
3292 scan++; // skip '+'
3293 continue;
3294 }
3295 if (*scan == '-') {
3296 sign *= -1;
3297 scan++; // skip '-'
3298 continue;
3299 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003300 break;
3301 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003302 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003303 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3304 "bad explicit places list");
3305 next = scan;
3306 SKIP_DIGITS(next);
3307 stride = __kmp_str_to_int(scan, *next);
3308 KMP_DEBUG_ASSERT(stride >= 0);
3309 scan = next;
3310 stride *= sign;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003311 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003312
3313 if (stride > 0) {
3314 int i;
3315 for (i = 0; i < count; i++) {
3316 int j;
3317 if (setSize == 0) {
3318 break;
3319 }
3320 ADD_MASK(tempMask);
3321 setSize = 0;
3322 for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003323 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3324 KMP_CPU_CLR(j, tempMask);
3325 }
3326 else if ((j > maxOsId) ||
3327 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
Andrey Churbanov16a14322015-03-10 09:34:38 +00003328 if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
3329 && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003330 KMP_WARNING(AffIgnoreInvalidProcID, j);
3331 }
3332 KMP_CPU_CLR(j, tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003333 }
3334 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003335 KMP_CPU_SET(j, tempMask);
3336 setSize++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003337 }
3338 }
3339 for (; j >= 0; j--) {
3340 KMP_CPU_CLR(j, tempMask);
3341 }
3342 }
3343 }
3344 else {
3345 int i;
3346 for (i = 0; i < count; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003347 int j;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003348 if (setSize == 0) {
3349 break;
3350 }
3351 ADD_MASK(tempMask);
3352 setSize = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003353 for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003354 j++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003355 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3356 KMP_CPU_CLR(j, tempMask);
3357 }
3358 else if ((j > maxOsId) ||
3359 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
Andrey Churbanov16a14322015-03-10 09:34:38 +00003360 if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
3361 && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003362 KMP_WARNING(AffIgnoreInvalidProcID, j);
3363 }
3364 KMP_CPU_CLR(j, tempMask);
3365 }
3366 else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003367 KMP_CPU_SET(j, tempMask);
3368 setSize++;
3369 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003370 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003371 for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003372 KMP_CPU_CLR(j, tempMask);
3373 }
3374 }
3375 }
3376 KMP_CPU_ZERO(tempMask);
3377 setSize = 0;
3378
3379 //
3380 // valid follow sets are ',' and EOL
3381 //
3382 SKIP_WS(scan);
3383 if (*scan == '\0') {
3384 break;
3385 }
3386 if (*scan == ',') {
3387 scan++; // skip ','
3388 continue;
3389 }
3390
3391 KMP_ASSERT2(0, "bad explicit places list");
3392 }
3393
3394 *out_numMasks = nextNewMask;
3395 if (nextNewMask == 0) {
3396 *out_masks = NULL;
3397 KMP_INTERNAL_FREE(newMasks);
3398 return;
3399 }
3400 *out_masks
3401 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003402 KMP_MEMCPY(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003403 __kmp_free(tempMask);
3404 KMP_INTERNAL_FREE(newMasks);
3405}
3406
3407# endif /* OMP_40_ENABLED */
3408
3409#undef ADD_MASK
3410#undef ADD_MASK_OSID
3411
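//
// Trim the topology map according to the requested thread-placement limits:
// keep only __kmp_place_num_cores cores per package, starting at
// __kmp_place_core_offset, and __kmp_place_num_threads_per_core HW thread
// contexts per core. Only uniform, 3-level (package/core/thread) topologies
// are supported; otherwise a warning is issued and the map is left unchanged.
//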
Jim Cownie5e8470a2013-09-27 10:38:44 +00003412static void
3413__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
3414{
3415 if ( __kmp_place_num_cores == 0 ) {
3416 if ( __kmp_place_num_threads_per_core == 0 ) {
3417 return; // no cores limiting actions requested, exit
3418 }
3419 __kmp_place_num_cores = nCoresPerPkg; // use all available cores
3420 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003421 if ( !__kmp_affinity_uniform_topology() ) {
3422 KMP_WARNING( AffThrPlaceNonUniform );
3423 return; // don't support non-uniform topology
3424 }
3425 if ( depth != 3 ) {
3426 KMP_WARNING( AffThrPlaceNonThreeLevel );
3427 return; // don't support not-3-level topology
Jim Cownie5e8470a2013-09-27 10:38:44 +00003428 }
3429 if ( __kmp_place_num_threads_per_core == 0 ) {
3430 __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
3431 }
Andrey Churbanov12875572015-03-10 09:00:36 +00003432 if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003433 KMP_WARNING( AffThrPlaceManyCores );
3434 return;
3435 }
3436
3437 AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
3438 nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
3439 int i, j, k, n_old = 0, n_new = 0;
3440 for ( i = 0; i < nPackages; ++i ) {
3441 for ( j = 0; j < nCoresPerPkg; ++j ) {
Andrey Churbanov12875572015-03-10 09:00:36 +00003442 if ( j < __kmp_place_core_offset || j >= __kmp_place_core_offset + __kmp_place_num_cores ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003443 n_old += __kmp_nThreadsPerCore; // skip not-requested core
3444 } else {
3445 for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) {
Andrey Churbanov12875572015-03-10 09:00:36 +00003446 if ( k < __kmp_place_num_threads_per_core ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003447 newAddr[n_new] = (*pAddr)[n_old]; // copy requested core' data to new location
3448 n_new++;
3449 }
3450 n_old++;
3451 }
3452 }
3453 }
3454 }
3455 nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
3456 __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
3457 __kmp_avail_proc = n_new; // correct avail_proc
3458 __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
3459
3460 __kmp_free( *pAddr );
3461 *pAddr = newAddr; // replace old topology with new one
3462}
3463
Jim Cownie5e8470a2013-09-27 10:38:44 +00003464
3465static AddrUnsPair *address2os = NULL;
3466static int * procarr = NULL;
3467static int __kmp_aff_depth = 0;
3468
3469static void
3470__kmp_aux_affinity_initialize(void)
3471{
3472 if (__kmp_affinity_masks != NULL) {
3473 KMP_ASSERT(fullMask != NULL);
3474 return;
3475 }
3476
3477 //
3478 // Create the "full" mask - this defines all of the processors that we
3479 // consider to be in the machine model. If respect is set, then it is
3480 // the initialization thread's affinity mask. Otherwise, it is all
3481 // processors that we know about on the machine.
3482 //
3483 if (fullMask == NULL) {
3484 fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
3485 }
3486 if (KMP_AFFINITY_CAPABLE()) {
3487 if (__kmp_affinity_respect_mask) {
3488 __kmp_get_system_affinity(fullMask, TRUE);
3489
3490 //
3491 // Count the number of available processors.
3492 //
3493 unsigned i;
3494 __kmp_avail_proc = 0;
3495 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
3496 if (! KMP_CPU_ISSET(i, fullMask)) {
3497 continue;
3498 }
3499 __kmp_avail_proc++;
3500 }
3501 if (__kmp_avail_proc > __kmp_xproc) {
3502 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3503 && (__kmp_affinity_type != affinity_none))) {
3504 KMP_WARNING(ErrorInitializeAffinity);
3505 }
3506 __kmp_affinity_type = affinity_none;
Andrey Churbanov1f037e42015-03-10 09:15:26 +00003507 KMP_AFFINITY_DISABLE();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003508 return;
3509 }
3510 }
3511 else {
3512 __kmp_affinity_entire_machine_mask(fullMask);
3513 __kmp_avail_proc = __kmp_xproc;
3514 }
3515 }
3516
3517 int depth = -1;
3518 kmp_i18n_id_t msg_id = kmp_i18n_null;
3519
3520 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00003521 // For backward compatibility, setting KMP_CPUINFO_FILE =>
Jim Cownie5e8470a2013-09-27 10:38:44 +00003522 // KMP_TOPOLOGY_METHOD=cpuinfo
3523 //
3524 if ((__kmp_cpuinfo_file != NULL) &&
3525 (__kmp_affinity_top_method == affinity_top_method_all)) {
3526 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
3527 }
3528
3529 if (__kmp_affinity_top_method == affinity_top_method_all) {
3530 //
3531 // In the default code path, errors are not fatal - we just try using
3532 // another method. We only emit a warning message if affinity is on,
3533        // or the verbose flag is set, and the nowarnings flag was not set.
3534 //
3535 const char *file_name = NULL;
3536 int line = 0;
3537
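        //
        // Discovery methods are tried in order until one produces a topology:
        // x2APIC ids, then legacy APIC ids (x86 only), then /proc/cpuinfo
        // (Linux only), then Windows processor groups (when more than one
        // group exists), and finally the flat OS-proc-id map as a last resort.
        //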
3538# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3539
3540 if (__kmp_affinity_verbose) {
3541 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
3542 }
3543
3544 file_name = NULL;
3545 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3546 if (depth == 0) {
3547 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3548 KMP_ASSERT(address2os == NULL);
3549 return;
3550 }
3551
3552 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003553 if (__kmp_affinity_verbose) {
3554 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003555 KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
3556 KMP_I18N_STR(DecodingLegacyAPIC));
3557 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003558 else {
3559 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
3560 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003561 }
3562
3563 file_name = NULL;
3564 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3565 if (depth == 0) {
3566 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3567 KMP_ASSERT(address2os == NULL);
3568 return;
3569 }
3570 }
3571
3572# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3573
3574# if KMP_OS_LINUX
3575
3576 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003577 if (__kmp_affinity_verbose) {
3578 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003579 KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
3580 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003581 else {
3582 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
3583 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003584 }
3585
3586 FILE *f = fopen("/proc/cpuinfo", "r");
3587 if (f == NULL) {
3588 msg_id = kmp_i18n_str_CantOpenCpuinfo;
3589 }
3590 else {
3591 file_name = "/proc/cpuinfo";
3592 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3593 fclose(f);
3594 if (depth == 0) {
3595 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3596 KMP_ASSERT(address2os == NULL);
3597 return;
3598 }
3599 }
3600 }
3601
3602# endif /* KMP_OS_LINUX */
3603
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003604# if KMP_GROUP_AFFINITY
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003605
3606 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
3607 if (__kmp_affinity_verbose) {
3608 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3609 }
3610
3611 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3612 KMP_ASSERT(depth != 0);
3613 }
3614
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003615# endif /* KMP_GROUP_AFFINITY */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003616
Jim Cownie5e8470a2013-09-27 10:38:44 +00003617 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003618 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003619 if (file_name == NULL) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003620 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003621 }
3622 else if (line == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003623 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003624 }
3625 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003626 KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003627 }
3628 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003629 // FIXME - print msg if msg_id = kmp_i18n_null ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00003630
3631 file_name = "";
3632 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3633 if (depth == 0) {
3634 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3635 KMP_ASSERT(address2os == NULL);
3636 return;
3637 }
3638 KMP_ASSERT(depth > 0);
3639 KMP_ASSERT(address2os != NULL);
3640 }
3641 }
3642
3643 //
3644    // If the user has specified that a particular topology discovery method
3645 // is to be used, then we abort if that method fails. The exception is
3646 // group affinity, which might have been implicitly set.
3647 //
3648
3649# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3650
3651 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
3652 if (__kmp_affinity_verbose) {
3653 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3654 KMP_I18N_STR(Decodingx2APIC));
3655 }
3656
3657 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3658 if (depth == 0) {
3659 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3660 KMP_ASSERT(address2os == NULL);
3661 return;
3662 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003663 if (depth < 0) {
3664 KMP_ASSERT(msg_id != kmp_i18n_null);
3665 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3666 }
3667 }
3668 else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
3669 if (__kmp_affinity_verbose) {
3670 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3671 KMP_I18N_STR(DecodingLegacyAPIC));
3672 }
3673
3674 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3675 if (depth == 0) {
3676 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3677 KMP_ASSERT(address2os == NULL);
3678 return;
3679 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003680 if (depth < 0) {
3681 KMP_ASSERT(msg_id != kmp_i18n_null);
3682 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3683 }
3684 }
3685
3686# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3687
3688 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
3689 const char *filename;
3690 if (__kmp_cpuinfo_file != NULL) {
3691 filename = __kmp_cpuinfo_file;
3692 }
3693 else {
3694 filename = "/proc/cpuinfo";
3695 }
3696
3697 if (__kmp_affinity_verbose) {
3698 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
3699 }
3700
3701 FILE *f = fopen(filename, "r");
3702 if (f == NULL) {
3703 int code = errno;
3704 if (__kmp_cpuinfo_file != NULL) {
3705 __kmp_msg(
3706 kmp_ms_fatal,
3707 KMP_MSG(CantOpenFileForReading, filename),
3708 KMP_ERR(code),
3709 KMP_HNT(NameComesFrom_CPUINFO_FILE),
3710 __kmp_msg_null
3711 );
3712 }
3713 else {
3714 __kmp_msg(
3715 kmp_ms_fatal,
3716 KMP_MSG(CantOpenFileForReading, filename),
3717 KMP_ERR(code),
3718 __kmp_msg_null
3719 );
3720 }
3721 }
3722 int line = 0;
3723 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3724 fclose(f);
3725 if (depth < 0) {
3726 KMP_ASSERT(msg_id != kmp_i18n_null);
3727 if (line > 0) {
3728 KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
3729 }
3730 else {
3731 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
3732 }
3733 }
3734 if (__kmp_affinity_type == affinity_none) {
3735 KMP_ASSERT(depth == 0);
3736 KMP_ASSERT(address2os == NULL);
3737 return;
3738 }
3739 }
3740
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003741# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00003742
3743 else if (__kmp_affinity_top_method == affinity_top_method_group) {
3744 if (__kmp_affinity_verbose) {
3745 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3746 }
3747
3748 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3749 KMP_ASSERT(depth != 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003750 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003751 KMP_ASSERT(msg_id != kmp_i18n_null);
3752 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003753 }
3754 }
3755
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003756# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003757
3758 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
3759 if (__kmp_affinity_verbose) {
3760 KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
3761 }
3762
3763 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3764 if (depth == 0) {
3765 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3766 KMP_ASSERT(address2os == NULL);
3767 return;
3768 }
3769 // should not fail
3770 KMP_ASSERT(depth > 0);
3771 KMP_ASSERT(address2os != NULL);
3772 }
3773
3774 if (address2os == NULL) {
3775 if (KMP_AFFINITY_CAPABLE()
3776 && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3777 && (__kmp_affinity_type != affinity_none)))) {
3778 KMP_WARNING(ErrorInitializeAffinity);
3779 }
3780 __kmp_affinity_type = affinity_none;
Andrey Churbanov1f037e42015-03-10 09:15:26 +00003781 KMP_AFFINITY_DISABLE();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003782 return;
3783 }
3784
Jim Cownie5e8470a2013-09-27 10:38:44 +00003785 __kmp_apply_thread_places(&address2os, depth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003786
3787 //
3788 // Create the table of masks, indexed by thread Id.
3789 //
3790 unsigned maxIndex;
3791 unsigned numUnique;
3792 kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
3793 address2os, __kmp_avail_proc);
3794 if (__kmp_affinity_gran_levels == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003795 KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003796 }
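    //
    // osId2Mask is indexed by OS proc id; each entry is the mask of all procs
    // sharing that proc's granularity unit. numUnique is the number of
    // distinct masks, which equals __kmp_avail_proc when the granularity is
    // the finest level (__kmp_affinity_gran_levels == 0).
    //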
3797
3798 //
3799 // Set the childNums vector in all Address objects. This must be done
3800 // before we can sort using __kmp_affinity_cmp_Address_child_num(),
3801 // which takes into account the setting of __kmp_affinity_compact.
3802 //
3803 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
3804
3805 switch (__kmp_affinity_type) {
3806
3807 case affinity_explicit:
3808 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
3809# if OMP_40_ENABLED
3810 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3811# endif
3812 {
3813 __kmp_affinity_process_proclist(&__kmp_affinity_masks,
3814 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3815 maxIndex);
3816 }
3817# if OMP_40_ENABLED
3818 else {
3819 __kmp_affinity_process_placelist(&__kmp_affinity_masks,
3820 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3821 maxIndex);
3822 }
3823# endif
3824 if (__kmp_affinity_num_masks == 0) {
3825 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3826 && (__kmp_affinity_type != affinity_none))) {
3827 KMP_WARNING(AffNoValidProcID);
3828 }
3829 __kmp_affinity_type = affinity_none;
3830 return;
3831 }
3832 break;
3833
3834 //
3835 // The other affinity types rely on sorting the Addresses according
3836 // to some permutation of the machine topology tree. Set
3837 // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
3838 // then jump to a common code fragment to do the sort and create
3839 // the array of affinity masks.
3840 //
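    // For example, affinity_scatter reverses the compact value relative to
    // the topology depth (compact = depth - 1 - compact), while
    // affinity_compact only clamps it to depth - 1.
    //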
3841
3842 case affinity_logical:
3843 __kmp_affinity_compact = 0;
3844 if (__kmp_affinity_offset) {
3845 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3846 % __kmp_avail_proc;
3847 }
3848 goto sortAddresses;
3849
3850 case affinity_physical:
3851 if (__kmp_nThreadsPerCore > 1) {
3852 __kmp_affinity_compact = 1;
3853 if (__kmp_affinity_compact >= depth) {
3854 __kmp_affinity_compact = 0;
3855 }
3856 } else {
3857 __kmp_affinity_compact = 0;
3858 }
3859 if (__kmp_affinity_offset) {
3860 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3861 % __kmp_avail_proc;
3862 }
3863 goto sortAddresses;
3864
3865 case affinity_scatter:
3866 if (__kmp_affinity_compact >= depth) {
3867 __kmp_affinity_compact = 0;
3868 }
3869 else {
3870 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
3871 }
3872 goto sortAddresses;
3873
3874 case affinity_compact:
3875 if (__kmp_affinity_compact >= depth) {
3876 __kmp_affinity_compact = depth - 1;
3877 }
3878 goto sortAddresses;
3879
Jim Cownie5e8470a2013-09-27 10:38:44 +00003880 case affinity_balanced:
Jonathan Peytoncaf09fe2015-05-27 23:27:33 +00003881 // Balanced works only for the case of a single package
Jim Cownie5e8470a2013-09-27 10:38:44 +00003882 if( nPackages > 1 ) {
3883 if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
3884 KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
3885 }
3886 __kmp_affinity_type = affinity_none;
3887 return;
3888 } else if( __kmp_affinity_uniform_topology() ) {
3889 break;
3890 } else { // Non-uniform topology
3891
3892 // Save the depth for further usage
3893 __kmp_aff_depth = depth;
3894
3895 // Number of hyper threads per core in HT machine
3896 int nth_per_core = __kmp_nThreadsPerCore;
3897
3898 int core_level;
3899 if( nth_per_core > 1 ) {
3900 core_level = depth - 2;
3901 } else {
3902 core_level = depth - 1;
3903 }
3904 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
3905 int nproc = nth_per_core * ncores;
3906
3907 procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
3908 for( int i = 0; i < nproc; i++ ) {
3909 procarr[ i ] = -1;
3910 }
3911
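            // procarr[] is a dense (core, thread context) grid: slot
            // core * nth_per_core + thread holds the OS proc id placed there,
            // and stays -1 for contexts that do not exist on this non-uniform
            // machine.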
3912 for( int i = 0; i < __kmp_avail_proc; i++ ) {
3913 int proc = address2os[ i ].second;
3914 // If depth == 3 then level=0 - package, level=1 - core, level=2 - thread.
3915 // If there is only one thread per core then depth == 2: level 0 - package,
3916 // level 1 - core.
3917 int level = depth - 1;
3918
3919                // Defaults for the nth_per_core == 1 case
3920 int thread = 0;
3921 int core = address2os[ i ].first.labels[ level ];
3922 // If the thread level exists, that is we have more than one thread context per core
3923 if( nth_per_core > 1 ) {
3924 thread = address2os[ i ].first.labels[ level ] % nth_per_core;
3925 core = address2os[ i ].first.labels[ level - 1 ];
3926 }
3927 procarr[ core * nth_per_core + thread ] = proc;
3928 }
3929
3930 break;
3931 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003932
3933 sortAddresses:
3934 //
3935 // Allocate the gtid->affinity mask table.
3936 //
3937 if (__kmp_affinity_dups) {
3938 __kmp_affinity_num_masks = __kmp_avail_proc;
3939 }
3940 else {
3941 __kmp_affinity_num_masks = numUnique;
3942 }
3943
3944# if OMP_40_ENABLED
3945 if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
3946 && ( __kmp_affinity_num_places > 0 )
3947 && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
3948 __kmp_affinity_num_masks = __kmp_affinity_num_places;
3949 }
3950# endif
3951
3952 __kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
3953 __kmp_affinity_num_masks * __kmp_affin_mask_size);
3954
3955 //
3956 // Sort the address2os table according to the current setting of
3957 // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
3958 //
3959 qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
3960 __kmp_affinity_cmp_Address_child_num);
3961 {
3962 int i;
3963 unsigned j;
3964 for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
3965 if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
3966 continue;
3967 }
3968 unsigned osId = address2os[i].second;
3969 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
3970 kmp_affin_mask_t *dest
3971 = KMP_CPU_INDEX(__kmp_affinity_masks, j);
3972 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
3973 KMP_CPU_COPY(dest, src);
3974 if (++j >= __kmp_affinity_num_masks) {
3975 break;
3976 }
3977 }
3978 KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
3979 }
3980 break;
3981
3982 default:
3983 KMP_ASSERT2(0, "Unexpected affinity setting");
3984 }
3985
3986 __kmp_free(osId2Mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003987 machine_hierarchy.init(address2os, __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003988}
3989
3990
3991void
3992__kmp_affinity_initialize(void)
3993{
3994 //
3995    // Much of the code above was written assuming that if a machine was not
3996 // affinity capable, then __kmp_affinity_type == affinity_none. We now
3997 // explicitly represent this as __kmp_affinity_type == affinity_disabled.
3998 //
3999 // There are too many checks for __kmp_affinity_type == affinity_none
4000 // in this code. Instead of trying to change them all, check if
4001 // __kmp_affinity_type == affinity_disabled, and if so, slam it with
4002 // affinity_none, call the real initialization routine, then restore
4003 // __kmp_affinity_type to affinity_disabled.
4004 //
4005 int disabled = (__kmp_affinity_type == affinity_disabled);
4006 if (! KMP_AFFINITY_CAPABLE()) {
4007 KMP_ASSERT(disabled);
4008 }
4009 if (disabled) {
4010 __kmp_affinity_type = affinity_none;
4011 }
4012 __kmp_aux_affinity_initialize();
4013 if (disabled) {
4014 __kmp_affinity_type = affinity_disabled;
4015 }
4016}
4017
4018
4019void
4020__kmp_affinity_uninitialize(void)
4021{
4022 if (__kmp_affinity_masks != NULL) {
4023 __kmp_free(__kmp_affinity_masks);
4024 __kmp_affinity_masks = NULL;
4025 }
4026 if (fullMask != NULL) {
4027 KMP_CPU_FREE(fullMask);
4028 fullMask = NULL;
4029 }
4030 __kmp_affinity_num_masks = 0;
4031# if OMP_40_ENABLED
4032 __kmp_affinity_num_places = 0;
4033# endif
4034 if (__kmp_affinity_proclist != NULL) {
4035 __kmp_free(__kmp_affinity_proclist);
4036 __kmp_affinity_proclist = NULL;
4037 }
4038 if( address2os != NULL ) {
4039 __kmp_free( address2os );
4040 address2os = NULL;
4041 }
4042 if( procarr != NULL ) {
4043 __kmp_free( procarr );
4044 procarr = NULL;
4045 }
4046}
4047
4048
4049void
4050__kmp_affinity_set_init_mask(int gtid, int isa_root)
4051{
4052 if (! KMP_AFFINITY_CAPABLE()) {
4053 return;
4054 }
4055
4056 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4057 if (th->th.th_affin_mask == NULL) {
4058 KMP_CPU_ALLOC(th->th.th_affin_mask);
4059 }
4060 else {
4061 KMP_CPU_ZERO(th->th.th_affin_mask);
4062 }
4063
4064 //
4065    // Copy the thread mask to the kmp_info_t structure.
4066 // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
4067 // that has all of the OS proc ids set, or if __kmp_affinity_respect_mask
4068 // is set, then the full mask is the same as the mask of the initialization
4069 // thread.
4070 //
4071 kmp_affin_mask_t *mask;
4072 int i;
4073
4074# if OMP_40_ENABLED
4075 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
4076# endif
4077 {
Andrey Churbanovf28f6132015-01-13 14:54:00 +00004078 if ((__kmp_affinity_type == affinity_none) || (__kmp_affinity_type == affinity_balanced)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004079 ) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004080# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004081 if (__kmp_num_proc_groups > 1) {
4082 return;
4083 }
4084# endif
4085 KMP_ASSERT(fullMask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004086 i = KMP_PLACE_ALL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004087 mask = fullMask;
4088 }
4089 else {
4090 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
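            // The mask index is chosen round-robin by gtid, rotated by the
            // affinity offset: e.g. with 4 masks and offset 1, gtids 0..3
            // would get masks 1, 2, 3, 0.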
4091 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4092 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4093 }
4094 }
4095# if OMP_40_ENABLED
4096 else {
4097 if ((! isa_root)
4098 || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004099# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004100 if (__kmp_num_proc_groups > 1) {
4101 return;
4102 }
4103# endif
4104 KMP_ASSERT(fullMask != NULL);
4105 i = KMP_PLACE_ALL;
4106 mask = fullMask;
4107 }
4108 else {
4109 //
4110 // int i = some hash function or just a counter that doesn't
4111 // always start at 0. Use gtid for now.
4112 //
4113 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4114 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4115 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4116 }
4117 }
4118# endif
4119
4120# if OMP_40_ENABLED
4121 th->th.th_current_place = i;
4122 if (isa_root) {
4123 th->th.th_new_place = i;
4124 th->th.th_first_place = 0;
4125 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4126 }
4127
4128 if (i == KMP_PLACE_ALL) {
4129 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
4130 gtid));
4131 }
4132 else {
4133 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
4134 gtid, i));
4135 }
4136# else
4137 if (i == -1) {
4138 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
4139 gtid));
4140 }
4141 else {
4142 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
4143 gtid, i));
4144 }
4145# endif /* OMP_40_ENABLED */
4146
4147 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4148
4149 if (__kmp_affinity_verbose) {
4150 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4151 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4152 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004153 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
4154 buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004155 }
4156
4157# if KMP_OS_WINDOWS
4158 //
4159 // On Windows* OS, the process affinity mask might have changed.
4160 // If the user didn't request affinity and this call fails,
4161 // just continue silently. See CQ171393.
4162 //
4163 if ( __kmp_affinity_type == affinity_none ) {
4164 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
4165 }
4166 else
4167# endif
4168 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4169}
4170
4171
4172# if OMP_40_ENABLED
4173
4174void
4175__kmp_affinity_set_place(int gtid)
4176{
4177 int retval;
4178
4179 if (! KMP_AFFINITY_CAPABLE()) {
4180 return;
4181 }
4182
4183 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4184
4185 KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
4186 gtid, th->th.th_new_place, th->th.th_current_place));
4187
4188 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00004189 // Check that the new place is within this thread's partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004190 //
4191 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004192 KMP_ASSERT(th->th.th_new_place >= 0);
4193 KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004194 if (th->th.th_first_place <= th->th.th_last_place) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004195 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004196 && (th->th.th_new_place <= th->th.th_last_place));
4197 }
4198 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004199 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004200 || (th->th.th_new_place >= th->th.th_last_place));
4201 }
4202
4203 //
4204    // Copy the thread mask to the kmp_info_t structure,
4205 // and set this thread's affinity.
4206 //
4207 kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
4208 th->th.th_new_place);
4209 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4210 th->th.th_current_place = th->th.th_new_place;
4211
4212 if (__kmp_affinity_verbose) {
4213 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4214 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4215 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004216 KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
4217 gtid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004218 }
4219 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4220}
4221
4222# endif /* OMP_40_ENABLED */
4223
4224
4225int
4226__kmp_aux_set_affinity(void **mask)
4227{
4228 int gtid;
4229 kmp_info_t *th;
4230 int retval;
4231
4232 if (! KMP_AFFINITY_CAPABLE()) {
4233 return -1;
4234 }
4235
4236 gtid = __kmp_entry_gtid();
4237 KA_TRACE(1000, ;{
4238 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4239 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4240 (kmp_affin_mask_t *)(*mask));
4241 __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
4242 gtid, buf);
4243 });
4244
4245 if (__kmp_env_consistency_check) {
4246 if ((mask == NULL) || (*mask == NULL)) {
4247 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4248 }
4249 else {
4250 unsigned proc;
4251 int num_procs = 0;
4252
4253 for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
4254 if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
4255 continue;
4256 }
4257 num_procs++;
4258 if (! KMP_CPU_ISSET(proc, fullMask)) {
4259 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4260 break;
4261 }
4262 }
4263 if (num_procs == 0) {
4264 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4265 }
4266
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004267# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004268 if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
4269 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4270 }
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004271# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004272
4273 }
4274 }
4275
4276 th = __kmp_threads[gtid];
4277 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4278 retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4279 if (retval == 0) {
4280 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
4281 }
4282
4283# if OMP_40_ENABLED
4284 th->th.th_current_place = KMP_PLACE_UNDEFINED;
4285 th->th.th_new_place = KMP_PLACE_UNDEFINED;
4286 th->th.th_first_place = 0;
4287 th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004288
4289 //
4290    // Turn off 4.0 affinity for the current thread at this parallel level.
4291 //
4292 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004293# endif
4294
4295 return retval;
4296}
4297
4298
4299int
4300__kmp_aux_get_affinity(void **mask)
4301{
4302 int gtid;
4303 int retval;
4304 kmp_info_t *th;
4305
4306 if (! KMP_AFFINITY_CAPABLE()) {
4307 return -1;
4308 }
4309
4310 gtid = __kmp_entry_gtid();
4311 th = __kmp_threads[gtid];
4312 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4313
4314 KA_TRACE(1000, ;{
4315 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4316 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4317 th->th.th_affin_mask);
4318 __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
4319 });
4320
4321 if (__kmp_env_consistency_check) {
4322 if ((mask == NULL) || (*mask == NULL)) {
4323 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
4324 }
4325 }
4326
4327# if !KMP_OS_WINDOWS
4328
4329 retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4330 KA_TRACE(1000, ;{
4331 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4332 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4333 (kmp_affin_mask_t *)(*mask));
4334 __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
4335 });
4336 return retval;
4337
4338# else
4339
4340 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
4341 return 0;
4342
4343# endif /* KMP_OS_WINDOWS */
4344
4345}
4346
Jim Cownie5e8470a2013-09-27 10:38:44 +00004347int
4348__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
4349{
4350 int retval;
4351
4352 if (! KMP_AFFINITY_CAPABLE()) {
4353 return -1;
4354 }
4355
4356 KA_TRACE(1000, ;{
4357 int gtid = __kmp_entry_gtid();
4358 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4359 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4360 (kmp_affin_mask_t *)(*mask));
4361 __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
4362 proc, gtid, buf);
4363 });
4364
4365 if (__kmp_env_consistency_check) {
4366 if ((mask == NULL) || (*mask == NULL)) {
4367 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
4368 }
4369 }
4370
4371 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4372 return -1;
4373 }
4374 if (! KMP_CPU_ISSET(proc, fullMask)) {
4375 return -2;
4376 }
4377
4378 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
4379 return 0;
4380}
4381
4382
4383int
4384__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
4385{
4386 int retval;
4387
4388 if (! KMP_AFFINITY_CAPABLE()) {
4389 return -1;
4390 }
4391
4392 KA_TRACE(1000, ;{
4393 int gtid = __kmp_entry_gtid();
4394 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4395 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4396 (kmp_affin_mask_t *)(*mask));
4397 __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
4398 proc, gtid, buf);
4399 });
4400
4401 if (__kmp_env_consistency_check) {
4402 if ((mask == NULL) || (*mask == NULL)) {
4403 KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
4404 }
4405 }
4406
4407 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4408 return -1;
4409 }
4410 if (! KMP_CPU_ISSET(proc, fullMask)) {
4411 return -2;
4412 }
4413
4414 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
4415 return 0;
4416}
4417
4418
4419int
4420__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
4421{
4422 int retval;
4423
4424 if (! KMP_AFFINITY_CAPABLE()) {
4425 return -1;
4426 }
4427
4428 KA_TRACE(1000, ;{
4429 int gtid = __kmp_entry_gtid();
4430 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4431 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4432 (kmp_affin_mask_t *)(*mask));
4433 __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
4434 proc, gtid, buf);
4435 });
4436
4437 if (__kmp_env_consistency_check) {
4438 if ((mask == NULL) || (*mask == NULL)) {
Andrey Churbanov4b2f17a2015-01-29 15:49:22 +00004439 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
Jim Cownie5e8470a2013-09-27 10:38:44 +00004440 }
4441 }
4442
4443 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4444 return 0;
4445 }
4446 if (! KMP_CPU_ISSET(proc, fullMask)) {
4447 return 0;
4448 }
4449
4450 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
4451}
4452
Jim Cownie5e8470a2013-09-27 10:38:44 +00004453
4454// Dynamic affinity settings - Affinity balanced
4455void __kmp_balanced_affinity( int tid, int nthreads )
4456{
4457 if( __kmp_affinity_uniform_topology() ) {
4458 int coreID;
4459 int threadID;
4460 // Number of hyper threads per core in HT machine
4461 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
4462 // Number of cores
4463 int ncores = __kmp_ncores;
4464 // How many threads will be bound to each core
4465 int chunk = nthreads / ncores;
4466        // How many cores will have an additional thread bound to them ("big cores")
4467 int big_cores = nthreads % ncores;
4468 // Number of threads on the big cores
4469 int big_nth = ( chunk + 1 ) * big_cores;
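        // Illustrative example (hypothetical numbers): nthreads = 10 on 4 cores
        // gives chunk = 2, big_cores = 2, big_nth = 6, so tids 0-5 land on
        // cores 0-1 (3 per core) and tids 6-9 land on cores 2-3 (2 per core).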
4470 if( tid < big_nth ) {
4471 coreID = tid / (chunk + 1 );
4472 threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
4473 } else { //tid >= big_nth
4474 coreID = ( tid - big_cores ) / chunk;
4475 threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
4476 }
4477
4478 KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
4479 "Illegal set affinity operation when not capable");
4480
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00004481 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004482 KMP_CPU_ZERO(mask);
4483
4484 // Granularity == thread
4485 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4486 int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
4487 KMP_CPU_SET( osID, mask);
4488 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4489 for( int i = 0; i < __kmp_nth_per_core; i++ ) {
4490 int osID;
4491 osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
4492 KMP_CPU_SET( osID, mask);
4493 }
4494 }
4495 if (__kmp_affinity_verbose) {
4496 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4497 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004498 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4499 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004500 }
4501 __kmp_set_system_affinity( mask, TRUE );
4502 } else { // Non-uniform topology
4503
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00004504 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004505 KMP_CPU_ZERO(mask);
4506
4507 // Number of hyper threads per core in HT machine
4508 int nth_per_core = __kmp_nThreadsPerCore;
4509 int core_level;
4510 if( nth_per_core > 1 ) {
4511 core_level = __kmp_aff_depth - 2;
4512 } else {
4513 core_level = __kmp_aff_depth - 1;
4514 }
4515
4516        // Number of cores - maximum value; it does not count trailing cores with 0 processors
4517 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
4518
4519 // For performance gain consider the special case nthreads == __kmp_avail_proc
4520 if( nthreads == __kmp_avail_proc ) {
4521 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4522 int osID = address2os[ tid ].second;
4523 KMP_CPU_SET( osID, mask);
4524 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4525 int coreID = address2os[ tid ].first.labels[ core_level ];
4526                // We'll count the osIDs found for the current core; there can be no more than nth_per_core of them;
4527                // since address2os is sorted, we can break when cnt == nth_per_core
4528 int cnt = 0;
4529 for( int i = 0; i < __kmp_avail_proc; i++ ) {
4530 int osID = address2os[ i ].second;
4531 int core = address2os[ i ].first.labels[ core_level ];
4532 if( core == coreID ) {
4533 KMP_CPU_SET( osID, mask);
4534 cnt++;
4535 if( cnt == nth_per_core ) {
4536 break;
4537 }
4538 }
4539 }
4540 }
4541 } else if( nthreads <= __kmp_ncores ) {
4542
4543 int core = 0;
4544 for( int i = 0; i < ncores; i++ ) {
4545 // Check if this core from procarr[] is in the mask
4546 int in_mask = 0;
4547 for( int j = 0; j < nth_per_core; j++ ) {
4548 if( procarr[ i * nth_per_core + j ] != - 1 ) {
4549 in_mask = 1;
4550 break;
4551 }
4552 }
4553 if( in_mask ) {
4554 if( tid == core ) {
4555 for( int j = 0; j < nth_per_core; j++ ) {
4556 int osID = procarr[ i * nth_per_core + j ];
4557 if( osID != -1 ) {
4558 KMP_CPU_SET( osID, mask );
4559 // For granularity=thread it is enough to set the first available osID for this core
4560 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4561 break;
4562 }
4563 }
4564 }
4565 break;
4566 } else {
4567 core++;
4568 }
4569 }
4570 }
4571
4572 } else { // nthreads > __kmp_ncores
4573
4574 // Array to save the number of processors at each core
Jonathan Peyton7be075332015-06-22 15:53:50 +00004575 int* nproc_at_core = (int*)KMP_ALLOCA(sizeof(int)*ncores);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004576 // Array to save the number of cores with "x" available processors;
Jonathan Peyton7be075332015-06-22 15:53:50 +00004577 int* ncores_with_x_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004578 // Array to save the number of cores with # procs from x to nth_per_core
Jonathan Peyton7be075332015-06-22 15:53:50 +00004579 int* ncores_with_x_to_max_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004580
4581 for( int i = 0; i <= nth_per_core; i++ ) {
4582 ncores_with_x_procs[ i ] = 0;
4583 ncores_with_x_to_max_procs[ i ] = 0;
4584 }
4585
4586 for( int i = 0; i < ncores; i++ ) {
4587 int cnt = 0;
4588 for( int j = 0; j < nth_per_core; j++ ) {
4589 if( procarr[ i * nth_per_core + j ] != -1 ) {
4590 cnt++;
4591 }
4592 }
4593 nproc_at_core[ i ] = cnt;
4594 ncores_with_x_procs[ cnt ]++;
4595 }
4596
4597 for( int i = 0; i <= nth_per_core; i++ ) {
4598 for( int j = i; j <= nth_per_core; j++ ) {
4599 ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
4600 }
4601 }
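            // Now ncores_with_x_to_max_procs[ i ] is the number of cores that
            // have at least i usable thread contexts; it bounds each pass of
            // the distribution loop below.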
4602
4603 // Max number of processors
4604 int nproc = nth_per_core * ncores;
4605            // An array to keep the number of threads per context
4606 int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
4607 for( int i = 0; i < nproc; i++ ) {
4608 newarr[ i ] = 0;
4609 }
4610
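            // Distribute nthreads threads over the available contexts: the
            // first pass ( flag == 0 ) gives each free context at most one
            // thread; later passes ( flag == 1 ) stack extra threads onto
            // already used contexts when the team is oversubscribed.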
4611 int nth = nthreads;
4612 int flag = 0;
4613 while( nth > 0 ) {
4614 for( int j = 1; j <= nth_per_core; j++ ) {
4615 int cnt = ncores_with_x_to_max_procs[ j ];
4616 for( int i = 0; i < ncores; i++ ) {
4617 // Skip the core with 0 processors
4618 if( nproc_at_core[ i ] == 0 ) {
4619 continue;
4620 }
4621 for( int k = 0; k < nth_per_core; k++ ) {
4622 if( procarr[ i * nth_per_core + k ] != -1 ) {
4623 if( newarr[ i * nth_per_core + k ] == 0 ) {
4624 newarr[ i * nth_per_core + k ] = 1;
4625 cnt--;
4626 nth--;
4627 break;
4628 } else {
4629 if( flag != 0 ) {
4630 newarr[ i * nth_per_core + k ] ++;
4631 cnt--;
4632 nth--;
4633 break;
4634 }
4635 }
4636 }
4637 }
4638 if( cnt == 0 || nth == 0 ) {
4639 break;
4640 }
4641 }
4642 if( nth == 0 ) {
4643 break;
4644 }
4645 }
4646 flag = 1;
4647 }
4648 int sum = 0;
4649 for( int i = 0; i < nproc; i++ ) {
4650 sum += newarr[ i ];
4651 if( sum > tid ) {
4652 // Granularity == thread
4653 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4654 int osID = procarr[ i ];
4655 KMP_CPU_SET( osID, mask);
4656 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4657 int coreID = i / nth_per_core;
4658 for( int ii = 0; ii < nth_per_core; ii++ ) {
4659 int osID = procarr[ coreID * nth_per_core + ii ];
4660 if( osID != -1 ) {
4661 KMP_CPU_SET( osID, mask);
4662 }
4663 }
4664 }
4665 break;
4666 }
4667 }
4668 __kmp_free( newarr );
4669 }
4670
4671 if (__kmp_affinity_verbose) {
4672 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4673 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004674 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4675 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004676 }
4677 __kmp_set_system_affinity( mask, TRUE );
4678 }
4679}
4680
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004681#else
4682 // affinity not supported
4683
Andrey Churbanovaa1f2b62015-04-13 18:51:59 +00004684static const kmp_uint32 noaff_maxLevels=7;
4685kmp_uint32 noaff_skipPerLevel[noaff_maxLevels];
4686kmp_uint32 noaff_depth;
4687kmp_uint8 noaff_leaf_kids;
4688kmp_int8 noaff_uninitialized=1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004689
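// With affinity support compiled out there is no real topology, so noaff_init()
// fabricates a plausible machine hierarchy for the hierarchical barrier: leaf
// nodes hold up to 4 threads, the upper levels are sized from nprocs, and the
// result is capped at noaff_maxLevels levels.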
Andrey Churbanovaa1f2b62015-04-13 18:51:59 +00004690void noaff_init(int nprocs)
4691{
4692 kmp_int8 result = KMP_COMPARE_AND_STORE_ACQ8(&noaff_uninitialized, 1, 2);
4693 if (result == 0) return; // Already initialized
4694 else if (result == 2) { // Someone else is initializing
4695 while (TCR_1(noaff_uninitialized) != 0) KMP_CPU_PAUSE();
4696 return;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004697 }
Andrey Churbanovaa1f2b62015-04-13 18:51:59 +00004698 KMP_DEBUG_ASSERT(result==1);
4699
4700 kmp_uint32 numPerLevel[noaff_maxLevels];
4701 noaff_depth = 1;
4702 for (kmp_uint32 i=0; i<noaff_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
4703 numPerLevel[i] = 1;
4704 noaff_skipPerLevel[i] = 1;
4705 }
4706
4707 numPerLevel[0] = 4;
4708 numPerLevel[1] = nprocs/4;
4709 if (nprocs%4) numPerLevel[1]++;
4710
4711 for (int i=noaff_maxLevels-1; i>=0; --i) // count non-empty levels to get depth
4712 if (numPerLevel[i] != 1 || noaff_depth > 1) // only count one top-level '1'
4713 noaff_depth++;
4714
4715 kmp_uint32 branch = 4;
4716 if (numPerLevel[0] == 1) branch = nprocs/4;
4717 if (branch<4) branch=4;
4718 for (kmp_uint32 d=0; d<noaff_depth-1; ++d) { // optimize hierarchy width
4719 while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
4720 if (numPerLevel[d] & 1) numPerLevel[d]++;
4721 numPerLevel[d] = numPerLevel[d] >> 1;
4722 if (numPerLevel[d+1] == 1) noaff_depth++;
4723 numPerLevel[d+1] = numPerLevel[d+1] << 1;
4724 }
4725 if(numPerLevel[0] == 1) {
4726 branch = branch >> 1;
4727 if (branch<4) branch = 4;
4728 }
4729 }
4730
4731 for (kmp_uint32 i=1; i<noaff_depth; ++i)
4732 noaff_skipPerLevel[i] = numPerLevel[i-1] * noaff_skipPerLevel[i-1];
4733 // Fill in hierarchy in the case of oversubscription
4734 for (kmp_uint32 i=noaff_depth; i<noaff_maxLevels; ++i)
4735 noaff_skipPerLevel[i] = 2*noaff_skipPerLevel[i-1];
4736 noaff_leaf_kids = (kmp_uint8)numPerLevel[0]-1;
4737 noaff_uninitialized = 0; // One writer
4738
4739}
4740
4741void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
4742 if (noaff_uninitialized)
4743 noaff_init(nproc);
4744
4745 thr_bar->depth = noaff_depth;
4746 thr_bar->base_leaf_kids = noaff_leaf_kids;
4747 thr_bar->skip_per_level = noaff_skipPerLevel;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004748}
4749
Alp Toker763b9392014-02-28 09:42:41 +00004750#endif // KMP_AFFINITY_SUPPORTED