/*
 * kmp_affinity.cpp -- affinity management
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_str.h"
#include "kmp_wrapper_getpid.h"

#if KMP_AFFINITY_SUPPORTED
24//
25// Print the affinity mask to the character array in a pretty format.
26//
27char *
28__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
29{
30 KMP_ASSERT(buf_len >= 40);
31 char *scan = buf;
32 char *end = buf + buf_len - 1;
33
34 //
35 // Find first element / check for empty set.
36 //
37 size_t i;
38 for (i = 0; i < KMP_CPU_SETSIZE; i++) {
39 if (KMP_CPU_ISSET(i, mask)) {
40 break;
41 }
42 }
43 if (i == KMP_CPU_SETSIZE) {
44 sprintf(scan, "{<empty>}");
45 while (*scan != '\0') scan++;
46 KMP_ASSERT(scan <= end);
47 return buf;
48 }
49
Jim Cownie4cc4bb42014-10-07 16:25:50 +000050 sprintf(scan, "{%ld", (long)i);
Jim Cownie5e8470a2013-09-27 10:38:44 +000051 while (*scan != '\0') scan++;
52 i++;
53 for (; i < KMP_CPU_SETSIZE; i++) {
54 if (! KMP_CPU_ISSET(i, mask)) {
55 continue;
56 }
57
58 //
59 // Check for buffer overflow. A string of the form ",<n>" will have
60 // at most 10 characters, plus we want to leave room to print ",...}"
61 // if the set is too large to print for a total of 15 characters.
62 // We already left room for '\0' in setting end.
63 //
64 if (end - scan < 15) {
65 break;
66 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +000067 sprintf(scan, ",%-ld", (long)i);
Jim Cownie5e8470a2013-09-27 10:38:44 +000068 while (*scan != '\0') scan++;
69 }
70 if (i < KMP_CPU_SETSIZE) {
71 sprintf(scan, ",...");
72 while (*scan != '\0') scan++;
73 }
74 sprintf(scan, "}");
75 while (*scan != '\0') scan++;
76 KMP_ASSERT(scan <= end);
77 return buf;
78}


void
__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
{
    KMP_CPU_ZERO(mask);

# if KMP_GROUP_AFFINITY

    if (__kmp_num_proc_groups > 1) {
        int group;
        KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
        for (group = 0; group < __kmp_num_proc_groups; group++) {
            int i;
            int num = __kmp_GetActiveProcessorCount(group);
            for (i = 0; i < num; i++) {
                KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
            }
        }
    }
    else

# endif /* KMP_GROUP_AFFINITY */

    {
        int proc;
        for (proc = 0; proc < __kmp_xproc; proc++) {
            KMP_CPU_SET(proc, mask);
        }
    }
}


//
// In Linux* OS debug & cover (-O0) builds, we need to avoid inline member
// functions.
//
// The icc codegen emits sections with extremely long names, of the form
// ".gnu.linkonce.<mangled_name>".  There seems to have been a linker bug
// introduced between GNU ld version 2.14.90.0.4 and 2.15.92.0.2 involving
// some sort of memory corruption or table overflow that is triggered by
// these long strings.  I checked the latest version of the linker -
// GNU ld (Linux* OS/GNU Binutils) 2.18.50.0.7.20080422 - and the bug is not
// fixed.
//
// Unfortunately, my attempts to reproduce it in a smaller example have
// failed - I'm not sure what the prospects are of getting it fixed
// properly - but we need a reproducer smaller than all of libiomp.
//
// Work around the problem by avoiding inline constructors in such builds.
// We do this for all platforms, not just Linux* OS - non-inline functions
// are more debuggable and provide better coverage than inline functions.
// Use inline functions in shipping libs, for performance.
//
# if !defined(KMP_DEBUG) && !defined(COVER)

class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth)
      : depth(_depth), leader(FALSE) {
    }
    Address &operator=(const Address &b) {
        depth = b.depth;
        for (unsigned i = 0; i < depth; i++) {
            labels[i] = b.labels[i];
            childNums[i] = b.childNums[i];
        }
        leader = FALSE;
        return *this;
    }
    bool operator==(const Address &b) const {
        if (depth != b.depth)
            return false;
        for (unsigned i = 0; i < depth; i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool isClose(const Address &b, int level) const {
        if (depth != b.depth)
            return false;
        if ((unsigned)level >= depth)
            return true;
        for (unsigned i = 0; i < (depth - level); i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool operator!=(const Address &b) const {
        return !operator==(b);
    }
};

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {
    }
    AddrUnsPair &operator=(const AddrUnsPair &b)
    {
        first = b.first;
        second = b.second;
        return *this;
    }
};

# else

class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth);
    Address &operator=(const Address &b);
    bool operator==(const Address &b) const;
    bool isClose(const Address &b, int level) const;
    bool operator!=(const Address &b) const;
};

Address::Address(unsigned _depth)
{
    depth = _depth;
    leader = FALSE;
}

Address &Address::operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
        labels[i] = b.labels[i];
        childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
}

bool Address::operator==(const Address &b) const {
    if (depth != b.depth)
        return false;
    for (unsigned i = 0; i < depth; i++)
        if (labels[i] != b.labels[i])
            return false;
    return true;
}

bool Address::isClose(const Address &b, int level) const {
    if (depth != b.depth)
        return false;
    if ((unsigned)level >= depth)
        return true;
    for (unsigned i = 0; i < (depth - level); i++)
        if (labels[i] != b.labels[i])
            return false;
    return true;
}

bool Address::operator!=(const Address &b) const {
    return !operator==(b);
}

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second);
    AddrUnsPair &operator=(const AddrUnsPair &b);
};

AddrUnsPair::AddrUnsPair(Address _first, unsigned _second)
  : first(_first), second(_second)
{
}

AddrUnsPair &AddrUnsPair::operator=(const AddrUnsPair &b)
{
    first = b.first;
    second = b.second;
    return *this;
}

# endif /* !defined(KMP_DEBUG) && !defined(COVER) */
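
//
// Illustrative example (assumed values, not from the original source): on a
// machine with 2 packages, 2 cores/package and 2 threads/core, the OS proc
// bound to package 1, core 0, thread 1 is described by an Address with
// depth == 3 and labels[] == { 1, 0, 1 }.  After
// __kmp_affinity_assign_child_nums() runs, childNums[] holds the ordinal
// position of each label among its siblings, which is what the "compact"
// comparator below actually sorts on.
//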


static int
__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)
      ->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)
      ->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    for (i = 0; i < depth; i++) {
        if (aa->labels[i] < bb->labels[i]) return -1;
        if (aa->labels[i] > bb->labels[i]) return 1;
    }
    return 0;
}


static int
__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)
      ->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)
      ->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
    KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
    for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
        int j = depth - i - 1;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    for (; i < depth; i++) {
        int j = i - __kmp_affinity_compact;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    return 0;
}
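
//
// Worked example (illustrative): with depth == 3 (package, core, thread) and
// __kmp_affinity_compact == 1, the first loop above compares the thread-level
// childNums, and the second loop then compares package and core.  Sorting
// this way groups all thread-0 contexts ahead of all thread-1 contexts, so
// consecutive entries in the sorted table land on different cores before
// doubling up on the hardware threads of any one core.
//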

/** A structure for holding machine-specific hierarchy info to be computed once at init. */
class hierarchy_info {
public:
    /** Typical levels are threads/core, cores/package or socket, packages/node, nodes/machine,
        etc.  We don't want to get specific with nomenclature */
    static const kmp_uint32 maxLevels=7;

    /** This is specifically the depth of the machine configuration hierarchy, in terms of the
        number of levels along the longest path from root to any leaf. It corresponds to the
        number of entries in numPerLevel if we exclude all but one trailing 1. */
    kmp_uint32 depth;
    kmp_uint32 base_depth;
    kmp_uint32 base_num_threads;
    bool uninitialized;

    /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
        node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
        and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
    kmp_uint32 numPerLevel[maxLevels];
    kmp_uint32 skipPerLevel[maxLevels];

    void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
        int hier_depth = adr2os[0].first.depth;
        int level = 0;
        for (int i=hier_depth-1; i>=0; --i) {
            int max = -1;
            for (int j=0; j<num_addrs; ++j) {
                int next = adr2os[j].first.childNums[i];
                if (next > max) max = next;
            }
            numPerLevel[level] = max+1;
            ++level;
        }
    }

    hierarchy_info() : depth(1), uninitialized(true) {}
    void init(AddrUnsPair *adr2os, int num_addrs)
    {
        /* Added explicit initialization of the depth here to prevent usage of dirty value
           observed when static library is re-initialized multiple times (e.g. when
           non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
        depth = 1;
        uninitialized = false;
        for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        // Sort table by physical ID
        if (adr2os) {
            qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
            deriveLevels(adr2os, num_addrs);
        }
        else {
            numPerLevel[0] = 4;
            numPerLevel[1] = num_addrs/4;
            if (num_addrs%4) numPerLevel[1]++;
        }

        base_num_threads = num_addrs;
        for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
            if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
                depth++;

        kmp_uint32 branch = 4;
        if (numPerLevel[0] == 1) branch = num_addrs/4;
        if (branch<4) branch=4;
        for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
            while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
                if (numPerLevel[d] & 1) numPerLevel[d]++;
                numPerLevel[d] = numPerLevel[d] >> 1;
                if (numPerLevel[d+1] == 1) depth++;
                numPerLevel[d+1] = numPerLevel[d+1] << 1;
            }
            if (numPerLevel[0] == 1) {
                branch = branch >> 1;
                if (branch<4) branch = 4;
            }
        }

        for (kmp_uint32 i=1; i<depth; ++i)
            skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];

        base_depth = depth;
    }
};
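
//
// Worked example (illustrative): for numPerLevel = {2, 4, 4, 1, 1, 1, 1}
// (2 threads/core, 4 cores/package, 4 packages), the final loop in init()
// yields skipPerLevel = {1, 2, 8, 32, ...}; i.e. skipPerLevel[i] is the
// number of leaves spanned by one subtree rooted at level i.
//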

static hierarchy_info machine_hierarchy;

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
    if (machine_hierarchy.uninitialized)
        machine_hierarchy.init(NULL, nproc);

    if (nproc <= machine_hierarchy.base_num_threads)
        machine_hierarchy.depth = machine_hierarchy.base_depth;
    KMP_DEBUG_ASSERT(machine_hierarchy.depth > 0);
    while (nproc > machine_hierarchy.skipPerLevel[machine_hierarchy.depth-1]) {
        machine_hierarchy.depth++;
        machine_hierarchy.skipPerLevel[machine_hierarchy.depth-1] = 2*machine_hierarchy.skipPerLevel[machine_hierarchy.depth-2];
    }
    thr_bar->depth = machine_hierarchy.depth;
    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}
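
//
// Illustrative note (assumed numbers): if the hierarchy was initialized for
// 32 leaves with skipPerLevel = {1, 2, 8, 32} and a later call asks for
// nproc == 64, the while loop above appends one artificial level, doubling
// the top skip value to 64 so the barrier tree still covers every thread.
//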

//
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the child_num
// vector of the address object.  This is done in case the labels used for
// the children at one node of the hierarchy differ from those used for
// another node at the same level.  Example: suppose the machine has 2 nodes
// with 2 packages each.  The first node contains packages 601 and 602, and
// the second node contains packages 603 and 604.  If we try to sort the table
// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
// because we are paying attention to the labels themselves, not the ordinal
// child numbers.  By using the child numbers in the sort, the result is
// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
//
static void
__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
  int numAddrs)
{
    KMP_DEBUG_ASSERT(numAddrs > 0);
    int depth = address2os->first.depth;
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
      * sizeof(unsigned));
    int labCt;
    for (labCt = 0; labCt < depth; labCt++) {
        address2os[0].first.childNums[labCt] = counts[labCt] = 0;
        lastLabel[labCt] = address2os[0].first.labels[labCt];
    }
    int i;
    for (i = 1; i < numAddrs; i++) {
        for (labCt = 0; labCt < depth; labCt++) {
            if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
                int labCt2;
                for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
                    counts[labCt2] = 0;
                    lastLabel[labCt2] = address2os[i].first.labels[labCt2];
                }
                counts[labCt]++;
                lastLabel[labCt] = address2os[i].first.labels[labCt];
                break;
            }
        }
        for (labCt = 0; labCt < depth; labCt++) {
            address2os[i].first.childNums[labCt] = counts[labCt];
        }
        for (; labCt < (int)Address::maxDepth; labCt++) {
            address2os[i].first.childNums[labCt] = 0;
        }
    }
}
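
//
// Worked example (illustrative), continuing the node/package example above:
// for the sorted label vectors {0,601}, {0,602}, {1,603}, {1,604} the loop
// assigns childNums {0,0}, {0,1}, {1,0}, {1,1} - package 603 becomes child 0
// of node 1 even though its raw label is larger than 602's.
//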


//
// All of the __kmp_affinity_create_*_map() routines should set
// __kmp_affinity_masks to a vector of affinity mask objects of length
// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
// return the number of levels in the machine topology tree (zero if
// __kmp_affinity_type == affinity_none).
//
// All of the __kmp_affinity_create_*_map() routines should set *fullMask
// to the affinity mask for the initialization thread.  They need to save and
// restore the mask, and it could be needed later, so saving it is just an
// optimization to avoid calling kmp_get_system_affinity() again.
//
static kmp_affin_mask_t *fullMask = NULL;

kmp_affin_mask_t *
__kmp_affinity_get_fullMask() { return fullMask; }


static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif
//
// __kmp_affinity_uniform_topology() doesn't work when called from
// places which support arbitrarily many levels in the machine topology
// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
// and __kmp_affinity_create_x2apicid_map().
//
inline static bool
__kmp_affinity_uniform_topology()
{
    return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}


//
// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc.
//
static void
__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
  int pkgLevel, int coreLevel, int threadLevel)
{
    int proc;

    KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
    for (proc = 0; proc < len; proc++) {
        int level;
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);
        for (level = 0; level < depth; level++) {
            if (level == threadLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
            }
            else if (level == coreLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
            }
            else if (level == pkgLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
            }
            else if (level > pkgLevel) {
                __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                  level - pkgLevel - 1);
            }
            else {
                __kmp_str_buf_print(&buf, "L%d ", level);
            }
            __kmp_str_buf_print(&buf, "%d ",
              address2os[proc].first.labels[level]);
        }
        KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
          buf.str);
        __kmp_str_buf_free(&buf);
    }
}


//
// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread id's <-> processor id's.
//
static int
__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Even if __kmp_affinity_type == affinity_none, this routine might still
    // be called to set __kmp_ncores, as well as
    // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(__kmp_affinity_type == affinity_none);
        __kmp_ncores = nPackages = __kmp_xproc;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nPackages = __kmp_avail_proc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);

        KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }
    if (__kmp_affinity_type == affinity_none) {
        return 0;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    unsigned int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(1);
        addr.labels[0] = i;
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
    }
    if (__kmp_affinity_verbose) {
        KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Only the package level is modeled in the machine topology map,
        // so the #levels of granularity is either 0 or 1.
        //
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels = 1;
        }
        else {
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 1;
}


# if KMP_GROUP_AFFINITY

//
// If multiple Windows* OS processor groups exist, we can create a 2-level
// topology map with the groups at level 0 and the individual procs at
// level 1.
//
// This facilitates letting the threads float among all procs in a group,
// if granularity=group (the default when there are multiple groups).
//
static int
__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // If we don't have multiple processor groups, return now.
    // The flat mapping will be used.
    //
    if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
        // FIXME set *msg_id
        return -1;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(2);
        addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
        addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
              addr.labels[1]);
        }
    }

    if (__kmp_affinity_gran_levels < 0) {
        if (__kmp_affinity_gran == affinity_gran_group) {
            __kmp_affinity_gran_levels = 1;
        }
        else if ((__kmp_affinity_gran == affinity_gran_fine)
          || (__kmp_affinity_gran == affinity_gran_thread)) {
            __kmp_affinity_gran_levels = 0;
        }
        else {
            const char *gran_str = NULL;
            if (__kmp_affinity_gran == affinity_gran_core) {
                gran_str = "core";
            }
            else if (__kmp_affinity_gran == affinity_gran_package) {
                gran_str = "package";
            }
            else if (__kmp_affinity_gran == affinity_gran_node) {
                gran_str = "node";
            }
            else {
                KMP_ASSERT(0);
            }

            // Warning: can't use affinity granularity \"gran\" with group topology method, using "thread"
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 2;
}

# endif /* KMP_GROUP_AFFINITY */


# if KMP_ARCH_X86 || KMP_ARCH_X86_64

static int
__kmp_cpuid_mask_width(int count) {
    int r = 0;

    while ((1<<r) < count)
        ++r;
    return r;
}
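
//
// Illustrative example: __kmp_cpuid_mask_width(6) == 3, since 2^3 = 8 is the
// smallest power of two >= 6, i.e. 3 bits are needed to encode 6 distinct
// ids in an Apic ID bit field.
//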


class apicThreadInfo {
public:
    unsigned osId;              // param to __kmp_affinity_bind_thread
    unsigned apicId;            // from cpuid after binding
    unsigned maxCoresPerPkg;    //      ""
    unsigned maxThreadsPerPkg;  //      ""
    unsigned pkgId;             // inferred from above values
    unsigned coreId;            //      ""
    unsigned threadId;          //      ""
};


static int
__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->osId < bb->osId) return -1;
    if (aa->osId > bb->osId) return 1;
    return 0;
}


static int
__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->pkgId < bb->pkgId) return -1;
    if (aa->pkgId > bb->pkgId) return 1;
    if (aa->coreId < bb->coreId) return -1;
    if (aa->coreId > bb->coreId) return 1;
    if (aa->threadId < bb->threadId) return -1;
    if (aa->threadId > bb->threadId) return 1;
    return 0;
}


//
// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
// an algorithm which cycles through the available os threads, setting
// the current thread's affinity mask to each of them in turn, and then
// retrieving the Apic Id for each thread context using the cpuid instruction.
//
static int
__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;
    int rc;
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check if cpuid leaf 4 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 4) {
        *msg_id = kmp_i18n_str_NoLeaf4Support;
        return -1;
    }

    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
    // the defaults that we calculated from issuing cpuid without binding
    // to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        //
        // Get an upper bound on the number of threads per package using
        // cpuid(1).
        //
        // On some OS/chip combinations where HT is supported by the chip
        // but is disabled, this value will be 2 on a single core chip.
        // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
        //
        __kmp_x86_cpuid(1, 0, &buf);
        int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (maxThreadsPerPkg == 0) {
            maxThreadsPerPkg = 1;
        }

        //
        // The num cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // The author of cpu_count.cpp treated this as only an upper bound
        // on the number of cores, but I haven't seen any cases where it
        // was greater than the actual number of cores, so we will treat
        // it as exact in this block of code.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            nCoresPerPkg = 1;
        }

        //
        // There is no way to reliably tell if HT is enabled without issuing
        // the cpuid instruction from every thread, and correlating the cpuid
        // info, so if the machine is not affinity capable, we assume that HT
        // is off.  We have seen quite a few machines where maxThreadsPerPkg
        // is 2, yet the machine does not support HT.
        //
        // - Older OSes are usually found on machines with older chips, which
        //   do not support HT.
        //
        // - The performance penalty for mistakenly identifying a machine as
        //   HT when it isn't (which results in blocktime being incorrectly set
        //   to 0) is greater than the penalty for mistakenly identifying
        //   a machine as being 1 thread/core when it is really HT enabled
        //   (which results in blocktime being incorrectly set to a positive
        //   value).
        //
        __kmp_ncores = __kmp_xproc;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        __kmp_nThreadsPerCore = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    KMP_ASSERT(oldMask != NULL);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    // The relevant information is:
    //
    // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
    //     has a unique Apic Id, which is of the form pkg# : core# : thread#.
    //
    // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1).  The
    //     value of this field determines the width of the core# + thread#
    //     fields in the Apic Id.  It is also an upper bound on the number
    //     of threads per package, but it has been verified that situations
    //     happen where it is not exact.  In particular, on certain OS/chip
    //     combinations where Intel(R) Hyper-Threading Technology is supported
    //     by the chip but has been disabled, the value of this field will be
    //     2 (for a single core chip).  On other OS/chip combinations
    //     supporting Intel(R) Hyper-Threading Technology, the value of this
    //     field will be 1 when Intel(R) Hyper-Threading Technology is
    //     disabled and 2 when it is enabled.
    //
    // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4).  The
    //     value of this field (+1) determines the width of the core# field in
    //     the Apic Id.  The comments in "cpucount.cpp" say that this value is
    //     an upper bound, but the IA-32 architecture manual says that it is
    //     exactly the number of cores per package, and I haven't seen any
    //     case where it wasn't.
    //
    // From this information, deduce the package Id, core Id, and thread Id,
    // and set the corresponding fields in the apicThreadInfo struct.
    //
    unsigned i;
    apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
    unsigned nApics = 0;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(i);
        threadInfo[nApics].osId = i;

        //
        // The apic id and max threads per pkg come from cpuid(1).
        //
        __kmp_x86_cpuid(1, 0, &buf);
        if (! ((buf.edx >> 9) & 1)) {   // test edx bit 9: APIC on-chip
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_ApicNotPresent;
            return -1;
        }
        threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
        threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (threadInfo[nApics].maxThreadsPerPkg == 0) {
            threadInfo[nApics].maxThreadsPerPkg = 1;
        }

        //
        // Max cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            threadInfo[nApics].maxCoresPerPkg = 1;
        }

        //
        // Infer the pkgId / coreId / threadId using only the info
        // obtained locally.
        //
        int widthCT = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxThreadsPerPkg);
        threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

        int widthC = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxCoresPerPkg);
        int widthT = widthCT - widthC;
        if (widthT < 0) {
            //
            // I've never seen this one happen, but I suppose it could, if
            // the cpuid instruction on a chip was really screwed up.
            // Make sure to restore the affinity mask before the tail call.
            //
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }
        int maskC = (1 << widthC) - 1;
        threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
          & maskC;

        int maskT = (1 << widthT) - 1;
        threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;
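
        //
        // Worked example (assumed values, for illustration only): with
        // maxThreadsPerPkg = 16 and maxCoresPerPkg = 8, widthCT = 4,
        // widthC = 3 and widthT = 1.  An apicId of 0x1b (binary 1|101|1)
        // then decomposes as pkgId = 0x1b >> 4 = 1,
        // coreId = (0x1b >> 1) & 0x7 = 5, and threadId = 0x1b & 0x1 = 1.
        //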

        nApics++;
    }

    //
    // We've collected all the info we need.
    // Restore the old affinity mask for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, form an Address object
    // with depth 1 and return immediately (or, if affinity is off, set
    // address2os to NULL and return).
    //
    // If it is configured to omit the package level when there is only a
    // single package, the logic at the end of this routine won't work if
    // there is only a single thread - it would try to form an Address
    // object with depth 0.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
        Address addr(1);
        addr.labels[0] = threadInfo[0].pkgId;
        (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
        }

        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the threadInfo table by physical Id.
    //
    qsort(threadInfo, nApics, sizeof(*threadInfo),
      __kmp_affinity_cmp_apicThreadInfo_phys_id);

    //
    // The table is now sorted by pkgId / coreId / threadId, but we really
    // don't know the radix of any of the fields.  pkgId's may be sparsely
    // assigned among the chips on a system.  Although coreId's are usually
    // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
    // [0..threadsPerCore-1], we don't want to make any such assumptions.
    //
    // For that matter, we don't know what coresPerPkg and threadsPerCore
    // (or the total # packages) are at this point - we want to determine
    // that now.  We only have an upper bound on the first two figures.
    //
    // We also perform a consistency check at this point: the values returned
    // by the cpuid instruction for any thread bound to a given package had
    // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
    //
    nPackages = 1;
    nCoresPerPkg = 1;
    __kmp_nThreadsPerCore = 1;
    unsigned nCores = 1;

    unsigned pkgCt = 1;                         // to determine radii
    unsigned lastPkgId = threadInfo[0].pkgId;
    unsigned coreCt = 1;
    unsigned lastCoreId = threadInfo[0].coreId;
    unsigned threadCt = 1;
    unsigned lastThreadId = threadInfo[0].threadId;

    // intra-pkg consist checks
    unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
    unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

    for (i = 1; i < nApics; i++) {
        if (threadInfo[i].pkgId != lastPkgId) {
            nCores++;
            pkgCt++;
            lastPkgId = threadInfo[i].pkgId;
            if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
            coreCt = 1;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;

            //
            // This is a different package, so go on to the next iteration
            // without doing any consistency checks.  Reset the consistency
            // check vars, though.
            //
            prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
            prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
            continue;
        }

        if (threadInfo[i].coreId != lastCoreId) {
            nCores++;
            coreCt++;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;
        }
        else if (threadInfo[i].threadId != lastThreadId) {
            threadCt++;
            lastThreadId = threadInfo[i].threadId;
        }
        else {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
            return -1;
        }

        //
        // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
        // fields agree between all the threads bound to a given package.
        //
        if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
          || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }
    }
    nPackages = pkgCt;
    if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
    if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nCores;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (__kmp_affinity_uniform_topology()) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Now that we've determined the number of packages, the number of cores
    // per package, and the number of threads per core, we can construct the
    // data structure that is to be returned.
    //
    int pkgLevel = 0;
    int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
    int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
    unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

    KMP_ASSERT(depth > 0);
    *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

    for (i = 0; i < nApics; ++i) {
        Address addr(depth);
        unsigned os = threadInfo[i].osId;
        int d = 0;

        if (pkgLevel >= 0) {
            addr.labels[d++] = threadInfo[i].pkgId;
        }
        if (coreLevel >= 0) {
            addr.labels[d++] = threadInfo[i].coreId;
        }
        if (threadLevel >= 0) {
            addr.labels[d++] = threadInfo[i].threadId;
        }
        (*address2os)[i] = AddrUnsPair(addr, os);
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0)
          && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return depth;
}


//
// Intel(R) microarchitecture code name Nehalem, Dunnington and later
// architectures support a newer interface for specifying the x2APIC Ids,
// based on cpuid leaf 11.
//
static int
__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;

    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check to see if cpuid leaf 11 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 11) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }
    __kmp_x86_cpuid(11, 0, &buf);
    if (buf.ebx == 0) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }

    //
    // Find the number of levels in the machine topology.  While we're at it,
    // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg.  We
    // will try to get more accurate values later by explicitly counting them,
    // but get reasonable defaults now, in case we return early.
    //
    int level;
    int threadLevel = -1;
    int coreLevel = -1;
    int pkgLevel = -1;
    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

    for (level = 0;; level++) {
        if (level > 31) {
            //
            // FIXME: Hack for DPD200163180
            //
            // If level is big then something went wrong -> exiting
            //
            // There could actually be 32 valid levels in the machine topology,
            // but so far, the only machine we have seen which does not exit
            // this loop before iteration 32 has fubar x2APIC settings.
            //
            // For now, just reject this case based upon loop trip count.
            //
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }
        __kmp_x86_cpuid(11, level, &buf);
        if (buf.ebx == 0) {
            if (pkgLevel < 0) {
                //
                // Will infer nPackages from __kmp_xproc
                //
                pkgLevel = level;
                level++;
            }
            break;
        }
        int kind = (buf.ecx >> 8) & 0xff;
        if (kind == 1) {
            //
            // SMT level
            //
            threadLevel = level;
            coreLevel = -1;
            pkgLevel = -1;
            __kmp_nThreadsPerCore = buf.ebx & 0xff;
            if (__kmp_nThreadsPerCore == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else if (kind == 2) {
            //
            // core level
            //
            coreLevel = level;
            pkgLevel = -1;
            nCoresPerPkg = buf.ebx & 0xff;
            if (nCoresPerPkg == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else {
            if (level <= 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
            if (pkgLevel >= 0) {
                continue;
            }
            pkgLevel = level;
            nPackages = buf.ebx & 0xff;
            if (nPackages == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
    }
    int depth = level;

    //
    // In the above loop, "level" was counted from the finest level (usually
    // thread) to the coarsest.  The caller expects that we will place the
    // labels in (*address2os)[].first.labels[] in the inverse order, so
    // we need to invert the vars saying which level means what.
    //
    if (threadLevel >= 0) {
        threadLevel = depth - threadLevel - 1;
    }
    if (coreLevel >= 0) {
        coreLevel = depth - coreLevel - 1;
    }
    KMP_DEBUG_ASSERT(pkgLevel >= 0);
    pkgLevel = depth - pkgLevel - 1;
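
    //
    // Illustrative example (assumed values): with depth == 3 and the cpuid
    // loop reporting threadLevel == 0, coreLevel == 1, pkgLevel == 2, the
    // inversion above yields threadLevel == 2, coreLevel == 1, pkgLevel == 0,
    // matching labels[] ordered from package (coarsest) down to thread.
    //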

    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
    // the defaults that we calculated from issuing cpuid without binding
    // to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE())
    {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Allocate the data structure to be returned.
    //
    AddrUnsPair *retval = (AddrUnsPair *)
      __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    unsigned int proc;
    int nApics = 0;
    for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(proc, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(proc);

        //
        // Extract the labels for each level in the machine topology map
        // from the Apic ID.
        //
        Address addr(depth);
        int prev_shift = 0;

        for (level = 0; level < depth; level++) {
            __kmp_x86_cpuid(11, level, &buf);
            unsigned apicId = buf.edx;
            if (buf.ebx == 0) {
                if (level != depth - 1) {
                    KMP_CPU_FREE(oldMask);
                    *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
                    return -1;
                }
                addr.labels[depth - level - 1] = apicId >> prev_shift;
                level++;
                break;
            }
            int shift = buf.eax & 0x1f;
            int mask = (1 << shift) - 1;
            addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
            prev_shift = shift;
        }
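
        //
        // Worked example (assumed values): suppose depth == 3 and cpuid(11)
        // reports a shift of 1 at the SMT level and 5 at the core level.
        // For an x2APIC id of 0x2d (binary 1|0110|1) the loop stores
        // thread = 0x2d & 0x1 = 1, core = (0x2d & 0x1f) >> 1 = 6, and
        // package = 0x2d >> 5 = 1 into labels[2], labels[1] and labels[0].
        //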
        if (level != depth) {
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }

        retval[nApics] = AddrUnsPair(addr, proc);
        nApics++;
    }

    //
    // We've collected all the info we need.
    // Restore the old affinity mask for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, return now.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(retval);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        //
        // Form an Address object which only includes the package level.
        //
        Address addr(1);
        addr.labels[0] = retval[0].first.labels[pkgLevel];
        retval[0].first = addr;

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
        }

        *address2os = retval;
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the table by physical Id.
    //
    qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

    //
    // Find the radix at each of the levels.
    //
    unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    for (level = 0; level < depth; level++) {
        totals[level] = 1;
        maxCt[level] = 1;
        counts[level] = 1;
        last[level] = retval[0].first.labels[level];
    }

    //
    // From here on, the iteration variable "level" runs from the finest
    // level to the coarsest, i.e. we iterate forward through
    // (*address2os)[].first.labels[] - in the previous loops, we iterated
    // backwards.
    //
    for (proc = 1; (int)proc < nApics; proc++) {
        int level;
        for (level = 0; level < depth; level++) {
            if (retval[proc].first.labels[level] != last[level]) {
                int j;
                for (j = level + 1; j < depth; j++) {
                    totals[j]++;
                    counts[j] = 1;
                    // The line below causes printing of incorrect topology
                    // information when the max value for some level
                    // (maxCt[level]) is encountered earlier in the array than
                    // some smaller value.  For example, suppose pkg0 has 4
                    // cores and pkg1 has 2 cores; then maxCt[1] ends up as 2,
                    // whereas it must be 4.
                    // TODO!!! Check if it can be commented safely
                    //maxCt[j] = 1;
                    last[j] = retval[proc].first.labels[j];
                }
                totals[level]++;
                counts[level]++;
                if (counts[level] > maxCt[level]) {
                    maxCt[level] = counts[level];
                }
                last[level] = retval[proc].first.labels[level];
                break;
            }
            else if (level == depth - 1) {
                __kmp_free(last);
                __kmp_free(maxCt);
                __kmp_free(counts);
                __kmp_free(totals);
                __kmp_free(retval);
                KMP_CPU_FREE(oldMask);
                *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
                return -1;
            }
        }
    }

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return if affinity is not enabled.
    //
    if (threadLevel >= 0) {
        __kmp_nThreadsPerCore = maxCt[threadLevel];
    }
    else {
        __kmp_nThreadsPerCore = 1;
    }
    nPackages = totals[pkgLevel];

    if (coreLevel >= 0) {
        __kmp_ncores = totals[coreLevel];
        nCoresPerPkg = maxCt[coreLevel];
    }
    else {
        __kmp_ncores = nPackages;
        nCoresPerPkg = 1;
    }

    //
    // Check to see if the machine topology is uniform
    //
    unsigned prod = maxCt[0];
    for (level = 1; level < depth; level++) {
        prod *= maxCt[level];
    }
    bool uniform = (prod == totals[level - 1]);
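
    //
    // Illustrative example (assumed counts): with maxCt == {2, 4, 2} from
    // package down to thread, prod == 16; the topology is uniform only if
    // the leaf total totals[depth - 1] is also 16, i.e. every package really
    // has 4 cores and every core really has 2 threads.
    //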

    //
    // Print the machine topology summary.
    //
    if (__kmp_affinity_verbose) {
        char mask[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (uniform) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }

        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);

        __kmp_str_buf_print(&buf, "%d", totals[0]);
        for (level = 1; level <= pkgLevel; level++) {
            __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
        }
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);

        __kmp_str_buf_free(&buf);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(last);
        __kmp_free(maxCt);
        __kmp_free(counts);
        __kmp_free(totals);
        __kmp_free(retval);
        KMP_CPU_FREE(oldMask);
        return 0;
    }
1691
1692 //
1693 // Find any levels with radiix 1, and remove them from the map
1694 // (except for the package level).
1695 //
1696 int new_depth = 0;
1697 for (level = 0; level < depth; level++) {
1698 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1699 continue;
1700 }
1701 new_depth++;
1702 }
1703
1704 //
1705 // If we are removing any levels, allocate a new vector to return,
1706 // and copy the relevant information to it.
1707 //
1708 if (new_depth != depth) {
1709 AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
1710 sizeof(AddrUnsPair) * nApics);
1711 for (proc = 0; (int)proc < nApics; proc++) {
1712 Address addr(new_depth);
1713 new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
1714 }
1715 int new_level = 0;
1716 for (level = 0; level < depth; level++) {
1717 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1718 if (level == threadLevel) {
1719 threadLevel = -1;
1720 }
1721 else if ((threadLevel >= 0) && (level < threadLevel)) {
1722 threadLevel--;
1723 }
1724 if (level == coreLevel) {
1725 coreLevel = -1;
1726 }
1727 else if ((coreLevel >= 0) && (level < coreLevel)) {
1728 coreLevel--;
1729 }
1730 if (level < pkgLevel) {
1731 pkgLevel--;
1732 }
1733 continue;
1734 }
1735 for (proc = 0; (int)proc < nApics; proc++) {
1736 new_retval[proc].first.labels[new_level]
1737 = retval[proc].first.labels[level];
1738 }
1739 new_level++;
1740 }
1741
1742 __kmp_free(retval);
1743 retval = new_retval;
1744 depth = new_depth;
1745 }
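    //
    // For example (hypothetical): on a part without SMT, maxCt[threadLevel]
    // is 1, so the thread level is dropped, depth shrinks from 3 to 2, and
    // threadLevel is reset to -1 so that later code treats the machine as
    // having no distinct thread level.
    //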
1746
1747 if (__kmp_affinity_gran_levels < 0) {
1748 //
1749 // Set the granularity level based on what levels are modeled
1750 // in the machine topology map.
1751 //
1752 __kmp_affinity_gran_levels = 0;
1753 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
1754 __kmp_affinity_gran_levels++;
1755 }
1756 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1757 __kmp_affinity_gran_levels++;
1758 }
1759 if (__kmp_affinity_gran > affinity_gran_package) {
1760 __kmp_affinity_gran_levels++;
1761 }
1762 }
1763
1764 if (__kmp_affinity_verbose) {
1765 __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
1766 coreLevel, threadLevel);
1767 }
1768
1769 __kmp_free(last);
1770 __kmp_free(maxCt);
1771 __kmp_free(counts);
1772 __kmp_free(totals);
1773 KMP_CPU_FREE(oldMask);
1774 *address2os = retval;
1775 return depth;
1776}
1777
1778
1779# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1780
1781
1782#define osIdIndex 0
1783#define threadIdIndex 1
1784#define coreIdIndex 2
1785#define pkgIdIndex 3
1786#define nodeIdIndex 4
1787
1788typedef unsigned *ProcCpuInfo;
1789static unsigned maxIndex = pkgIdIndex;
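//
// A sketch of how one record in the cpuinfo format maps onto these indices
// (hypothetical values; the "thread id" and "node_<n> id" fields appear only
// in alternate files in this format, not in stock Linux* OS /proc/cpuinfo):
//
//     processor   : 4      -> threadInfo[i][osIdIndex]
//     thread id   : 0      -> threadInfo[i][threadIdIndex]
//     core id     : 2      -> threadInfo[i][coreIdIndex]
//     physical id : 1      -> threadInfo[i][pkgIdIndex]
//     node_0 id   : 0      -> threadInfo[i][nodeIdIndex + 0]
//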
1790
1791
1792static int
1793__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
1794{
1795 const unsigned *aa = (const unsigned *)a;
1796 const unsigned *bb = (const unsigned *)b;
1797 if (aa[osIdIndex] < bb[osIdIndex]) return -1;
1798 if (aa[osIdIndex] > bb[osIdIndex]) return 1;
1799 return 0;
1800};
1801
1802
1803static int
1804__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
1805{
1806 unsigned i;
1807 const unsigned *aa = *((const unsigned **)a);
1808 const unsigned *bb = *((const unsigned **)b);
1809 for (i = maxIndex; ; i--) {
1810 if (aa[i] < bb[i]) return -1;
1811 if (aa[i] > bb[i]) return 1;
1812 if (i == osIdIndex) break;
1813 }
1814 return 0;
1815}
1816
1817
1818//
1819// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
1820// affinity map.
1821//
1822static int
1823__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
1824 kmp_i18n_id_t *const msg_id, FILE *f)
1825{
1826 *address2os = NULL;
1827 *msg_id = kmp_i18n_null;
1828
1829 //
1830 // Scan the file and count the number of "processor" (osId) fields,
Alp Toker8f2d3f02014-02-24 10:40:15 +00001831 // and find the highest value of <n> for a node_<n> field.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001832 //
1833 char buf[256];
1834 unsigned num_records = 0;
1835 while (! feof(f)) {
1836 buf[sizeof(buf) - 1] = 1;
1837 if (! fgets(buf, sizeof(buf), f)) {
1838 //
1839 // Read errors are presumably due to EOF
1840 //
1841 break;
1842 }
1843
1844 char s1[] = "processor";
1845 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1846 num_records++;
1847 continue;
1848 }
1849
1850 //
1851 // FIXME - this will match "node_<n> <garbage>"
1852 //
1853 unsigned level;
1854 if (sscanf(buf, "node_%u id", &level) == 1) {
1855 if (nodeIdIndex + level >= maxIndex) {
1856 maxIndex = nodeIdIndex + level;
1857 }
1858 continue;
1859 }
1860 }
1861
1862 //
1863 // Check for empty file / no valid processor records, or too many.
1864 // The number of records can't exceed the number of valid bits in the
1865 // affinity mask.
1866 //
1867 if (num_records == 0) {
1868 *line = 0;
1869 *msg_id = kmp_i18n_str_NoProcRecords;
1870 return -1;
1871 }
1872 if (num_records > (unsigned)__kmp_xproc) {
1873 *line = 0;
1874 *msg_id = kmp_i18n_str_TooManyProcRecords;
1875 return -1;
1876 }
1877
1878 //
1879 // Set the file pointer back to the beginning, so that we can scan the
1880 // file again, this time performing a full parse of the data.
1881 // Allocate a vector of ProcCpuInfo objects, where we will place the data.
1882 // Adding an extra element at the end allows us to remove a lot of extra
1883 // checks for termination conditions.
1884 //
1885 if (fseek(f, 0, SEEK_SET) != 0) {
1886 *line = 0;
1887 *msg_id = kmp_i18n_str_CantRewindCpuinfo;
1888 return -1;
1889 }
1890
1891 //
1892 // Allocate the array of records to store the proc info in. The dummy
1893 // element at the end makes the logic in filling them out easier to code.
1894 //
1895 unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
1896 * sizeof(unsigned *));
1897 unsigned i;
1898 for (i = 0; i <= num_records; i++) {
1899 threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
1900 * sizeof(unsigned));
1901 }
1902
1903#define CLEANUP_THREAD_INFO \
1904 for (i = 0; i <= num_records; i++) { \
1905 __kmp_free(threadInfo[i]); \
1906 } \
1907 __kmp_free(threadInfo);
1908
1909 //
1910 // A value of UINT_MAX means that we didn't find the field
1911 //
1912 unsigned __index;
1913
1914#define INIT_PROC_INFO(p) \
1915 for (__index = 0; __index <= maxIndex; __index++) { \
1916 (p)[__index] = UINT_MAX; \
1917 }
1918
1919 for (i = 0; i <= num_records; i++) {
1920 INIT_PROC_INFO(threadInfo[i]);
1921 }
1922
1923 unsigned num_avail = 0;
1924 *line = 0;
1925 while (! feof(f)) {
1926 //
1927 // Create an inner scoping level, so that all the goto targets at the
1928 // end of the loop appear in an outer scoping level. This avoids
1929 // warnings about jumping past an initialization to a target in the
1930 // same block.
1931 //
1932 {
1933 buf[sizeof(buf) - 1] = 1;
1934 bool long_line = false;
1935 if (! fgets(buf, sizeof(buf), f)) {
1936 //
1937 // Read errors are presumably due to EOF
1938 //
1939 // If there is valid data in threadInfo[num_avail], then fake
1940 // a blank line to ensure that the last address gets parsed.
1941 //
1942 bool valid = false;
1943 for (i = 0; i <= maxIndex; i++) {
1944 if (threadInfo[num_avail][i] != UINT_MAX) {
1945 valid = true;
1946 }
1947 }
1948 if (! valid) {
1949 break;
1950 }
1951 buf[0] = 0;
1952 } else if (!buf[sizeof(buf) - 1]) {
1953 //
1954 // The line is longer than the buffer. Set a flag and don't
1955 // emit an error if we were going to ignore the line anyway.
1956 //
1957 long_line = true;
1958
1959#define CHECK_LINE \
1960 if (long_line) { \
1961 CLEANUP_THREAD_INFO; \
1962 *msg_id = kmp_i18n_str_LongLineCpuinfo; \
1963 return -1; \
1964 }
1965 }
1966 (*line)++;
1967
1968 char s1[] = "processor";
1969 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1970 CHECK_LINE;
1971 char *p = strchr(buf + sizeof(s1) - 1, ':');
1972 unsigned val;
1973 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
1974 if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
1975 threadInfo[num_avail][osIdIndex] = val;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001976#if KMP_OS_LINUX && USE_SYSFS_INFO
1977 char path[256];
1978 snprintf(path, sizeof(path),
1979 "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
1980 threadInfo[num_avail][osIdIndex]);
1981 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
1982
1983 snprintf(path, sizeof(path),
1984 "/sys/devices/system/cpu/cpu%u/topology/core_id",
1985 threadInfo[num_avail][osIdIndex]);
1986 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001987 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001988#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001989 }
1990 char s2[] = "physical id";
1991 if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
1992 CHECK_LINE;
1993 char *p = strchr(buf + sizeof(s2) - 1, ':');
1994 unsigned val;
1995 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
1996 if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
1997 threadInfo[num_avail][pkgIdIndex] = val;
1998 continue;
1999 }
2000 char s3[] = "core id";
2001 if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
2002 CHECK_LINE;
2003 char *p = strchr(buf + sizeof(s3) - 1, ':');
2004 unsigned val;
2005 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2006 if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
2007 threadInfo[num_avail][coreIdIndex] = val;
2008 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002009#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jim Cownie5e8470a2013-09-27 10:38:44 +00002010 }
2011 char s4[] = "thread id";
2012 if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
2013 CHECK_LINE;
2014 char *p = strchr(buf + sizeof(s4) - 1, ':');
2015 unsigned val;
2016 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2017 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
2018 threadInfo[num_avail][threadIdIndex] = val;
2019 continue;
2020 }
2021 unsigned level;
2022 if (sscanf(buf, "node_%u id", &level) == 1) {
2023 CHECK_LINE;
2024 char *p = strchr(buf + sizeof(s4) - 1, ':');
2025 unsigned val;
2026 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2027 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
2028 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
2029 threadInfo[num_avail][nodeIdIndex + level] = val;
2030 continue;
2031 }
2032
2033 //
2034 // We didn't recognize the leading token on the line.
2035 // There are lots of leading tokens that we don't recognize -
2036 // if the line isn't empty, go on to the next line.
2037 //
2038 if ((*buf != 0) && (*buf != '\n')) {
2039 //
2040 // If the line is longer than the buffer, read characters
2041 // until we find a newline.
2042 //
2043 if (long_line) {
2044 int ch;
2045 while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
2046 }
2047 continue;
2048 }
2049
2050 //
2051 // A newline has signalled the end of the processor record.
2052 // Check that there aren't too many procs specified.
2053 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002054 if ((int)num_avail == __kmp_xproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002055 CLEANUP_THREAD_INFO;
2056 *msg_id = kmp_i18n_str_TooManyEntries;
2057 return -1;
2058 }
2059
2060 //
2061 // Check for missing fields. The osId field must be there, and we
2062 // currently require that the physical id field is specified, also.
2063 //
2064 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
2065 CLEANUP_THREAD_INFO;
2066 *msg_id = kmp_i18n_str_MissingProcField;
2067 return -1;
2068 }
2069 if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
2070 CLEANUP_THREAD_INFO;
2071 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
2072 return -1;
2073 }
2074
2075 //
2076 // Skip this proc if it is not included in the machine model.
2077 //
2078 if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
2079 INIT_PROC_INFO(threadInfo[num_avail]);
2080 continue;
2081 }
2082
2083 //
2084 // We have a successful parse of this proc's info.
2085 // Increment the counter, and prepare for the next proc.
2086 //
2087 num_avail++;
2088 KMP_ASSERT(num_avail <= num_records);
2089 INIT_PROC_INFO(threadInfo[num_avail]);
2090 }
2091 continue;
2092
2093 no_val:
2094 CLEANUP_THREAD_INFO;
2095 *msg_id = kmp_i18n_str_MissingValCpuinfo;
2096 return -1;
2097
2098 dup_field:
2099 CLEANUP_THREAD_INFO;
2100 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
2101 return -1;
2102 }
2103 *line = 0;
2104
2105# if KMP_MIC && REDUCE_TEAM_SIZE
2106 unsigned teamSize = 0;
2107# endif // KMP_MIC && REDUCE_TEAM_SIZE
2108
2109 // check for num_records == __kmp_xproc ???
2110
2111 //
2112 // If there's only one thread context to bind to, form an Address object
2113 // with depth 1 and return immediately (or, if affinity is off, set
2114 // address2os to NULL and return).
2115 //
2116 // If it is configured to omit the package level when there is only a
2117 // single package, the logic at the end of this routine won't work if
2118 // there is only a single thread - it would try to form an Address
2119 // object with depth 0.
2120 //
2121 KMP_ASSERT(num_avail > 0);
2122 KMP_ASSERT(num_avail <= num_records);
2123 if (num_avail == 1) {
2124 __kmp_ncores = 1;
2125 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002126 if (__kmp_affinity_verbose) {
2127 if (! KMP_AFFINITY_CAPABLE()) {
2128 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2129 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2130 KMP_INFORM(Uniform, "KMP_AFFINITY");
2131 }
2132 else {
2133 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2134 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
2135 fullMask);
2136 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2137 if (__kmp_affinity_respect_mask) {
2138 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2139 } else {
2140 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2141 }
2142 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2143 KMP_INFORM(Uniform, "KMP_AFFINITY");
2144 }
2145 int index;
2146 kmp_str_buf_t buf;
2147 __kmp_str_buf_init(&buf);
2148 __kmp_str_buf_print(&buf, "1");
2149 for (index = maxIndex - 1; index > pkgIdIndex; index--) {
2150 __kmp_str_buf_print(&buf, " x 1");
2151 }
2152 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
2153 __kmp_str_buf_free(&buf);
2154 }
2155
2156 if (__kmp_affinity_type == affinity_none) {
2157 CLEANUP_THREAD_INFO;
2158 return 0;
2159 }
2160
2161 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
2162 Address addr(1);
2163 addr.labels[0] = threadInfo[0][pkgIdIndex];
2164 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
2165
2166 if (__kmp_affinity_gran_levels < 0) {
2167 __kmp_affinity_gran_levels = 0;
2168 }
2169
2170 if (__kmp_affinity_verbose) {
2171 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
2172 }
2173
2174 CLEANUP_THREAD_INFO;
2175 return 1;
2176 }
2177
2178 //
2179 // Sort the threadInfo table by physical Id.
2180 //
2181 qsort(threadInfo, num_avail, sizeof(*threadInfo),
2182 __kmp_affinity_cmp_ProcCpuInfo_phys_id);
2183
2184 //
2185 // The table is now sorted by pkgId / coreId / threadId, but we really
2186 // don't know the radix of any of the fields. pkgId's may be sparsely
2187 // assigned among the chips on a system. Although coreId's are usually
2188 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
2189 // [0..threadsPerCore-1], we don't want to make any such assumptions.
2190 //
2191 // For that matter, we don't know what coresPerPkg and threadsPerCore
2192 // (or the total # packages) are at this point - we want to determine
2193 // that now. We only have an upper bound on the first two figures.
2194 //
2195 unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
2196 * sizeof(unsigned));
2197 unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
2198 * sizeof(unsigned));
2199 unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
2200 * sizeof(unsigned));
2201 unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
2202 * sizeof(unsigned));
2203
2204 bool assign_thread_ids = false;
2205 unsigned threadIdCt;
2206 unsigned index;
2207
2208 restart_radix_check:
2209 threadIdCt = 0;
2210
2211 //
2212 // Initialize the counter arrays with data from threadInfo[0].
2213 //
2214 if (assign_thread_ids) {
2215 if (threadInfo[0][threadIdIndex] == UINT_MAX) {
2216 threadInfo[0][threadIdIndex] = threadIdCt++;
2217 }
2218 else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
2219 threadIdCt = threadInfo[0][threadIdIndex] + 1;
2220 }
2221 }
2222 for (index = 0; index <= maxIndex; index++) {
2223 counts[index] = 1;
2224 maxCt[index] = 1;
2225 totals[index] = 1;
2226 lastId[index] = threadInfo[0][index];
2227 }
2228
2229 //
2230 // Run through the rest of the OS procs.
2231 //
2232 for (i = 1; i < num_avail; i++) {
2233 //
2234 // Find the most significant index whose id differs
2235 // from the id for the previous OS proc.
2236 //
2237 for (index = maxIndex; index >= threadIdIndex; index--) {
2238 if (assign_thread_ids && (index == threadIdIndex)) {
2239 //
2240 // Auto-assign the thread id field if it wasn't specified.
2241 //
2242 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2243 threadInfo[i][threadIdIndex] = threadIdCt++;
2244 }
2245
2246 //
2247 // Apparently the thread id field was specified for some
2248 // entries and not others. Start the thread id counter
2249 // off at the next higher thread id.
2250 //
2251 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2252 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2253 }
2254 }
2255 if (threadInfo[i][index] != lastId[index]) {
2256 //
2257 // Run through all indices which are less significant,
2258 // and reset the counts to 1.
2259 //
2260 // At all levels up to and including index, we need to
2261 // increment the totals and record the last id.
2262 //
2263 unsigned index2;
2264 for (index2 = threadIdIndex; index2 < index; index2++) {
2265 totals[index2]++;
2266 if (counts[index2] > maxCt[index2]) {
2267 maxCt[index2] = counts[index2];
2268 }
2269 counts[index2] = 1;
2270 lastId[index2] = threadInfo[i][index2];
2271 }
2272 counts[index]++;
2273 totals[index]++;
2274 lastId[index] = threadInfo[i][index];
2275
2276 if (assign_thread_ids && (index > threadIdIndex)) {
2277
2278# if KMP_MIC && REDUCE_TEAM_SIZE
2279 //
2280 // The default team size is the total #threads in the machine
2281 // minus 1 thread for every core that has 3 or more threads.
2282 //
2283 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2284# endif // KMP_MIC && REDUCE_TEAM_SIZE
2285
2286 //
2287 // Restart the thread counter, as we are on a new core.
2288 //
2289 threadIdCt = 0;
2290
2291 //
2292 // Auto-assign the thread id field if it wasn't specified.
2293 //
2294 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2295 threadInfo[i][threadIdIndex] = threadIdCt++;
2296 }
2297
2298 //
2299 // Apparently the thread id field was specified for some
2300 // entries and not others. Start the thread id counter
2301 // off at the next higher thread id.
2302 //
2303 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2304 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2305 }
2306 }
2307 break;
2308 }
2309 }
2310 if (index < threadIdIndex) {
2311 //
2312 // If thread ids were specified, it is an error if they are not
2313 // unique. Also, check that we haven't already restarted the
2314 // loop (to be safe - shouldn't need to).
2315 //
2316 if ((threadInfo[i][threadIdIndex] != UINT_MAX)
2317 || assign_thread_ids) {
2318 __kmp_free(lastId);
2319 __kmp_free(totals);
2320 __kmp_free(maxCt);
2321 __kmp_free(counts);
2322 CLEANUP_THREAD_INFO;
2323 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2324 return -1;
2325 }
2326
2327 //
2328 // If the thread ids were not specified and we see entries
2329 // that are duplicates, start the loop over and
2330 // assign the thread ids manually.
2331 //
2332 assign_thread_ids = true;
2333 goto restart_radix_check;
2334 }
2335 }
2336
2337# if KMP_MIC && REDUCE_TEAM_SIZE
2338 //
2339 // The default team size is the total #threads in the machine
2340 // minus 1 thread for every core that has 3 or more threads.
2341 //
2342 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2343# endif // KMP_MIC && REDUCE_TEAM_SIZE
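    //
    // For example (hypothetical): on a 60-core part with 4 thread contexts
    // per core, each core contributes threadIdCt - 1 = 3, so teamSize ends
    // up as 60 * 3 = 180 and the default team leaves one context per core
    // unused.
    //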
2344
2345 for (index = threadIdIndex; index <= maxIndex; index++) {
2346 if (counts[index] > maxCt[index]) {
2347 maxCt[index] = counts[index];
2348 }
2349 }
2350
2351 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2352 nCoresPerPkg = maxCt[coreIdIndex];
2353 nPackages = totals[pkgIdIndex];
2354
2355 //
2356 // Check to see if the machine topology is uniform
2357 //
2358 unsigned prod = totals[maxIndex];
2359 for (index = threadIdIndex; index < maxIndex; index++) {
2360 prod *= maxCt[index];
2361 }
2362 bool uniform = (prod == totals[threadIdIndex]);
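    //
    // Worked example (hypothetical): 8 records from a 2-pkg x 2-core x
    // 2-thread machine leave totals[pkgIdIndex] = 2, totals[coreIdIndex] = 4,
    // totals[threadIdIndex] = 8, and maxCt[coreIdIndex] =
    // maxCt[threadIdIndex] = 2. With no node_<n> fields, maxIndex is
    // pkgIdIndex, so prod = 2 * 2 * 2 = 8 == totals[threadIdIndex], and the
    // topology is uniform.
    //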
2363
2364 //
2365 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00002366 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002367 // nCoresPerPkg, & nPackages. Make sure all these vars are set
2368 // correctly, and return now if affinity is not enabled.
2369 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00002370 __kmp_ncores = totals[coreIdIndex];
2371
2372 if (__kmp_affinity_verbose) {
2373 if (! KMP_AFFINITY_CAPABLE()) {
2374 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2375 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2376 if (uniform) {
2377 KMP_INFORM(Uniform, "KMP_AFFINITY");
2378 } else {
2379 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2380 }
2381 }
2382 else {
2383 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2384 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
2385 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2386 if (__kmp_affinity_respect_mask) {
2387 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2388 } else {
2389 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2390 }
2391 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2392 if (uniform) {
2393 KMP_INFORM(Uniform, "KMP_AFFINITY");
2394 } else {
2395 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2396 }
2397 }
2398 kmp_str_buf_t buf;
2399 __kmp_str_buf_init(&buf);
2400
2401 __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
2402 for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2403 __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
2404 }
2405 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
2406 maxCt[threadIdIndex], __kmp_ncores);
2407
2408 __kmp_str_buf_free(&buf);
2409 }
2410
2411# if KMP_MIC && REDUCE_TEAM_SIZE
2412 //
2413 // Set the default team size.
2414 //
2415 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2416 __kmp_dflt_team_nth = teamSize;
2417 KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
2418 __kmp_dflt_team_nth));
2419 }
2420# endif // KMP_MIC && REDUCE_TEAM_SIZE
2421
2422 if (__kmp_affinity_type == affinity_none) {
2423 __kmp_free(lastId);
2424 __kmp_free(totals);
2425 __kmp_free(maxCt);
2426 __kmp_free(counts);
2427 CLEANUP_THREAD_INFO;
2428 return 0;
2429 }
2430
2431 //
2432 // Count the number of levels which have more nodes at that level than
2433 // at the parent's level (with an implicit root node above the top
2434 // level). This is equivalent to saying that there is at least
2435 // one node at this level which has a sibling. These levels are in the
2436 // map, and the package level is always in the map.
2437 //
2438 bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
2439 int level = 0;
2440 for (index = threadIdIndex; index < maxIndex; index++) {
2441 KMP_ASSERT(totals[index] >= totals[index + 1]);
2442 inMap[index] = (totals[index] > totals[index + 1]);
2443 }
2444 inMap[maxIndex] = (totals[maxIndex] > 1);
2445 inMap[pkgIdIndex] = true;
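    //
    // For example (hypothetical): with 2 packages x 2 cores and no SMT,
    // totals[threadIdIndex] = totals[coreIdIndex] = 4 and
    // totals[pkgIdIndex] = 2, so inMap[threadIdIndex] is false (every thread
    // is an only child) while the core and package levels stay in the map,
    // giving depth == 2.
    //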
2446
2447 int depth = 0;
2448 for (index = threadIdIndex; index <= maxIndex; index++) {
2449 if (inMap[index]) {
2450 depth++;
2451 }
2452 }
2453 KMP_ASSERT(depth > 0);
2454
2455 //
2456 // Construct the data structure that is to be returned.
2457 //
2458 *address2os = (AddrUnsPair*)
2459 __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
2460 int pkgLevel = -1;
2461 int coreLevel = -1;
2462 int threadLevel = -1;
2463
2464 for (i = 0; i < num_avail; ++i) {
2465 Address addr(depth);
2466 unsigned os = threadInfo[i][osIdIndex];
2467 int src_index;
2468 int dst_index = 0;
2469
2470 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
2471 if (! inMap[src_index]) {
2472 continue;
2473 }
2474 addr.labels[dst_index] = threadInfo[i][src_index];
2475 if (src_index == pkgIdIndex) {
2476 pkgLevel = dst_index;
2477 }
2478 else if (src_index == coreIdIndex) {
2479 coreLevel = dst_index;
2480 }
2481 else if (src_index == threadIdIndex) {
2482 threadLevel = dst_index;
2483 }
2484 dst_index++;
2485 }
2486 (*address2os)[i] = AddrUnsPair(addr, os);
2487 }
2488
2489 if (__kmp_affinity_gran_levels < 0) {
2490 //
2491 // Set the granularity level based on what levels are modeled
2492 // in the machine topology map.
2493 //
2494 unsigned src_index;
2495 __kmp_affinity_gran_levels = 0;
2496 for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2497 if (! inMap[src_index]) {
2498 continue;
2499 }
2500 switch (src_index) {
2501 case threadIdIndex:
2502 if (__kmp_affinity_gran > affinity_gran_thread) {
2503 __kmp_affinity_gran_levels++;
2504 }
2505
2506 break;
2507 case coreIdIndex:
2508 if (__kmp_affinity_gran > affinity_gran_core) {
2509 __kmp_affinity_gran_levels++;
2510 }
2511 break;
2512
2513 case pkgIdIndex:
2514 if (__kmp_affinity_gran > affinity_gran_package) {
2515 __kmp_affinity_gran_levels++;
2516 }
2517 break;
2518 }
2519 }
2520 }
2521
2522 if (__kmp_affinity_verbose) {
2523 __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2524 coreLevel, threadLevel);
2525 }
2526
2527 __kmp_free(inMap);
2528 __kmp_free(lastId);
2529 __kmp_free(totals);
2530 __kmp_free(maxCt);
2531 __kmp_free(counts);
2532 CLEANUP_THREAD_INFO;
2533 return depth;
2534}
2535
2536
2537//
2538// Create and return a table of affinity masks, indexed by OS thread ID.
2539// This routine handles OR'ing together all the affinity masks of threads
2540// that are sufficiently close, if granularity > fine.
2541//
2542static kmp_affin_mask_t *
2543__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
2544 AddrUnsPair *address2os, unsigned numAddrs)
2545{
2546 //
2547 // First form a table of affinity masks in order of OS thread id.
2548 //
2549 unsigned depth;
2550 unsigned maxOsId;
2551 unsigned i;
2552
2553 KMP_ASSERT(numAddrs > 0);
2554 depth = address2os[0].first.depth;
2555
2556 maxOsId = 0;
2557 for (i = 0; i < numAddrs; i++) {
2558 unsigned osId = address2os[i].second;
2559 if (osId > maxOsId) {
2560 maxOsId = osId;
2561 }
2562 }
2563 kmp_affin_mask_t *osId2Mask = (kmp_affin_mask_t *)__kmp_allocate(
2564 (maxOsId + 1) * __kmp_affin_mask_size);
2565
2566 //
2567 // Sort the address2os table according to physical order. Doing so
2568 // will put all threads on the same core/package/node in consecutive
2569 // locations.
2570 //
2571 qsort(address2os, numAddrs, sizeof(*address2os),
2572 __kmp_affinity_cmp_Address_labels);
2573
2574 KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2575 if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
2576 KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
2577 }
2578 if (__kmp_affinity_gran_levels >= (int)depth) {
2579 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2580 && (__kmp_affinity_type != affinity_none))) {
2581 KMP_WARNING(AffThreadsMayMigrate);
2582 }
2583 }
2584
2585 //
2586 // Run through the table, forming the masks for all threads on each
2587 // core. Threads on the same core will have identical "Address"
2588 // objects, not considering the last level, which must be the thread
2589 // id. All threads on a core will appear consecutively.
2590 //
2591 unsigned unique = 0;
2592 unsigned j = 0; // index of 1st thread on core
2593 unsigned leader = 0;
2594 Address *leaderAddr = &(address2os[0].first);
2595 kmp_affin_mask_t *sum
2596 = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
2597 KMP_CPU_ZERO(sum);
2598 KMP_CPU_SET(address2os[0].second, sum);
2599 for (i = 1; i < numAddrs; i++) {
2600 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002601 // If this thread is sufficiently close to the leader (within the
Jim Cownie5e8470a2013-09-27 10:38:44 +00002602 // granularity setting), then set the bit for this os thread in the
2603 // affinity mask for this group, and go on to the next thread.
2604 //
2605 if (leaderAddr->isClose(address2os[i].first,
2606 __kmp_affinity_gran_levels)) {
2607 KMP_CPU_SET(address2os[i].second, sum);
2608 continue;
2609 }
2610
2611 //
2612 // For every thread in this group, copy the mask to the thread's
2613 // entry in the osId2Mask table. Mark the first address as a
2614 // leader.
2615 //
2616 for (; j < i; j++) {
2617 unsigned osId = address2os[j].second;
2618 KMP_DEBUG_ASSERT(osId <= maxOsId);
2619 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2620 KMP_CPU_COPY(mask, sum);
2621 address2os[j].first.leader = (j == leader);
2622 }
2623 unique++;
2624
2625 //
2626 // Start a new mask.
2627 //
2628 leader = i;
2629 leaderAddr = &(address2os[i].first);
2630 KMP_CPU_ZERO(sum);
2631 KMP_CPU_SET(address2os[i].second, sum);
2632 }
2633
2634 //
2635 // For every thread in last group, copy the mask to the thread's
2636 // entry in the osId2Mask table.
2637 //
2638 for (; j < i; j++) {
2639 unsigned osId = address2os[j].second;
2640 KMP_DEBUG_ASSERT(osId <= maxOsId);
2641 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2642 KMP_CPU_COPY(mask, sum);
2643 address2os[j].first.leader = (j == leader);
2644 }
2645 unique++;
2646
2647 *maxIndex = maxOsId;
2648 *numUnique = unique;
2649 return osId2Mask;
2650}
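//
// Usage sketch (hypothetical): with granularity=core on a 2-way SMT machine,
// __kmp_affinity_gran_levels is 1, so two OS procs such as 6 and 7 that share
// a core compare as "close"; both of their osId2Mask entries become the mask
// {6,7}, and numUnique counts one mask per core rather than one per thread
// context.
//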
2651
2652
2653//
2654// Stuff for the affinity proclist parsers. It's easier to declare these vars
2655 // as file-static than to try to pass them through the calling sequence of
2656// the recursive-descent OMP_PLACES parser.
2657//
2658static kmp_affin_mask_t *newMasks;
2659static int numNewMasks;
2660static int nextNewMask;
2661
2662#define ADD_MASK(_mask) \
2663 { \
2664 if (nextNewMask >= numNewMasks) { \
2665 numNewMasks *= 2; \
2666 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
2667 numNewMasks * __kmp_affin_mask_size); \
2668 } \
2669 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
2670 nextNewMask++; \
2671 }
2672
2673#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
2674 { \
2675 if (((_osId) > _maxOsId) || \
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002676 (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002677 if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
2678 && (__kmp_affinity_type != affinity_none))) { \
2679 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
2680 } \
2681 } \
2682 else { \
2683 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
2684 } \
2685 }
2686
2687
2688//
2689// Re-parse the proclist (for the explicit affinity type), and form the list
2690// of affinity newMasks indexed by gtid.
2691//
2692static void
2693__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
2694 unsigned int *out_numMasks, const char *proclist,
2695 kmp_affin_mask_t *osId2Mask, int maxOsId)
2696{
2697 const char *scan = proclist;
2698 const char *next = proclist;
2699
2700 //
2701 // We use malloc() for the temporary mask vector,
2702 // so that we can use realloc() to extend it.
2703 //
2704 numNewMasks = 2;
2705 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
2706 * __kmp_affin_mask_size);
2707 nextNewMask = 0;
2708 kmp_affin_mask_t *sumMask = (kmp_affin_mask_t *)__kmp_allocate(
2709 __kmp_affin_mask_size);
2710 int setSize = 0;
2711
2712 for (;;) {
2713 int start, end, stride;
2714
2715 SKIP_WS(scan);
2716 next = scan;
2717 if (*next == '\0') {
2718 break;
2719 }
2720
2721 if (*next == '{') {
2722 int num;
2723 setSize = 0;
2724 next++; // skip '{'
2725 SKIP_WS(next);
2726 scan = next;
2727
2728 //
2729 // Read the first integer in the set.
2730 //
2731 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2732 "bad proclist");
2733 SKIP_DIGITS(next);
2734 num = __kmp_str_to_int(scan, *next);
2735 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2736
2737 //
2738 // Copy the mask for that osId to the sum (union) mask.
2739 //
2740 if ((num > maxOsId) ||
2741 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2742 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2743 && (__kmp_affinity_type != affinity_none))) {
2744 KMP_WARNING(AffIgnoreInvalidProcID, num);
2745 }
2746 KMP_CPU_ZERO(sumMask);
2747 }
2748 else {
2749 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2750 setSize = 1;
2751 }
2752
2753 for (;;) {
2754 //
2755 // Check for end of set.
2756 //
2757 SKIP_WS(next);
2758 if (*next == '}') {
2759 next++; // skip '}'
2760 break;
2761 }
2762
2763 //
2764 // Skip optional comma.
2765 //
2766 if (*next == ',') {
2767 next++;
2768 }
2769 SKIP_WS(next);
2770
2771 //
2772 // Read the next integer in the set.
2773 //
2774 scan = next;
2775 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2776 "bad explicit proc list");
2777
2778 SKIP_DIGITS(next);
2779 num = __kmp_str_to_int(scan, *next);
2780 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2781
2782 //
2783 // Add the mask for that osId to the sum mask.
2784 //
2785 if ((num > maxOsId) ||
2786 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2787 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2788 && (__kmp_affinity_type != affinity_none))) {
2789 KMP_WARNING(AffIgnoreInvalidProcID, num);
2790 }
2791 }
2792 else {
2793 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2794 setSize++;
2795 }
2796 }
2797 if (setSize > 0) {
2798 ADD_MASK(sumMask);
2799 }
2800
2801 SKIP_WS(next);
2802 if (*next == ',') {
2803 next++;
2804 }
2805 scan = next;
2806 continue;
2807 }
2808
2809 //
2810 // Read the first integer.
2811 //
2812 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2813 SKIP_DIGITS(next);
2814 start = __kmp_str_to_int(scan, *next);
2815 KMP_ASSERT2(start >= 0, "bad explicit proc list");
2816 SKIP_WS(next);
2817
2818 //
2819 // If this isn't a range, then add a mask to the list and go on.
2820 //
2821 if (*next != '-') {
2822 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2823
2824 //
2825 // Skip optional comma.
2826 //
2827 if (*next == ',') {
2828 next++;
2829 }
2830 scan = next;
2831 continue;
2832 }
2833
2834 //
2835 // This is a range. Skip over the '-' and read in the 2nd int.
2836 //
2837 next++; // skip '-'
2838 SKIP_WS(next);
2839 scan = next;
2840 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2841 SKIP_DIGITS(next);
2842 end = __kmp_str_to_int(scan, *next);
2843 KMP_ASSERT2(end >= 0, "bad explicit proc list");
2844
2845 //
2846 // Check for a stride parameter
2847 //
2848 stride = 1;
2849 SKIP_WS(next);
2850 if (*next == ':') {
2851 //
2852 // A stride is specified. Skip over the ':' and read the 3rd int.
2853 //
2854 int sign = +1;
2855 next++; // skip ':'
2856 SKIP_WS(next);
2857 scan = next;
2858 if (*next == '-') {
2859 sign = -1;
2860 next++;
2861 SKIP_WS(next);
2862 scan = next;
2863 }
2864 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2865 "bad explicit proc list");
2866 SKIP_DIGITS(next);
2867 stride = __kmp_str_to_int(scan, *next);
2868 KMP_ASSERT2(stride >= 0, "bad explicit proc list");
2869 stride *= sign;
2870 }
2871
2872 //
2873 // Do some range checks.
2874 //
2875 KMP_ASSERT2(stride != 0, "bad explicit proc list");
2876 if (stride > 0) {
2877 KMP_ASSERT2(start <= end, "bad explicit proc list");
2878 }
2879 else {
2880 KMP_ASSERT2(start >= end, "bad explicit proc list");
2881 }
2882 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
2883
2884 //
2885 // Add the mask for each OS proc # to the list.
2886 //
2887 if (stride > 0) {
2888 do {
2889 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2890 start += stride;
2891 } while (start <= end);
2892 }
2893 else {
2894 do {
2895 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2896 start += stride;
2897 } while (start >= end);
2898 }
2899
2900 //
2901 // Skip optional comma.
2902 //
2903 SKIP_WS(next);
2904 if (*next == ',') {
2905 next++;
2906 }
2907 scan = next;
2908 }
2909
2910 *out_numMasks = nextNewMask;
2911 if (nextNewMask == 0) {
2912 *out_masks = NULL;
2913 KMP_INTERNAL_FREE(newMasks);
2914 return;
2915 }
2916 *out_masks
2917 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
2918 memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
2919 __kmp_free(sumMask);
2920 KMP_INTERNAL_FREE(newMasks);
2921}
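//
// Example (hypothetical, assuming all of the ids below are valid OS procs):
// the proclist "0-6:2,9,{10,11}" yields the masks {0}, {2}, {4}, {6}, {9},
// and {10,11} - a range with a stride adds one mask per proc, while a {...}
// set is OR'd into a single mask. Invalid ids are skipped with a warning.
//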
2922
2923
2924# if OMP_40_ENABLED
2925
2926/*-----------------------------------------------------------------------------
2927
2928Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
2929 places. Again, here is the grammar:
2930
2931place_list := place
2932place_list := place , place_list
2933place := num
2934place := place : num
2935place := place : num : signed
2936place := { subplacelist }
2937place := ! place // (lowest priority)
2938subplace_list := subplace
2939subplace_list := subplace , subplace_list
2940subplace := num
2941subplace := num : num
2942subplace := num : num : signed
2943signed := num
2944signed := + signed
2945signed := - signed
2946
2947-----------------------------------------------------------------------------*/
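//
// Two hypothetical examples of strings this grammar accepts:
// OMP_PLACES="{0,1},{2,3},{4,5},{6,7}" names four explicit places, and
// OMP_PLACES="{0,1}:4:2" names the same four places by replicating the
// subplace {0,1} four times, shifting its proc ids up by 2 each time.
//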
2948
2949static void
2950__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
2951 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
2952{
2953 const char *next;
2954
2955 for (;;) {
2956 int start, count, stride, i;
2957
2958 //
2959 // Read in the starting proc id
2960 //
2961 SKIP_WS(*scan);
2962 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2963 "bad explicit places list");
2964 next = *scan;
2965 SKIP_DIGITS(next);
2966 start = __kmp_str_to_int(*scan, *next);
2967 KMP_ASSERT(start >= 0);
2968 *scan = next;
2969
2970 //
2971 // valid follow sets are ',' ':' and '}'
2972 //
2973 SKIP_WS(*scan);
2974 if (**scan == '}' || **scan == ',') {
2975 if ((start > maxOsId) ||
2976 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
2977 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2978 && (__kmp_affinity_type != affinity_none))) {
2979 KMP_WARNING(AffIgnoreInvalidProcID, start);
2980 }
2981 }
2982 else {
2983 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
2984 (*setSize)++;
2985 }
2986 if (**scan == '}') {
2987 break;
2988 }
2989 (*scan)++; // skip ','
2990 continue;
2991 }
2992 KMP_ASSERT2(**scan == ':', "bad explicit places list");
2993 (*scan)++; // skip ':'
2994
2995 //
2996 // Read count parameter
2997 //
2998 SKIP_WS(*scan);
2999 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3000 "bad explicit places list");
3001 next = *scan;
3002 SKIP_DIGITS(next);
3003 count = __kmp_str_to_int(*scan, *next);
3004 KMP_ASSERT(count >= 0);
3005 *scan = next;
3006
3007 //
3008 // valid follow sets are ',' ':' and '}'
3009 //
3010 SKIP_WS(*scan);
3011 if (**scan == '}' || **scan == ',') {
3012 for (i = 0; i < count; i++) {
3013 if ((start > maxOsId) ||
3014 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3015 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3016 && (__kmp_affinity_type != affinity_none))) {
3017 KMP_WARNING(AffIgnoreInvalidProcID, start);
3018 }
3019 break; // don't proliferate warnings for large count
3020 }
3021 else {
3022 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3023 start++;
3024 (*setSize)++;
3025 }
3026 }
3027 if (**scan == '}') {
3028 break;
3029 }
3030 (*scan)++; // skip ','
3031 continue;
3032 }
3033 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3034 (*scan)++; // skip ':'
3035
3036 //
3037 // Read stride parameter
3038 //
3039 int sign = +1;
3040 for (;;) {
3041 SKIP_WS(*scan);
3042 if (**scan == '+') {
3043 (*scan)++; // skip '+'
3044 continue;
3045 }
3046 if (**scan == '-') {
3047 sign *= -1;
3048 (*scan)++; // skip '-'
3049 continue;
3050 }
3051 break;
3052 }
3053 SKIP_WS(*scan);
3054 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3055 "bad explicit places list");
3056 next = *scan;
3057 SKIP_DIGITS(next);
3058 stride = __kmp_str_to_int(*scan, *next);
3059 KMP_ASSERT(stride >= 0);
3060 *scan = next;
3061 stride *= sign;
3062
3063 //
3064 // valid follow sets are ',' and '}'
3065 //
3066 SKIP_WS(*scan);
3067 if (**scan == '}' || **scan == ',') {
3068 for (i = 0; i < count; i++) {
3069 if ((start > maxOsId) ||
3070 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3071 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3072 && (__kmp_affinity_type != affinity_none))) {
3073 KMP_WARNING(AffIgnoreInvalidProcID, start);
3074 }
3075 break; // don't proliferate warnings for large count
3076 }
3077 else {
3078 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3079 start += stride;
3080 (*setSize)++;
3081 }
3082 }
3083 if (**scan == '}') {
3084 break;
3085 }
3086 (*scan)++; // skip ','
3087 continue;
3088 }
3089
3090 KMP_ASSERT2(0, "bad explicit places list");
3091 }
3092}
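//
// For example (hypothetical): the subplace "0:4" unions procs {0,1,2,3} into
// tempMask, while "0:4:2" - start 0, count 4, stride 2 - unions {0,2,4,6}.
//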
3093
3094
3095static void
3096__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
3097 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3098{
3099 const char *next;
3100
3101 //
3102 // valid follow sets are '{' '!' and num
3103 //
3104 SKIP_WS(*scan);
3105 if (**scan == '{') {
3106 (*scan)++; // skip '{'
3107 __kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask,
3108 setSize);
3109 KMP_ASSERT2(**scan == '}', "bad explicit places list");
3110 (*scan)++; // skip '}'
3111 }
3112 else if (**scan == '!') {
3113 (*scan)++; // skip '!' before recursing, or the parser would never advance
3114 __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
3115 KMP_CPU_COMPLEMENT(tempMask);
3116 }
3117 else if ((**scan >= '0') && (**scan <= '9')) {
3118 next = *scan;
3119 SKIP_DIGITS(next);
3120 int num = __kmp_str_to_int(*scan, *next);
3121 KMP_ASSERT(num >= 0);
3122 if ((num > maxOsId) ||
3123 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3124 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3125 && (__kmp_affinity_type != affinity_none))) {
3126 KMP_WARNING(AffIgnoreInvalidProcID, num);
3127 }
3128 }
3129 else {
3130 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
3131 (*setSize)++;
3132 }
3133 *scan = next; // skip num
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003134 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003135 else {
3136 KMP_ASSERT2(0, "bad explicit places list");
3137 }
3138}
3139
3140
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003141//static void
3142void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003143__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
3144 unsigned int *out_numMasks, const char *placelist,
3145 kmp_affin_mask_t *osId2Mask, int maxOsId)
3146{
3147 const char *scan = placelist;
3148 const char *next = placelist;
3149
3150 numNewMasks = 2;
3151 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
3152 * __kmp_affin_mask_size);
3153 nextNewMask = 0;
3154
3155 kmp_affin_mask_t *tempMask = (kmp_affin_mask_t *)__kmp_allocate(
3156 __kmp_affin_mask_size);
3157 KMP_CPU_ZERO(tempMask);
3158 int setSize = 0;
3159
3160 for (;;) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003161 __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
3162
3163 //
3164 // valid follow sets are ',' ':' and EOL
3165 //
3166 SKIP_WS(scan);
3167 if (*scan == '\0' || *scan == ',') {
3168 if (setSize > 0) {
3169 ADD_MASK(tempMask);
3170 }
3171 KMP_CPU_ZERO(tempMask);
3172 setSize = 0;
3173 if (*scan == '\0') {
3174 break;
3175 }
3176 scan++; // skip ','
3177 continue;
3178 }
3179
3180 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3181 scan++; // skip ':'
3182
3183 //
3184 // Read count parameter
3185 //
3186 SKIP_WS(scan);
3187 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3188 "bad explicit places list");
3189 next = scan;
3190 SKIP_DIGITS(next);
Jim Cownie181b4bb2013-12-23 17:28:57 +00003191 int count = __kmp_str_to_int(scan, *next);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003192 KMP_ASSERT(count >= 0);
3193 scan = next;
3194
3195 //
3196 // valid follow sets are ',' ':' and EOL
3197 //
3198 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003199 int stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003200 if (*scan == '\0' || *scan == ',') {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003201 stride = +1;
3202 }
3203 else {
3204 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3205 scan++; // skip ':'
Jim Cownie5e8470a2013-09-27 10:38:44 +00003206
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003207 //
3208 // Read stride parameter
3209 //
3210 int sign = +1;
3211 for (;;) {
3212 SKIP_WS(scan);
3213 if (*scan == '+') {
3214 scan++; // skip '+'
3215 continue;
3216 }
3217 if (*scan == '-') {
3218 sign *= -1;
3219 scan++; // skip '-'
3220 continue;
3221 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003222 break;
3223 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003224 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003225 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3226 "bad explicit places list");
3227 next = scan;
3228 SKIP_DIGITS(next);
3229 stride = __kmp_str_to_int(scan, *next);
3230 KMP_DEBUG_ASSERT(stride >= 0);
3231 scan = next;
3232 stride *= sign;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003233 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003234
3235 if (stride > 0) {
3236 int i;
3237 for (i = 0; i < count; i++) {
3238 int j;
3239 if (setSize == 0) {
3240 break;
3241 }
3242 ADD_MASK(tempMask);
3243 setSize = 0;
3244 for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003245 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3246 KMP_CPU_CLR(j, tempMask);
3247 }
3248 else if ((j > maxOsId) ||
3249 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
3250 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3251 && (__kmp_affinity_type != affinity_none))) {
3252 KMP_WARNING(AffIgnoreInvalidProcID, j);
3253 }
3254 KMP_CPU_CLR(j, tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003255 }
3256 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003257 KMP_CPU_SET(j, tempMask);
3258 setSize++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003259 }
3260 }
3261 for (; j >= 0; j--) {
3262 KMP_CPU_CLR(j, tempMask);
3263 }
3264 }
3265 }
3266 else {
3267 int i;
3268 for (i = 0; i < count; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003269 int j;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003270 if (setSize == 0) {
3271 break;
3272 }
3273 ADD_MASK(tempMask);
3274 setSize = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003275 for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003276 j++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003277 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3278 KMP_CPU_CLR(j, tempMask);
3279 }
3280 else if ((j > maxOsId) ||
3281 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
3282 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3283 && (__kmp_affinity_type != affinity_none))) {
3284 KMP_WARNING(AffIgnoreInvalidProcID, j);
3285 }
3286 KMP_CPU_CLR(j, tempMask);
3287 }
3288 else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003289 KMP_CPU_SET(j, tempMask);
3290 setSize++;
3291 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003292 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003293 for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003294 KMP_CPU_CLR(j, tempMask);
3295 }
3296 }
3297 }
3298 KMP_CPU_ZERO(tempMask);
3299 setSize = 0;
3300
3301 //
3302 // valid follow sets are ',' and EOL
3303 //
3304 SKIP_WS(scan);
3305 if (*scan == '\0') {
3306 break;
3307 }
3308 if (*scan == ',') {
3309 scan++; // skip ','
3310 continue;
3311 }
3312
3313 KMP_ASSERT2(0, "bad explicit places list");
3314 }
3315
3316 *out_numMasks = nextNewMask;
3317 if (nextNewMask == 0) {
3318 *out_masks = NULL;
3319 KMP_INTERNAL_FREE(newMasks);
3320 return;
3321 }
3322 *out_masks
3323 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
3324 memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
3325 __kmp_free(tempMask);
3326 KMP_INTERNAL_FREE(newMasks);
3327}
3328
3329# endif /* OMP_40_ENABLED */
3330
3331#undef ADD_MASK
3332#undef ADD_MASK_OSID
3333
Jim Cownie5e8470a2013-09-27 10:38:44 +00003334static void
3335__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
3336{
3337 if ( __kmp_place_num_cores == 0 ) {
3338 if ( __kmp_place_num_threads_per_core == 0 ) {
3339 return; // no cores limiting actions requested, exit
3340 }
3341 __kmp_place_num_cores = nCoresPerPkg; // use all available cores
3342 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003343 if ( !__kmp_affinity_uniform_topology() ) {
3344 KMP_WARNING( AffThrPlaceNonUniform );
3345 return; // don't support non-uniform topology
3346 }
3347 if ( depth != 3 ) {
3348 KMP_WARNING( AffThrPlaceNonThreeLevel );
3349 return; // don't support non-3-level topology
Jim Cownie5e8470a2013-09-27 10:38:44 +00003350 }
3351 if ( __kmp_place_num_threads_per_core == 0 ) {
3352 __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
3353 }
Andrey Churbanov12875572015-03-10 09:00:36 +00003354 if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003355 KMP_WARNING( AffThrPlaceManyCores );
3356 return;
3357 }
3358
3359 AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
3360 nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
3361 int i, j, k, n_old = 0, n_new = 0;
3362 for ( i = 0; i < nPackages; ++i ) {
3363 for ( j = 0; j < nCoresPerPkg; ++j ) {
Andrey Churbanov12875572015-03-10 09:00:36 +00003364 if ( j < __kmp_place_core_offset || j >= __kmp_place_core_offset + __kmp_place_num_cores ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003365 n_old += __kmp_nThreadsPerCore; // skip not-requested core
3366 } else {
3367 for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) {
Andrey Churbanov12875572015-03-10 09:00:36 +00003368 if ( k < __kmp_place_num_threads_per_core ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003369 newAddr[n_new] = (*pAddr)[n_old]; // copy requested core's data to new location
3370 n_new++;
3371 }
3372 n_old++;
3373 }
3374 }
3375 }
3376 }
3377 nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
3378 __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
3379 __kmp_avail_proc = n_new; // correct avail_proc
3380 __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
3381
3382 __kmp_free( *pAddr );
3383 *pAddr = newAddr; // replace old topology with new one
3384}
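//
// Worked example (hypothetical): on 2 packages x 8 cores x 2 threads with
// __kmp_place_core_offset = 2 and __kmp_place_num_cores = 4, the copy loop
// above keeps cores 2..5 of each package, so __kmp_avail_proc becomes
// 2 * 4 * 2 = 16 and __kmp_ncores becomes 2 * 4 = 8.
//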
3385
Jim Cownie5e8470a2013-09-27 10:38:44 +00003386
3387static AddrUnsPair *address2os = NULL;
3388static int * procarr = NULL;
3389static int __kmp_aff_depth = 0;
3390
3391static void
3392__kmp_aux_affinity_initialize(void)
3393{
3394 if (__kmp_affinity_masks != NULL) {
3395 KMP_ASSERT(fullMask != NULL);
3396 return;
3397 }
3398
3399 //
3400 // Create the "full" mask - this defines all of the processors that we
3401 // consider to be in the machine model. If respect is set, then it is
3402 // the initialization thread's affinity mask. Otherwise, it is all
3403 // processors that we know about on the machine.
3404 //
3405 if (fullMask == NULL) {
3406 fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
3407 }
3408 if (KMP_AFFINITY_CAPABLE()) {
3409 if (__kmp_affinity_respect_mask) {
3410 __kmp_get_system_affinity(fullMask, TRUE);
3411
3412 //
3413 // Count the number of available processors.
3414 //
3415 unsigned i;
3416 __kmp_avail_proc = 0;
3417 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
3418 if (! KMP_CPU_ISSET(i, fullMask)) {
3419 continue;
3420 }
3421 __kmp_avail_proc++;
3422 }
3423 if (__kmp_avail_proc > __kmp_xproc) {
3424 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3425 && (__kmp_affinity_type != affinity_none))) {
3426 KMP_WARNING(ErrorInitializeAffinity);
3427 }
3428 __kmp_affinity_type = affinity_none;
3429 __kmp_affin_mask_size = 0;
3430 return;
3431 }
3432 }
3433 else {
3434 __kmp_affinity_entire_machine_mask(fullMask);
3435 __kmp_avail_proc = __kmp_xproc;
3436 }
3437 }
3438
3439 int depth = -1;
3440 kmp_i18n_id_t msg_id = kmp_i18n_null;
3441
3442 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00003443 // For backward compatibility, setting KMP_CPUINFO_FILE =>
Jim Cownie5e8470a2013-09-27 10:38:44 +00003444 // KMP_TOPOLOGY_METHOD=cpuinfo
3445 //
3446 if ((__kmp_cpuinfo_file != NULL) &&
3447 (__kmp_affinity_top_method == affinity_top_method_all)) {
3448 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
3449 }
3450
3451 if (__kmp_affinity_top_method == affinity_top_method_all) {
3452 //
3453 // In the default code path, errors are not fatal - we just try using
3454 // another method. We only emit a warning message if affinity is on,
3455 // or the verbose flag is set, and the nowarnings flag was not set.
3456 //
3457 const char *file_name = NULL;
3458 int line = 0;
3459
3460# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3461
3462 if (__kmp_affinity_verbose) {
3463 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
3464 }
3465
3466 file_name = NULL;
3467 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3468 if (depth == 0) {
3469 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3470 KMP_ASSERT(address2os == NULL);
3471 return;
3472 }
3473
3474 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003475 if (__kmp_affinity_verbose) {
3476 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003477 KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
3478 KMP_I18N_STR(DecodingLegacyAPIC));
3479 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003480 else {
3481 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
3482 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003483 }
3484
3485 file_name = NULL;
3486 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3487 if (depth == 0) {
3488 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3489 KMP_ASSERT(address2os == NULL);
3490 return;
3491 }
3492 }
3493
3494# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3495
3496# if KMP_OS_LINUX
3497
3498 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003499 if (__kmp_affinity_verbose) {
3500 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003501 KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
3502 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003503 else {
3504 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
3505 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003506 }
3507
3508 FILE *f = fopen("/proc/cpuinfo", "r");
3509 if (f == NULL) {
3510 msg_id = kmp_i18n_str_CantOpenCpuinfo;
3511 }
3512 else {
3513 file_name = "/proc/cpuinfo";
3514 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3515 fclose(f);
3516 if (depth == 0) {
3517 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3518 KMP_ASSERT(address2os == NULL);
3519 return;
3520 }
3521 }
3522 }
3523
3524# endif /* KMP_OS_LINUX */
3525
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003526# if KMP_GROUP_AFFINITY
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003527
3528 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
3529 if (__kmp_affinity_verbose) {
3530 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3531 }
3532
3533 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3534 KMP_ASSERT(depth != 0);
3535 }
3536
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003537# endif /* KMP_GROUP_AFFINITY */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003538
Jim Cownie5e8470a2013-09-27 10:38:44 +00003539 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003540 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003541 if (file_name == NULL) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003542 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003543 }
3544 else if (line == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003545 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003546 }
3547 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003548 KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003549 }
3550 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003551 // FIXME - print msg if msg_id = kmp_i18n_null ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00003552
3553 file_name = "";
3554 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3555 if (depth == 0) {
3556 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3557 KMP_ASSERT(address2os == NULL);
3558 return;
3559 }
3560 KMP_ASSERT(depth > 0);
3561 KMP_ASSERT(address2os != NULL);
3562 }
3563 }
3564
3565 //
3566 // If the user has specified that a particular topology discovery method
3567 // is to be used, then we abort if that method fails. The exception is
3568 // group affinity, which might have been implicitly set.
3569 //
3570
3571# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3572
3573 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
3574 if (__kmp_affinity_verbose) {
3575 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3576 KMP_I18N_STR(Decodingx2APIC));
3577 }
3578
3579 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3580 if (depth == 0) {
3581 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3582 KMP_ASSERT(address2os == NULL);
3583 return;
3584 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003585 if (depth < 0) {
3586 KMP_ASSERT(msg_id != kmp_i18n_null);
3587 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3588 }
3589 }
3590 else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
3591 if (__kmp_affinity_verbose) {
3592 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3593 KMP_I18N_STR(DecodingLegacyAPIC));
3594 }
3595
3596 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3597 if (depth == 0) {
3598 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3599 KMP_ASSERT(address2os == NULL);
3600 return;
3601 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003602 if (depth < 0) {
3603 KMP_ASSERT(msg_id != kmp_i18n_null);
3604 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3605 }
3606 }
3607
3608# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3609
3610 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
3611 const char *filename;
3612 if (__kmp_cpuinfo_file != NULL) {
3613 filename = __kmp_cpuinfo_file;
3614 }
3615 else {
3616 filename = "/proc/cpuinfo";
3617 }
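        // Note: __kmp_cpuinfo_file is normally populated from the
        // KMP_CPUINFO_FILE environment variable (cf. the
        // NameComesFrom_CPUINFO_FILE hint below), which lets a cpuinfo-format
        // snapshot be supplied on systems without a live /proc/cpuinfo.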
3618
3619 if (__kmp_affinity_verbose) {
3620 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
3621 }
3622
3623 FILE *f = fopen(filename, "r");
3624 if (f == NULL) {
3625 int code = errno;
3626 if (__kmp_cpuinfo_file != NULL) {
3627 __kmp_msg(
3628 kmp_ms_fatal,
3629 KMP_MSG(CantOpenFileForReading, filename),
3630 KMP_ERR(code),
3631 KMP_HNT(NameComesFrom_CPUINFO_FILE),
3632 __kmp_msg_null
3633 );
3634 }
3635 else {
3636 __kmp_msg(
3637 kmp_ms_fatal,
3638 KMP_MSG(CantOpenFileForReading, filename),
3639 KMP_ERR(code),
3640 __kmp_msg_null
3641 );
3642 }
3643 }
3644 int line = 0;
3645 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3646 fclose(f);
3647 if (depth < 0) {
3648 KMP_ASSERT(msg_id != kmp_i18n_null);
3649 if (line > 0) {
3650 KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
3651 }
3652 else {
3653 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
3654 }
3655 }
3656 if (__kmp_affinity_type == affinity_none) {
3657 KMP_ASSERT(depth == 0);
3658 KMP_ASSERT(address2os == NULL);
3659 return;
3660 }
3661 }
3662
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003663# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00003664
3665 else if (__kmp_affinity_top_method == affinity_top_method_group) {
3666 if (__kmp_affinity_verbose) {
3667 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3668 }
3669
3670 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3671 KMP_ASSERT(depth != 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003672 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003673 KMP_ASSERT(msg_id != kmp_i18n_null);
3674 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003675 }
3676 }
3677
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003678# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003679
3680 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
3681 if (__kmp_affinity_verbose) {
3682 KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
3683 }
3684
3685 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3686 if (depth == 0) {
3687 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3688 KMP_ASSERT(address2os == NULL);
3689 return;
3690 }
3691 // should not fail
3692 KMP_ASSERT(depth > 0);
3693 KMP_ASSERT(address2os != NULL);
3694 }
3695
3696 if (address2os == NULL) {
3697 if (KMP_AFFINITY_CAPABLE()
3698 && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3699 && (__kmp_affinity_type != affinity_none)))) {
3700 KMP_WARNING(ErrorInitializeAffinity);
3701 }
3702 __kmp_affinity_type = affinity_none;
3703 __kmp_affin_mask_size = 0;
3704 return;
3705 }
3706
Jim Cownie5e8470a2013-09-27 10:38:44 +00003707 __kmp_apply_thread_places(&address2os, depth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003708
3709 //
3710 // Create the table of masks, indexed by OS processor id.
3711 //
3712 unsigned maxIndex;
3713 unsigned numUnique;
3714 kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
3715 address2os, __kmp_avail_proc);
3716 if (__kmp_affinity_gran_levels == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003717 KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003718 }
3719
3720 //
3721 // Set the childNums vector in all Address objects. This must be done
3722 // before we can sort using __kmp_affinity_cmp_Address_child_num(),
3723 // which takes into account the setting of __kmp_affinity_compact.
3724 //
3725 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
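    // (Descriptive note: each Address records, per topology level, which
    // child it is under its parent; the comparison routine used at
    // sortAddresses permutes these per-level child numbers according to
    // __kmp_affinity_compact before comparing, which is what turns a single
    // sort into the compact/scatter/logical/physical orderings.)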
3726
3727 switch (__kmp_affinity_type) {
3728
3729 case affinity_explicit:
3730 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
3731# if OMP_40_ENABLED
3732 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3733# endif
3734 {
3735 __kmp_affinity_process_proclist(&__kmp_affinity_masks,
3736 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3737 maxIndex);
3738 }
3739# if OMP_40_ENABLED
3740 else {
3741 __kmp_affinity_process_placelist(&__kmp_affinity_masks,
3742 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3743 maxIndex);
3744 }
3745# endif
3746 if (__kmp_affinity_num_masks == 0) {
3747 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3748 && (__kmp_affinity_type != affinity_none))) {
3749 KMP_WARNING(AffNoValidProcID);
3750 }
3751 __kmp_affinity_type = affinity_none;
3752 return;
3753 }
3754 break;
3755
3756 //
3757 // The other affinity types rely on sorting the Addresses according
3758 // to some permutation of the machine topology tree. Set
3759 // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
3760 // then jump to a common code fragment to do the sort and create
3761 // the array of affinity masks.
3762 //
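    // User-visible effect (illustrative): with KMP_AFFINITY=compact,
    // consecutive OpenMP threads land as close together in the topology as
    // possible (fill one core's contexts, then move to the next core);
    // with KMP_AFFINITY=scatter, they are spread as far apart as possible
    // (round-robin across packages and cores before reusing sibling
    // hardware threads).
    //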
3763
3764 case affinity_logical:
3765 __kmp_affinity_compact = 0;
3766 if (__kmp_affinity_offset) {
3767 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3768 % __kmp_avail_proc;
3769 }
3770 goto sortAddresses;
3771
3772 case affinity_physical:
3773 if (__kmp_nThreadsPerCore > 1) {
3774 __kmp_affinity_compact = 1;
3775 if (__kmp_affinity_compact >= depth) {
3776 __kmp_affinity_compact = 0;
3777 }
3778 } else {
3779 __kmp_affinity_compact = 0;
3780 }
3781 if (__kmp_affinity_offset) {
3782 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3783 % __kmp_avail_proc;
3784 }
3785 goto sortAddresses;
3786
3787 case affinity_scatter:
3788 if (__kmp_affinity_compact >= depth) {
3789 __kmp_affinity_compact = 0;
3790 }
3791 else {
3792 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
3793 }
3794 goto sortAddresses;
3795
3796 case affinity_compact:
3797 if (__kmp_affinity_compact >= depth) {
3798 __kmp_affinity_compact = depth - 1;
3799 }
3800 goto sortAddresses;
3801
Jim Cownie5e8470a2013-09-27 10:38:44 +00003802 case affinity_balanced:
Andrey Churbanove4b92132015-03-05 17:46:50 +00003803 // Balanced affinity works only within a single package; the uniform and non-uniform cases are handled separately below
Jim Cownie5e8470a2013-09-27 10:38:44 +00003804 if( nPackages > 1 ) {
3805 if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
3806 KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
3807 }
3808 __kmp_affinity_type = affinity_none;
3809 return;
3810 } else if( __kmp_affinity_uniform_topology() ) {
3811 break;
3812 } else { // Non-uniform topology
3813
3814 // Save the depth for further usage
3815 __kmp_aff_depth = depth;
3816
3817 // Number of hyper threads per core in HT machine
3818 int nth_per_core = __kmp_nThreadsPerCore;
3819
3820 int core_level;
3821 if( nth_per_core > 1 ) {
3822 core_level = depth - 2;
3823 } else {
3824 core_level = depth - 1;
3825 }
3826 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
3827 int nproc = nth_per_core * ncores;
3828
3829 procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
3830 for( int i = 0; i < nproc; i++ ) {
3831 procarr[ i ] = -1;
3832 }
3833
3834 for( int i = 0; i < __kmp_avail_proc; i++ ) {
3835 int proc = address2os[ i ].second;
3836 // If depth == 3 then level=0 - package, level=1 - core, level=2 - thread.
3837 // If there is only one thread per core then depth == 2: level 0 - package,
3838 // level 1 - core.
3839 int level = depth - 1;
3840
3841 // Defaults for the case of one thread context per core (__kmp_nth_per_core == 1)
3842 int thread = 0;
3843 int core = address2os[ i ].first.labels[ level ];
3844 // If the thread level exists, that is, we have more than one thread context per core
3845 if( nth_per_core > 1 ) {
3846 thread = address2os[ i ].first.labels[ level ] % nth_per_core;
3847 core = address2os[ i ].first.labels[ level - 1 ];
3848 }
3849 procarr[ core * nth_per_core + thread ] = proc;
3850 }
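            // Illustrative layout of procarr (assumed machine: 2 cores with
            // 2 thread contexts each, the second context of core 1 unused):
            //     procarr = { os0, os1, os2, -1 }
            // i.e. procarr[ core * nth_per_core + thread ] holds the OS proc
            // id bound to that (core, thread) slot, or -1 if the slot is
            // empty.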
3851
3852 break;
3853 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003854
3855 sortAddresses:
3856 //
3857 // Allocate the gtid->affinity mask table.
3858 //
3859 if (__kmp_affinity_dups) {
3860 __kmp_affinity_num_masks = __kmp_avail_proc;
3861 }
3862 else {
3863 __kmp_affinity_num_masks = numUnique;
3864 }
3865
3866# if OMP_40_ENABLED
3867 if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
3868 && ( __kmp_affinity_num_places > 0 )
3869 && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
3870 __kmp_affinity_num_masks = __kmp_affinity_num_places;
3871 }
3872# endif
3873
3874 __kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
3875 __kmp_affinity_num_masks * __kmp_affin_mask_size);
3876
3877 //
3878 // Sort the address2os table according to the current setting of
3879 // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
3880 //
3881 qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
3882 __kmp_affinity_cmp_Address_child_num);
3883 {
3884 int i;
3885 unsigned j;
3886 for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
3887 if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
3888 continue;
3889 }
3890 unsigned osId = address2os[i].second;
3891 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
3892 kmp_affin_mask_t *dest
3893 = KMP_CPU_INDEX(__kmp_affinity_masks, j);
3894 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
3895 KMP_CPU_COPY(dest, src);
3896 if (++j >= __kmp_affinity_num_masks) {
3897 break;
3898 }
3899 }
3900 KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
3901 }
3902 break;
3903
3904 default:
3905 KMP_ASSERT2(0, "Unexpected affinity setting");
3906 }
3907
3908 __kmp_free(osId2Mask);
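    // Hand the final topology table to the hierarchical-barrier machinery.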
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003909 machine_hierarchy.init(address2os, __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003910}
3911
3912
3913void
3914__kmp_affinity_initialize(void)
3915{
3916 //
3917 // Much of the code above was written assuming that if a machine was not
3918 // affinity capable, then __kmp_affinity_type == affinity_none. We now
3919 // explicitly represent this as __kmp_affinity_type == affinity_disabled.
3920 //
3921 // There are too many checks for __kmp_affinity_type == affinity_none
3922 // in this code. Instead of trying to change them all, check if
3923 // __kmp_affinity_type == affinity_disabled, and if so, slam it with
3924 // affinity_none, call the real initialization routine, then restore
3925 // __kmp_affinity_type to affinity_disabled.
3926 //
3927 int disabled = (__kmp_affinity_type == affinity_disabled);
3928 if (! KMP_AFFINITY_CAPABLE()) {
3929 KMP_ASSERT(disabled);
3930 }
3931 if (disabled) {
3932 __kmp_affinity_type = affinity_none;
3933 }
3934 __kmp_aux_affinity_initialize();
3935 if (disabled) {
3936 __kmp_affinity_type = affinity_disabled;
3937 }
3938}
3939
3940
3941void
3942__kmp_affinity_uninitialize(void)
3943{
3944 if (__kmp_affinity_masks != NULL) {
3945 __kmp_free(__kmp_affinity_masks);
3946 __kmp_affinity_masks = NULL;
3947 }
3948 if (fullMask != NULL) {
3949 KMP_CPU_FREE(fullMask);
3950 fullMask = NULL;
3951 }
3952 __kmp_affinity_num_masks = 0;
3953# if OMP_40_ENABLED
3954 __kmp_affinity_num_places = 0;
3955# endif
3956 if (__kmp_affinity_proclist != NULL) {
3957 __kmp_free(__kmp_affinity_proclist);
3958 __kmp_affinity_proclist = NULL;
3959 }
3960 if( address2os != NULL ) {
3961 __kmp_free( address2os );
3962 address2os = NULL;
3963 }
3964 if( procarr != NULL ) {
3965 __kmp_free( procarr );
3966 procarr = NULL;
3967 }
3968}
3969
3970
3971void
3972__kmp_affinity_set_init_mask(int gtid, int isa_root)
3973{
3974 if (! KMP_AFFINITY_CAPABLE()) {
3975 return;
3976 }
3977
3978 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
3979 if (th->th.th_affin_mask == NULL) {
3980 KMP_CPU_ALLOC(th->th.th_affin_mask);
3981 }
3982 else {
3983 KMP_CPU_ZERO(th->th.th_affin_mask);
3984 }
3985
3986 //
3987 // Copy the thread mask to the kmp_info_t structure. If
3988 // __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
3989 // that has all of the OS proc ids set; if __kmp_affinity_respect_mask
3990 // is set, the full mask is instead the mask of the initialization
3991 // thread.
3992 //
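    // Illustrative example (assumed values): with __kmp_affinity_num_masks
    // == 4 and __kmp_affinity_offset == 1, gtid 0 binds to place 1, gtid 1
    // to place 2, and gtid 3 wraps around to place 0, via
    //     i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
    //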
3993 kmp_affin_mask_t *mask;
3994 int i;
3995
3996# if OMP_40_ENABLED
3997 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3998# endif
3999 {
Andrey Churbanovf28f6132015-01-13 14:54:00 +00004000 if ((__kmp_affinity_type == affinity_none) || (__kmp_affinity_type == affinity_balanced)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004001 ) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004002# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004003 if (__kmp_num_proc_groups > 1) {
4004 return;
4005 }
4006# endif
4007 KMP_ASSERT(fullMask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004008 i = KMP_PLACE_ALL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004009 mask = fullMask;
4010 }
4011 else {
4012 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4013 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4014 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4015 }
4016 }
4017# if OMP_40_ENABLED
4018 else {
4019 if ((! isa_root)
4020 || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004021# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004022 if (__kmp_num_proc_groups > 1) {
4023 return;
4024 }
4025# endif
4026 KMP_ASSERT(fullMask != NULL);
4027 i = KMP_PLACE_ALL;
4028 mask = fullMask;
4029 }
4030 else {
4031 //
4032 // int i = some hash function or just a counter that doesn't
4033 // always start at 0. Use gtid for now.
4034 //
4035 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4036 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4037 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4038 }
4039 }
4040# endif
4041
4042# if OMP_40_ENABLED
4043 th->th.th_current_place = i;
4044 if (isa_root) {
4045 th->th.th_new_place = i;
4046 th->th.th_first_place = 0;
4047 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4048 }
4049
4050 if (i == KMP_PLACE_ALL) {
4051 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
4052 gtid));
4053 }
4054 else {
4055 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
4056 gtid, i));
4057 }
4058# else
4059 if (i == -1) {
4060 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
4061 gtid));
4062 }
4063 else {
4064 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
4065 gtid, i));
4066 }
4067# endif /* OMP_40_ENABLED */
4068
4069 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4070
4071 if (__kmp_affinity_verbose) {
4072 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4073 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4074 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004075 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
4076 buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004077 }
4078
4079# if KMP_OS_WINDOWS
4080 //
4081 // On Windows* OS, the process affinity mask might have changed.
4082 // If the user didn't request affinity and this call fails,
4083 // just continue silently. See CQ171393.
4084 //
4085 if ( __kmp_affinity_type == affinity_none ) {
4086 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
4087 }
4088 else
4089# endif
4090 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4091}
4092
4093
4094# if OMP_40_ENABLED
4095
4096void
4097__kmp_affinity_set_place(int gtid)
4098{
4099 int retval;
4100
4101 if (! KMP_AFFINITY_CAPABLE()) {
4102 return;
4103 }
4104
4105 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4106
4107 KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
4108 gtid, th->th.th_new_place, th->th.th_current_place));
4109
4110 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00004111 // Check that the new place is within this thread's partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004112 //
4113 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004114 KMP_ASSERT(th->th.th_new_place >= 0);
4115 KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004116 if (th->th.th_first_place <= th->th.th_last_place) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004117 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004118 && (th->th.th_new_place <= th->th.th_last_place));
4119 }
4120 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004121 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004122 || (th->th.th_new_place >= th->th.th_last_place));
4123 }
4124
4125 //
4126 // Copy the thread mask to the kmp_info_t structure,
4127 // and set this thread's affinity.
4128 //
4129 kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
4130 th->th.th_new_place);
4131 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4132 th->th.th_current_place = th->th.th_new_place;
4133
4134 if (__kmp_affinity_verbose) {
4135 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4136 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4137 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004138 KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
4139 gtid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004140 }
4141 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4142}
4143
4144# endif /* OMP_40_ENABLED */
4145
4146
4147int
4148__kmp_aux_set_affinity(void **mask)
4149{
4150 int gtid;
4151 kmp_info_t *th;
4152 int retval;
4153
4154 if (! KMP_AFFINITY_CAPABLE()) {
4155 return -1;
4156 }
4157
4158 gtid = __kmp_entry_gtid();
4159 KA_TRACE(1000, ;{
4160 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4161 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4162 (kmp_affin_mask_t *)(*mask));
4163 __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
4164 gtid, buf);
4165 });
4166
4167 if (__kmp_env_consistency_check) {
4168 if ((mask == NULL) || (*mask == NULL)) {
4169 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4170 }
4171 else {
4172 unsigned proc;
4173 int num_procs = 0;
4174
4175 for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
4176 if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
4177 continue;
4178 }
4179 num_procs++;
4180 if (! KMP_CPU_ISSET(proc, fullMask)) {
4181 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4182 break;
4183 }
4184 }
4185 if (num_procs == 0) {
4186 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4187 }
4188
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004189# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004190 if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
4191 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4192 }
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004193# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004194
4195 }
4196 }
4197
4198 th = __kmp_threads[gtid];
4199 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4200 retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4201 if (retval == 0) {
4202 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
4203 }
4204
4205# if OMP_40_ENABLED
4206 th->th.th_current_place = KMP_PLACE_UNDEFINED;
4207 th->th.th_new_place = KMP_PLACE_UNDEFINED;
4208 th->th.th_first_place = 0;
4209 th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004210
4211 //
4212 // Turn off 4.0 affinity for the current thread at this parallel level.
4213 //
4214 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004215# endif
4216
4217 return retval;
4218}
4219
4220
4221int
4222__kmp_aux_get_affinity(void **mask)
4223{
4224 int gtid;
4225 int retval;
4226 kmp_info_t *th;
4227
4228 if (! KMP_AFFINITY_CAPABLE()) {
4229 return -1;
4230 }
4231
4232 gtid = __kmp_entry_gtid();
4233 th = __kmp_threads[gtid];
4234 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4235
4236 KA_TRACE(1000, ;{
4237 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4238 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4239 th->th.th_affin_mask);
4240 __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
4241 });
4242
4243 if (__kmp_env_consistency_check) {
4244 if ((mask == NULL) || (*mask == NULL)) {
4245 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
4246 }
4247 }
4248
4249# if !KMP_OS_WINDOWS
4250
4251 retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4252 KA_TRACE(1000, ;{
4253 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4254 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4255 (kmp_affin_mask_t *)(*mask));
4256 __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
4257 });
4258 return retval;
4259
4260# else
4261
4262 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
4263 return 0;
4264
4265# endif /* KMP_OS_WINDOWS */
4266
4267}
4268
Jim Cownie5e8470a2013-09-27 10:38:44 +00004269int
4270__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
4271{
4272 int retval;
4273
4274 if (! KMP_AFFINITY_CAPABLE()) {
4275 return -1;
4276 }
4277
4278 KA_TRACE(1000, ;{
4279 int gtid = __kmp_entry_gtid();
4280 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4281 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4282 (kmp_affin_mask_t *)(*mask));
4283 __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
4284 proc, gtid, buf);
4285 });
4286
4287 if (__kmp_env_consistency_check) {
4288 if ((mask == NULL) || (*mask == NULL)) {
4289 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
4290 }
4291 }
4292
4293 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4294 return -1;
4295 }
4296 if (! KMP_CPU_ISSET(proc, fullMask)) {
4297 return -2;
4298 }
4299
4300 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
4301 return 0;
4302}
4303
4304
4305int
4306__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
4307{
4308 int retval;
4309
4310 if (! KMP_AFFINITY_CAPABLE()) {
4311 return -1;
4312 }
4313
4314 KA_TRACE(1000, ;{
4315 int gtid = __kmp_entry_gtid();
4316 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4317 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4318 (kmp_affin_mask_t *)(*mask));
4319 __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
4320 proc, gtid, buf);
4321 });
4322
4323 if (__kmp_env_consistency_check) {
4324 if ((mask == NULL) || (*mask == NULL)) {
4325 KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
4326 }
4327 }
4328
4329 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4330 return -1;
4331 }
4332 if (! KMP_CPU_ISSET(proc, fullMask)) {
4333 return -2;
4334 }
4335
4336 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
4337 return 0;
4338}
4339
4340
4341int
4342__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
4343{
4344 int retval;
4345
4346 if (! KMP_AFFINITY_CAPABLE()) {
4347 return -1;
4348 }
4349
4350 KA_TRACE(1000, ;{
4351 int gtid = __kmp_entry_gtid();
4352 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4353 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4354 (kmp_affin_mask_t *)(*mask));
4355 __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
4356 proc, gtid, buf);
4357 });
4358
4359 if (__kmp_env_consistency_check) {
4360 if ((mask == NULL) || (*mask == NULL)) {
Andrey Churbanov4b2f17a2015-01-29 15:49:22 +00004361 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
Jim Cownie5e8470a2013-09-27 10:38:44 +00004362 }
4363 }
4364
4365 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4366 return 0;
4367 }
4368 if (! KMP_CPU_ISSET(proc, fullMask)) {
4369 return 0;
4370 }
4371
4372 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
4373}
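
//
// Usage sketch for the user-level API that reaches the __kmp_aux_* entry
// points above (illustrative user code, assuming this runtime's omp.h;
// error handling omitted):
//
//     kmp_affinity_mask_t mask;
//     kmp_create_affinity_mask(&mask);
//     kmp_set_affinity_mask_proc(3, &mask);          // add OS proc 3
//     if (kmp_set_affinity(&mask) != 0) { /* not affinity capable */ }
//     int on = kmp_get_affinity_mask_proc(3, &mask); // nonzero if proc 3 set
//     kmp_destroy_affinity_mask(&mask);
//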
4374
Jim Cownie5e8470a2013-09-27 10:38:44 +00004375
4376// Dynamic affinity settings - Affinity balanced
4377void __kmp_balanced_affinity( int tid, int nthreads )
4378{
4379 if( __kmp_affinity_uniform_topology() ) {
4380 int coreID;
4381 int threadID;
4382 // Number of hyper threads per core in HT machine
4383 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
4384 // Number of cores
4385 int ncores = __kmp_ncores;
4386 // How many threads will be bound to each core
4387 int chunk = nthreads / ncores;
4388 // How many cores will have an additional thread bound to it - "big cores"
4389 int big_cores = nthreads % ncores;
4390 // Number of threads on the big cores
4391 int big_nth = ( chunk + 1 ) * big_cores;
4392 if( tid < big_nth ) {
4393 coreID = tid / (chunk + 1 );
4394 threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
4395 } else { //tid >= big_nth
4396 coreID = ( tid - big_cores ) / chunk;
4397 threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
4398 }
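        // Worked example (assumed machine: 10 threads on 4 cores with 2
        // contexts per core): chunk = 2, big_cores = 2, big_nth = 6;
        // tid 5 -> coreID = 5/3 = 1, threadID = (5%3)%2 = 0;
        // tid 7 -> coreID = (7-2)/2 = 2, threadID = ((7-2)%2)%2 = 1.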
4399
4400 KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
4401 "Illegal set affinity operation when not capable");
4402
4403 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
4404 KMP_CPU_ZERO(mask);
4405
4406 // Granularity == thread
4407 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4408 int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
4409 KMP_CPU_SET( osID, mask);
4410 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4411 for( int i = 0; i < __kmp_nth_per_core; i++ ) {
4412 int osID;
4413 osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
4414 KMP_CPU_SET( osID, mask);
4415 }
4416 }
4417 if (__kmp_affinity_verbose) {
4418 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4419 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004420 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4421 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004422 }
4423 __kmp_set_system_affinity( mask, TRUE );
4424 } else { // Non-uniform topology
4425
4426 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
4427 KMP_CPU_ZERO(mask);
4428
4429 // Number of hyper threads per core in HT machine
4430 int nth_per_core = __kmp_nThreadsPerCore;
4431 int core_level;
4432 if( nth_per_core > 1 ) {
4433 core_level = __kmp_aff_depth - 2;
4434 } else {
4435 core_level = __kmp_aff_depth - 1;
4436 }
4437
4438 // Number of cores - maximum value; it does not count trailing cores with 0 processors
4439 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
4440
4441 // As a performance optimization, handle the special case nthreads == __kmp_avail_proc separately
4442 if( nthreads == __kmp_avail_proc ) {
4443 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4444 int osID = address2os[ tid ].second;
4445 KMP_CPU_SET( osID, mask);
4446 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4447 int coreID = address2os[ tid ].first.labels[ core_level ];
4448 // Count the osIDs found for the current core; there can be no more than nth_per_core of them;
4449 // since address2os is sorted we can break when cnt == nth_per_core
4450 int cnt = 0;
4451 for( int i = 0; i < __kmp_avail_proc; i++ ) {
4452 int osID = address2os[ i ].second;
4453 int core = address2os[ i ].first.labels[ core_level ];
4454 if( core == coreID ) {
4455 KMP_CPU_SET( osID, mask);
4456 cnt++;
4457 if( cnt == nth_per_core ) {
4458 break;
4459 }
4460 }
4461 }
4462 }
4463 } else if( nthreads <= __kmp_ncores ) {
4464
4465 int core = 0;
4466 for( int i = 0; i < ncores; i++ ) {
4467 // Check if this core from procarr[] is in the mask
4468 int in_mask = 0;
4469 for( int j = 0; j < nth_per_core; j++ ) {
4470 if( procarr[ i * nth_per_core + j ] != -1 ) {
4471 in_mask = 1;
4472 break;
4473 }
4474 }
4475 if( in_mask ) {
4476 if( tid == core ) {
4477 for( int j = 0; j < nth_per_core; j++ ) {
4478 int osID = procarr[ i * nth_per_core + j ];
4479 if( osID != -1 ) {
4480 KMP_CPU_SET( osID, mask );
4481 // For granularity=thread it is enough to set the first available osID for this core
4482 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4483 break;
4484 }
4485 }
4486 }
4487 break;
4488 } else {
4489 core++;
4490 }
4491 }
4492 }
4493
4494 } else { // nthreads > __kmp_ncores
4495
4496 // Array to save the number of processors at each core
4497 int nproc_at_core[ ncores ];
4498 // Array to save the number of cores with "x" available processors;
4499 int ncores_with_x_procs[ nth_per_core + 1 ];
4500 // Array to save the number of cores with # procs from x to nth_per_core
4501 int ncores_with_x_to_max_procs[ nth_per_core + 1 ];
4502
4503 for( int i = 0; i <= nth_per_core; i++ ) {
4504 ncores_with_x_procs[ i ] = 0;
4505 ncores_with_x_to_max_procs[ i ] = 0;
4506 }
4507
4508 for( int i = 0; i < ncores; i++ ) {
4509 int cnt = 0;
4510 for( int j = 0; j < nth_per_core; j++ ) {
4511 if( procarr[ i * nth_per_core + j ] != -1 ) {
4512 cnt++;
4513 }
4514 }
4515 nproc_at_core[ i ] = cnt;
4516 ncores_with_x_procs[ cnt ]++;
4517 }
4518
4519 for( int i = 0; i <= nth_per_core; i++ ) {
4520 for( int j = i; j <= nth_per_core; j++ ) {
4521 ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
4522 }
4523 }
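            // Illustrative values (assumed: 3 cores exposing 2, 1 and 0
            // usable contexts, nth_per_core = 2):
            //     nproc_at_core              = { 2, 1, 0 }
            //     ncores_with_x_procs        = { 1, 1, 1 }  // exactly x procs
            //     ncores_with_x_to_max_procs = { 3, 2, 1 }  // at least x procs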
4524
4525 // Max number of processors
4526 int nproc = nth_per_core * ncores;
4527 // An array to keep the number of threads assigned to each hardware context
4528 int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
4529 for( int i = 0; i < nproc; i++ ) {
4530 newarr[ i ] = 0;
4531 }
4532
4533 int nth = nthreads;
4534 int flag = 0;
4535 while( nth > 0 ) {
4536 for( int j = 1; j <= nth_per_core; j++ ) {
4537 int cnt = ncores_with_x_to_max_procs[ j ];
4538 for( int i = 0; i < ncores; i++ ) {
4539 // Skip the core with 0 processors
4540 if( nproc_at_core[ i ] == 0 ) {
4541 continue;
4542 }
4543 for( int k = 0; k < nth_per_core; k++ ) {
4544 if( procarr[ i * nth_per_core + k ] != -1 ) {
4545 if( newarr[ i * nth_per_core + k ] == 0 ) {
4546 newarr[ i * nth_per_core + k ] = 1;
4547 cnt--;
4548 nth--;
4549 break;
4550 } else {
4551 if( flag != 0 ) {
4552 newarr[ i * nth_per_core + k ] ++;
4553 cnt--;
4554 nth--;
4555 break;
4556 }
4557 }
4558 }
4559 }
4560 if( cnt == 0 || nth == 0 ) {
4561 break;
4562 }
4563 }
4564 if( nth == 0 ) {
4565 break;
4566 }
4567 }
4568 flag = 1;
4569 }
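            // At this point newarr[ i ] holds how many of the nthreads
            // threads were assigned to hardware context i: the first pass
            // (flag == 0) gives every usable context at most one thread,
            // and subsequent passes (flag == 1) keep topping contexts up
            // round-robin until all nthreads are placed.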
4570 int sum = 0;
4571 for( int i = 0; i < nproc; i++ ) {
4572 sum += newarr[ i ];
4573 if( sum > tid ) {
4574 // Granularity == thread
4575 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4576 int osID = procarr[ i ];
4577 KMP_CPU_SET( osID, mask);
4578 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4579 int coreID = i / nth_per_core;
4580 for( int ii = 0; ii < nth_per_core; ii++ ) {
4581 int osID = procarr[ coreID * nth_per_core + ii ];
4582 if( osID != -1 ) {
4583 KMP_CPU_SET( osID, mask);
4584 }
4585 }
4586 }
4587 break;
4588 }
4589 }
4590 __kmp_free( newarr );
4591 }
4592
4593 if (__kmp_affinity_verbose) {
4594 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4595 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004596 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4597 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004598 }
4599 __kmp_set_system_affinity( mask, TRUE );
4600 }
4601}
4602
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004603#else
4604 // affinity not supported
4605
4606kmp_uint32 mac_skipPerLevel[7];
4607kmp_uint32 mac_depth;
4608kmp_uint8 mac_leaf_kids;
4609void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
4610 static int first = 1;
4611 if (first) {
4612 const kmp_uint32 maxLevels = 7;
4613 kmp_uint32 numPerLevel[maxLevels];
4614
4615 for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
4616 numPerLevel[i] = 1;
4617 mac_skipPerLevel[i] = 1;
4618 }
4619
4620 mac_depth = 2;
4621 numPerLevel[0] = nproc;
4622
4623 kmp_uint32 branch = 4;
4624 if (numPerLevel[0] == 1) branch = nproc/4;
4625 if (branch<4) branch=4;
4626 for (kmp_uint32 d=0; d<mac_depth-1; ++d) { // optimize hierarchy width
4627 while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
4628 if (numPerLevel[d] & 1) numPerLevel[d]++;
4629 numPerLevel[d] = numPerLevel[d] >> 1;
4630 if (numPerLevel[d+1] == 1) mac_depth++;
4631 numPerLevel[d+1] = numPerLevel[d+1] << 1;
4632 }
4633 if(numPerLevel[0] == 1) {
4634 branch = branch >> 1;
4635 if (branch<4) branch = 4;
4636 }
4637 }
4638
4639 for (kmp_uint32 i=1; i<mac_depth; ++i)
4640 mac_skipPerLevel[i] = numPerLevel[i-1] * mac_skipPerLevel[i-1];
4641 mac_leaf_kids = (kmp_uint8)numPerLevel[0]-1;
4642 first=0;
4643 }
4644 thr_bar->depth = mac_depth;
4645 thr_bar->base_leaf_kids = mac_leaf_kids;
4646 thr_bar->skip_per_level = mac_skipPerLevel;
4647}
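
// Worked example for the fallback hierarchy (nproc == 8): the loop above
// rebalances numPerLevel from {8,1} to {4,2}, giving mac_depth == 3,
// mac_skipPerLevel == {1,4,8,...} and mac_leaf_kids == 3; the result is
// computed once and cached in the mac_* statics for all later callers.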
4648
Alp Toker763b9392014-02-28 09:42:41 +00004649#endif // KMP_AFFINITY_SUPPORTED