/*
 * kmp_affinity.cpp -- affinity management
 * $Revision: 43473 $
 * $Date: 2014-09-26 15:02:57 -0500 (Fri, 26 Sep 2014) $
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_str.h"
#include "kmp_wrapper_getpid.h"

#if KMP_AFFINITY_SUPPORTED

//
// Print the affinity mask to the character array in a pretty format.
//
char *
__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
{
    KMP_ASSERT(buf_len >= 40);
    char *scan = buf;
    char *end = buf + buf_len - 1;

    //
    // Find first element / check for empty set.
    //
    size_t i;
    for (i = 0; i < KMP_CPU_SETSIZE; i++) {
        if (KMP_CPU_ISSET(i, mask)) {
            break;
        }
    }
    if (i == KMP_CPU_SETSIZE) {
        sprintf(scan, "{<empty>}");
        while (*scan != '\0') scan++;
        KMP_ASSERT(scan <= end);
        return buf;
    }

    sprintf(scan, "{%ld", (long)i);
    while (*scan != '\0') scan++;
    i++;
    for (; i < KMP_CPU_SETSIZE; i++) {
        if (! KMP_CPU_ISSET(i, mask)) {
            continue;
        }

        //
        // Check for buffer overflow.  A string of the form ",<n>" will have
        // at most 10 characters, plus we want to leave room to print ",...}"
        // if the set is too large to print for a total of 15 characters.
        // We already left room for '\0' in setting end.
        //
        if (end - scan < 15) {
            break;
        }
        sprintf(scan, ",%-ld", (long)i);
        while (*scan != '\0') scan++;
    }
    if (i < KMP_CPU_SETSIZE) {
        sprintf(scan, ",...");
        while (*scan != '\0') scan++;
    }
    sprintf(scan, "}");
    while (*scan != '\0') scan++;
    KMP_ASSERT(scan <= end);
    return buf;
}
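//
// Illustrative example (derived from the code above): a mask with bits 0-3
// and 6 set prints as "{0,1,2,3,6}"; a set too large for the buffer prints
// with an elided tail, e.g. "{0,1,2,...}".
//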


void
__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
{
    KMP_CPU_ZERO(mask);

# if KMP_GROUP_AFFINITY

    if (__kmp_num_proc_groups > 1) {
        int group;
        KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
        for (group = 0; group < __kmp_num_proc_groups; group++) {
            int i;
            int num = __kmp_GetActiveProcessorCount(group);
            for (i = 0; i < num; i++) {
                KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
            }
        }
    }
    else

# endif /* KMP_GROUP_AFFINITY */

    {
        int proc;
        for (proc = 0; proc < __kmp_xproc; proc++) {
            KMP_CPU_SET(proc, mask);
        }
    }
}


//
// In Linux* OS debug & cover (-O0) builds, we need to avoid inline member
// functions.
//
// The icc codegen emits sections with extremely long names, of the form
// ".gnu.linkonce.<mangled_name>".  There seems to have been a linker bug
// introduced between GNU ld version 2.14.90.0.4 and 2.15.92.0.2 involving
// some sort of memory corruption or table overflow that is triggered by
// these long strings.  I checked the latest version of the linker -
// GNU ld (Linux* OS/GNU Binutils) 2.18.50.0.7.20080422 - and the bug is not
// fixed.
//
// Unfortunately, my attempts to reproduce it in a smaller example have
// failed - I'm not sure what the prospects are of getting it fixed
// properly - but we need a reproducer smaller than all of libiomp.
//
// Work around the problem by avoiding inline constructors in such builds.
// We do this for all platforms, not just Linux* OS - non-inline functions
// are more debuggable and provide better coverage than inline functions.
// Use inline functions in shipping libs, for performance.
//

# if !defined(KMP_DEBUG) && !defined(COVER)

class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth)
      : depth(_depth), leader(FALSE) {
    }
    Address &operator=(const Address &b) {
        depth = b.depth;
        for (unsigned i = 0; i < depth; i++) {
            labels[i] = b.labels[i];
            childNums[i] = b.childNums[i];
        }
        leader = FALSE;
        return *this;
    }
    bool operator==(const Address &b) const {
        if (depth != b.depth)
            return false;
        for (unsigned i = 0; i < depth; i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool isClose(const Address &b, int level) const {
        if (depth != b.depth)
            return false;
        if ((unsigned)level >= depth)
            return true;
        for (unsigned i = 0; i < (depth - level); i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool operator!=(const Address &b) const {
        return !operator==(b);
    }
};

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {
    }
    AddrUnsPair &operator=(const AddrUnsPair &b)
    {
        first = b.first;
        second = b.second;
        return *this;
    }
};

# else

class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth);
    Address &operator=(const Address &b);
    bool operator==(const Address &b) const;
    bool isClose(const Address &b, int level) const;
    bool operator!=(const Address &b) const;
};

Address::Address(unsigned _depth)
{
    depth = _depth;
    leader = FALSE;
}

Address &Address::operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
        labels[i] = b.labels[i];
        childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
}

bool Address::operator==(const Address &b) const {
    if (depth != b.depth)
        return false;
    for (unsigned i = 0; i < depth; i++)
        if (labels[i] != b.labels[i])
            return false;
    return true;
}

bool Address::isClose(const Address &b, int level) const {
    if (depth != b.depth)
        return false;
    if ((unsigned)level >= depth)
        return true;
    for (unsigned i = 0; i < (depth - level); i++)
        if (labels[i] != b.labels[i])
            return false;
    return true;
}

bool Address::operator!=(const Address &b) const {
    return !operator==(b);
}

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second);
    AddrUnsPair &operator=(const AddrUnsPair &b);
};

AddrUnsPair::AddrUnsPair(Address _first, unsigned _second)
  : first(_first), second(_second)
{
}

AddrUnsPair &AddrUnsPair::operator=(const AddrUnsPair &b)
{
    first = b.first;
    second = b.second;
    return *this;
}

# endif /* !defined(KMP_DEBUG) && !defined(COVER) */


static int
__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    for (i = 0; i < depth; i++) {
        if (aa->labels[i] < bb->labels[i]) return -1;
        if (aa->labels[i] > bb->labels[i]) return 1;
    }
    return 0;
}


static int
__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
    KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
    for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
        int j = depth - i - 1;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    for (; i < depth; i++) {
        int j = i - __kmp_affinity_compact;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    return 0;
}
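//
// Illustrative example (not from the original source): with depth 3
// (package, core, thread) and __kmp_affinity_compact == 1, the comparator
// orders addresses by thread id first, then by package and core, so
// consecutive entries land on different cores; __kmp_affinity_compact == 0
// keeps the plain outermost-to-innermost label order.
//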

/** A structure for holding machine-specific hierarchy info to be computed once at init. */
class hierarchy_info {
public:
    /** Typical levels are threads/core, cores/package or socket, packages/node, nodes/machine,
        etc.  We don't want to get specific with nomenclature */
    static const kmp_uint32 maxLevels=7;

    /** This is specifically the depth of the machine configuration hierarchy, in terms of the
        number of levels along the longest path from root to any leaf. It corresponds to the
        number of entries in numPerLevel if we exclude all but one trailing 1. */
    kmp_uint32 depth;
    kmp_uint32 base_depth;
    kmp_uint32 base_num_threads;
    bool uninitialized;

    /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
        node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
        and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}.  All empty levels are set to 1. */
    kmp_uint32 numPerLevel[maxLevels];
    kmp_uint32 skipPerLevel[maxLevels];

    void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
        int hier_depth = adr2os[0].first.depth;
        int level = 0;
        for (int i=hier_depth-1; i>=0; --i) {
            int max = -1;
            for (int j=0; j<num_addrs; ++j) {
                int next = adr2os[j].first.childNums[i];
                if (next > max) max = next;
            }
            numPerLevel[level] = max+1;
            ++level;
        }
    }

    hierarchy_info() : depth(1), uninitialized(true) {}
    void init(AddrUnsPair *adr2os, int num_addrs)
    {
        uninitialized = false;
        for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        // Sort table by physical ID
        if (adr2os) {
            qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
            deriveLevels(adr2os, num_addrs);
        }
        else {
            numPerLevel[0] = 4;
            numPerLevel[1] = num_addrs/4;
            if (num_addrs%4) numPerLevel[1]++;
        }

        base_num_threads = num_addrs;
        for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
            if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
                depth++;

        kmp_uint32 branch = 4;
        if (numPerLevel[0] == 1) branch = num_addrs/4;
        if (branch<4) branch=4;
        for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
            while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
                if (numPerLevel[d] & 1) numPerLevel[d]++;
                numPerLevel[d] = numPerLevel[d] >> 1;
                if (numPerLevel[d+1] == 1) depth++;
                numPerLevel[d+1] = numPerLevel[d+1] << 1;
            }
            if(numPerLevel[0] == 1) {
                branch = branch >> 1;
                if (branch<4) branch = 4;
            }
        }

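        // Illustrative example: skipPerLevel[i] accumulates the product of
        // the level widths below level i, so numPerLevel = {2, 4, 4, 1, ...}
        // yields skipPerLevel = {1, 2, 8, 32, ...}.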
        for (kmp_uint32 i=1; i<depth; ++i)
            skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];

        base_depth = depth;
    }
};

static hierarchy_info machine_hierarchy;

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
    if (machine_hierarchy.uninitialized)
        machine_hierarchy.init(NULL, nproc);

    if (nproc <= machine_hierarchy.base_num_threads)
        machine_hierarchy.depth = machine_hierarchy.base_depth;
    KMP_DEBUG_ASSERT(machine_hierarchy.depth > 0);
    while (nproc > machine_hierarchy.skipPerLevel[machine_hierarchy.depth-1]) {
        machine_hierarchy.depth++;
        machine_hierarchy.skipPerLevel[machine_hierarchy.depth-1] = 2*machine_hierarchy.skipPerLevel[machine_hierarchy.depth-2];
    }
    thr_bar->depth = machine_hierarchy.depth;
    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}

//
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the child_num
// vector of the address object.  This is done in case the labels used for
// the children at one node of the hierarchy differ from those used for
// another node at the same level.  Example: suppose the machine has 2 nodes
// with 2 packages each.  The first node contains packages 601 and 602, and
// the second node contains packages 603 and 604.  If we try to sort the table
// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
// because we are paying attention to the labels themselves, not the ordinal
// child numbers.  By using the child numbers in the sort, the result is
// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
//
static void
__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
  int numAddrs)
{
    KMP_DEBUG_ASSERT(numAddrs > 0);
    int depth = address2os->first.depth;
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
      * sizeof(unsigned));
    int labCt;
    for (labCt = 0; labCt < depth; labCt++) {
        address2os[0].first.childNums[labCt] = counts[labCt] = 0;
        lastLabel[labCt] = address2os[0].first.labels[labCt];
    }
    int i;
    for (i = 1; i < numAddrs; i++) {
        for (labCt = 0; labCt < depth; labCt++) {
            if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
                int labCt2;
                for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
                    counts[labCt2] = 0;
                    lastLabel[labCt2] = address2os[i].first.labels[labCt2];
                }
                counts[labCt]++;
                lastLabel[labCt] = address2os[i].first.labels[labCt];
                break;
            }
        }
        for (labCt = 0; labCt < depth; labCt++) {
            address2os[i].first.childNums[labCt] = counts[labCt];
        }
        for (; labCt < (int)Address::maxDepth; labCt++) {
            address2os[i].first.childNums[labCt] = 0;
        }
    }
}


//
// All of the __kmp_affinity_create_*_map() routines should set
// __kmp_affinity_masks to a vector of affinity mask objects of length
// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
// return the number of levels in the machine topology tree (zero if
// __kmp_affinity_type == affinity_none).
//
// All of the __kmp_affinity_create_*_map() routines should set *fullMask
// to the affinity mask for the initialization thread.  They need to save and
// restore the mask, and it could be needed later, so saving it is just an
// optimization to avoid calling kmp_get_system_affinity() again.
//
static kmp_affin_mask_t *fullMask = NULL;

kmp_affin_mask_t *
__kmp_affinity_get_fullMask() { return fullMask; }


static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif

//
// __kmp_affinity_uniform_topology() doesn't work when called from
// places which support arbitrarily many levels in the machine topology
// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
// and __kmp_affinity_create_x2apicid_map().
//
inline static bool
__kmp_affinity_uniform_topology()
{
    return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}
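//
// Illustrative example: a machine modeled as 2 packages x 4 cores x 2
// threads is uniform only when all 16 logical procs are available; if a
// core is excluded from the mask, the product no longer matches
// __kmp_avail_proc and the topology is treated as non-uniform.
//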


//
// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc.
//
static void
__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
  int pkgLevel, int coreLevel, int threadLevel)
{
    int proc;

    KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
    for (proc = 0; proc < len; proc++) {
        int level;
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);
        for (level = 0; level < depth; level++) {
            if (level == threadLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
            }
            else if (level == coreLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
            }
            else if (level == pkgLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
            }
            else if (level > pkgLevel) {
                __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                  level - pkgLevel - 1);
            }
            else {
                __kmp_str_buf_print(&buf, "L%d ", level);
            }
            __kmp_str_buf_print(&buf, "%d ",
              address2os[proc].first.labels[level]);
        }
        KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
          buf.str);
        __kmp_str_buf_free(&buf);
    }
}


//
// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread id's <-> processor id's.
//
static int
__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Even if __kmp_affinity_type == affinity_none, this routine might still
    // be called to set __kmp_ncores, as well as
    // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(__kmp_affinity_type == affinity_none);
        __kmp_ncores = nPackages = __kmp_xproc;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nPackages = __kmp_avail_proc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);

        KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }
    if (__kmp_affinity_type == affinity_none) {
        return 0;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    unsigned int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(1);
        addr.labels[0] = i;
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
    }
    if (__kmp_affinity_verbose) {
        KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Only the package level is modeled in the machine topology map,
        // so the #levels of granularity is either 0 or 1.
        //
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels = 1;
        }
        else {
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 1;
}


# if KMP_GROUP_AFFINITY

//
// If multiple Windows* OS processor groups exist, we can create a 2-level
// topology map with the groups at level 0 and the individual procs at
// level 1.
//
// This facilitates letting the threads float among all procs in a group,
// if granularity=group (the default when there are multiple groups).
//
static int
__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // If we don't have multiple processor groups, return now.
    // The flat mapping will be used.
    //
    if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
        // FIXME set *msg_id
        return -1;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(2);
        addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
        addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
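        // Illustrative example: with a 64-bit DWORD_PTR, OS proc 70 gets
        // labels {1, 6} - processor group 1, position 6 within the group.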
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
              addr.labels[1]);
        }
    }

    if (__kmp_affinity_gran_levels < 0) {
        if (__kmp_affinity_gran == affinity_gran_group) {
            __kmp_affinity_gran_levels = 1;
        }
        else if ((__kmp_affinity_gran == affinity_gran_fine)
          || (__kmp_affinity_gran == affinity_gran_thread)) {
            __kmp_affinity_gran_levels = 0;
        }
        else {
            const char *gran_str = NULL;
            if (__kmp_affinity_gran == affinity_gran_core) {
                gran_str = "core";
            }
            else if (__kmp_affinity_gran == affinity_gran_package) {
                gran_str = "package";
            }
            else if (__kmp_affinity_gran == affinity_gran_node) {
                gran_str = "node";
            }
            else {
                KMP_ASSERT(0);
            }

            // Warning: can't use affinity granularity \"gran\" with group topology method, using "thread"
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 2;
}

# endif /* KMP_GROUP_AFFINITY */


# if KMP_ARCH_X86 || KMP_ARCH_X86_64

static int
__kmp_cpuid_mask_width(int count) {
    int r = 0;

    while((1<<r) < count)
        ++r;
    return r;
}
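//
// Example: __kmp_cpuid_mask_width(4) == 2 and __kmp_cpuid_mask_width(6) == 3,
// i.e. the number of bits needed to encode ids in the range [0, count).
//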


class apicThreadInfo {
public:
    unsigned osId;              // param to __kmp_affinity_bind_thread
    unsigned apicId;            // from cpuid after binding
    unsigned maxCoresPerPkg;    //      ""
    unsigned maxThreadsPerPkg;  //      ""
    unsigned pkgId;             // inferred from above values
    unsigned coreId;            //      ""
    unsigned threadId;          //      ""
};


static int
__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->osId < bb->osId) return -1;
    if (aa->osId > bb->osId) return 1;
    return 0;
}


static int
__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->pkgId < bb->pkgId) return -1;
    if (aa->pkgId > bb->pkgId) return 1;
    if (aa->coreId < bb->coreId) return -1;
    if (aa->coreId > bb->coreId) return 1;
    if (aa->threadId < bb->threadId) return -1;
    if (aa->threadId > bb->threadId) return 1;
    return 0;
}


//
// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
// an algorithm which cycles through the available os threads, setting
// the current thread's affinity mask to that thread, and then retrieves
// the Apic Id for each thread context using the cpuid instruction.
//
static int
__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;
    int rc;
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check if cpuid leaf 4 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 4) {
        *msg_id = kmp_i18n_str_NoLeaf4Support;
        return -1;
    }

    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
    // the defaults that we calculated from issuing cpuid without binding
    // to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        //
        // Get an upper bound on the number of threads per package using
        // cpuid(1).
        //
        // On some OS/chip combinations where HT is supported by the chip
        // but is disabled, this value will be 2 on a single core chip.
        // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
        //
        __kmp_x86_cpuid(1, 0, &buf);
        int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (maxThreadsPerPkg == 0) {
            maxThreadsPerPkg = 1;
        }

        //
        // The num cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // The author of cpu_count.cpp treated this as only an upper bound
        // on the number of cores, but I haven't seen any cases where it
        // was greater than the actual number of cores, so we will treat
        // it as exact in this block of code.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            nCoresPerPkg = 1;
        }

        //
        // There is no way to reliably tell if HT is enabled without issuing
        // the cpuid instruction from every thread, and correlating the cpuid
        // info, so if the machine is not affinity capable, we assume that HT
        // is off.  We have seen quite a few machines where maxThreadsPerPkg
        // is 2, yet the machine does not support HT.
        //
        // - Older OSes are usually found on machines with older chips, which
        //   do not support HT.
        //
        // - The performance penalty for mistakenly identifying a machine as
        //   HT when it isn't (which results in blocktime being incorrectly
        //   set to 0) is greater than the penalty for mistakenly identifying
        //   a machine as being 1 thread/core when it is really HT enabled
        //   (which results in blocktime being incorrectly set to a positive
        //   value).
        //
        __kmp_ncores = __kmp_xproc;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        __kmp_nThreadsPerCore = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    KMP_ASSERT(oldMask != NULL);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    // The relevant information is:
    //
    // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
    //    has a unique Apic Id, which is of the form pkg# : core# : thread#.
    //
    // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1).  The
    //    value of this field determines the width of the core# + thread#
    //    fields in the Apic Id.  It is also an upper bound on the number
    //    of threads per package, but it has been verified that situations
    //    happen where it is not exact.  In particular, on certain OS/chip
    //    combinations where Intel(R) Hyper-Threading Technology is supported
    //    by the chip but has been disabled, the value of this field will be 2
    //    (for a single core chip).  On other OS/chip combinations supporting
    //    Intel(R) Hyper-Threading Technology, the value of this field will be
    //    1 when Intel(R) Hyper-Threading Technology is disabled and 2 when it
    //    is enabled.
    //
    // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4).  The
    //    value of this field (+1) determines the width of the core# field in
    //    the Apic Id.  The comments in "cpucount.cpp" say that this value is
    //    an upper bound, but the IA-32 architecture manual says that it is
    //    exactly the number of cores per package, and I haven't seen any
    //    case where it wasn't.
    //
    // From this information, deduce the package Id, core Id, and thread Id,
    // and set the corresponding fields in the apicThreadInfo struct.
    //
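    // Worked example with hypothetical values: maxThreadsPerPkg = 16 and
    // maxCoresPerPkg = 8 give widthCT = 4, widthC = 3 and widthT = 1, so
    // an Apic Id of 0x1d (binary 1|110|1) decodes to pkgId = 1, coreId = 6,
    // threadId = 1.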
    unsigned i;
    apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
    unsigned nApics = 0;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(i);
        threadInfo[nApics].osId = i;

        //
        // The apic id and max threads per pkg come from cpuid(1).
        //
        __kmp_x86_cpuid(1, 0, &buf);
        if (! ((buf.edx >> 9) & 1)) {
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_ApicNotPresent;
            return -1;
        }
        threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
        threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (threadInfo[nApics].maxThreadsPerPkg == 0) {
            threadInfo[nApics].maxThreadsPerPkg = 1;
        }

        //
        // Max cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            threadInfo[nApics].maxCoresPerPkg = 1;
        }

        //
        // Infer the pkgId / coreId / threadId using only the info
        // obtained locally.
        //
        int widthCT = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxThreadsPerPkg);
        threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

        int widthC = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxCoresPerPkg);
        int widthT = widthCT - widthC;
        if (widthT < 0) {
            //
            // I've never seen this one happen, but I suppose it could, if
            // the cpuid instruction on a chip was really screwed up.
            // Make sure to restore the affinity mask before the tail call.
            //
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }

        int maskC = (1 << widthC) - 1;
        threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
          & maskC;

        int maskT = (1 << widthT) - 1;
        threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

        nApics++;
    }

    //
    // We've collected all the info we need.
    // Restore the old affinity mask for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, form an Address object
    // with depth 1 and return immediately (or, if affinity is off, set
    // address2os to NULL and return).
    //
    // If it is configured to omit the package level when there is only a
    // single package, the logic at the end of this routine won't work if
    // there is only a single thread - it would try to form an Address
    // object with depth 0.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
        Address addr(1);
        addr.labels[0] = threadInfo[0].pkgId;
        (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
        }

        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the threadInfo table by physical Id.
    //
    qsort(threadInfo, nApics, sizeof(*threadInfo),
      __kmp_affinity_cmp_apicThreadInfo_phys_id);

    //
    // The table is now sorted by pkgId / coreId / threadId, but we really
    // don't know the radix of any of the fields.  pkgId's may be sparsely
    // assigned among the chips on a system.  Although coreId's are usually
    // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
    // [0..threadsPerCore-1], we don't want to make any such assumptions.
    //
    // For that matter, we don't know what coresPerPkg and threadsPerCore
    // (or the total # packages) are at this point - we want to determine
    // that now.  We only have an upper bound on the first two figures.
    //
    // We also perform a consistency check at this point: the values returned
    // by the cpuid instruction for any thread bound to a given package had
    // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
    //
    nPackages = 1;
    nCoresPerPkg = 1;
    __kmp_nThreadsPerCore = 1;
    unsigned nCores = 1;

    unsigned pkgCt = 1;                         // to determine radii
    unsigned lastPkgId = threadInfo[0].pkgId;
    unsigned coreCt = 1;
    unsigned lastCoreId = threadInfo[0].coreId;
    unsigned threadCt = 1;
    unsigned lastThreadId = threadInfo[0].threadId;

    // intra-pkg consistency checks
    unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
    unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

    for (i = 1; i < nApics; i++) {
        if (threadInfo[i].pkgId != lastPkgId) {
            nCores++;
            pkgCt++;
            lastPkgId = threadInfo[i].pkgId;
            if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
            coreCt = 1;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;

            //
            // This is a different package, so go on to the next iteration
            // without doing any consistency checks.  Reset the consistency
            // check vars, though.
            //
            prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
            prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
            continue;
        }

        if (threadInfo[i].coreId != lastCoreId) {
            nCores++;
            coreCt++;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;
        }
        else if (threadInfo[i].threadId != lastThreadId) {
            threadCt++;
            lastThreadId = threadInfo[i].threadId;
        }
        else {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
            return -1;
        }

        //
        // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
        // fields agree between all the threads bound to a given package.
        //
        if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
          || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }
    }
    nPackages = pkgCt;
    if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
    if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nCores;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (__kmp_affinity_uniform_topology()) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Now that we've determined the number of packages, the number of cores
    // per package, and the number of threads per core, we can construct the
    // data structure that is to be returned.
    //
    int pkgLevel = 0;
    int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
    int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
    unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

    KMP_ASSERT(depth > 0);
    *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

    for (i = 0; i < nApics; ++i) {
        Address addr(depth);
        unsigned os = threadInfo[i].osId;
        int d = 0;

        if (pkgLevel >= 0) {
            addr.labels[d++] = threadInfo[i].pkgId;
        }
        if (coreLevel >= 0) {
            addr.labels[d++] = threadInfo[i].coreId;
        }
        if (threadLevel >= 0) {
            addr.labels[d++] = threadInfo[i].threadId;
        }
        (*address2os)[i] = AddrUnsPair(addr, os);
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0)
          && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return depth;
}


//
// Intel(R) microarchitecture code name Nehalem, Dunnington and later
// architectures support a newer interface for specifying the x2APIC Ids,
// based on cpuid leaf 11.
//
static int
__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;

    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check to see if cpuid leaf 11 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 11) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }
    __kmp_x86_cpuid(11, 0, &buf);
    if (buf.ebx == 0) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }

    //
    // Find the number of levels in the machine topology.  While we're at it,
    // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg.  We
    // will try to get more accurate values later by explicitly counting them,
    // but get reasonable defaults now, in case we return early.
    //
    int level;
    int threadLevel = -1;
    int coreLevel = -1;
    int pkgLevel = -1;
    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

    for (level = 0;; level++) {
        if (level > 31) {
            //
            // FIXME: Hack for DPD200163180
            //
            // If level is big then something went wrong -> exiting
            //
            // There could actually be 32 valid levels in the machine topology,
            // but so far, the only machine we have seen which does not exit
            // this loop before iteration 32 has fubar x2APIC settings.
            //
            // For now, just reject this case based upon loop trip count.
            //
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }
        __kmp_x86_cpuid(11, level, &buf);
        if (buf.ebx == 0) {
            if (pkgLevel < 0) {
                //
                // Will infer nPackages from __kmp_xproc
                //
                pkgLevel = level;
                level++;
            }
            break;
        }
        int kind = (buf.ecx >> 8) & 0xff;
        if (kind == 1) {
            //
            // SMT level
            //
            threadLevel = level;
            coreLevel = -1;
            pkgLevel = -1;
            __kmp_nThreadsPerCore = buf.ebx & 0xff;
            if (__kmp_nThreadsPerCore == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else if (kind == 2) {
            //
            // core level
            //
            coreLevel = level;
            pkgLevel = -1;
            nCoresPerPkg = buf.ebx & 0xff;
            if (nCoresPerPkg == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else {
            if (level <= 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
            if (pkgLevel >= 0) {
                continue;
            }
            pkgLevel = level;
            nPackages = buf.ebx & 0xff;
            if (nPackages == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
    }
    int depth = level;

    //
    // In the above loop, "level" was counted from the finest level (usually
    // thread) to the coarsest.  The caller expects that we will place the
    // labels in (*address2os)[].first.labels[] in the inverse order, so
    // we need to invert the vars saying which level means what.
    //
    if (threadLevel >= 0) {
        threadLevel = depth - threadLevel - 1;
    }
    if (coreLevel >= 0) {
        coreLevel = depth - coreLevel - 1;
    }
    KMP_DEBUG_ASSERT(pkgLevel >= 0);
    pkgLevel = depth - pkgLevel - 1;
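    // Illustrative example: if the loop discovered SMT at level 0, core at
    // level 1 and package at level 2 (depth 3), the inversion yields
    // threadLevel = 2, coreLevel = 1 and pkgLevel = 0, matching the
    // outermost-first label order.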

    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
    // the defaults that we calculated from issuing cpuid without binding
    // to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE())
    {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Allocate the data structure to be returned.
    //
    AddrUnsPair *retval = (AddrUnsPair *)
      __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    unsigned int proc;
    int nApics = 0;
    for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(proc, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(proc);

        //
        // Extract the labels for each level in the machine topology map
        // from the Apic ID.
        //
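        // Worked example with hypothetical values: with an SMT-level shift
        // of 1 and a core-level shift of 5, x2APIC id 0x2b (binary 1|0101|1)
        // decodes to package 1, core 5, thread 1.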
        Address addr(depth);
        int prev_shift = 0;

        for (level = 0; level < depth; level++) {
            __kmp_x86_cpuid(11, level, &buf);
            unsigned apicId = buf.edx;
            if (buf.ebx == 0) {
                if (level != depth - 1) {
                    KMP_CPU_FREE(oldMask);
                    *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
                    return -1;
                }
                addr.labels[depth - level - 1] = apicId >> prev_shift;
                level++;
                break;
            }
            int shift = buf.eax & 0x1f;
            int mask = (1 << shift) - 1;
            addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
            prev_shift = shift;
        }
        if (level != depth) {
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }

        retval[nApics] = AddrUnsPair(addr, proc);
        nApics++;
    }

    //
    // We've collected all the info we need.
    // Restore the old affinity mask for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, return now.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(retval);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        //
        // Form an Address object which only includes the package level.
        //
        Address addr(1);
        addr.labels[0] = retval[0].first.labels[pkgLevel];
        retval[0].first = addr;

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
        }

        *address2os = retval;
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the table by physical Id.
    //
    qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

    //
    // Find the radix at each of the levels.
    //
    unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    for (level = 0; level < depth; level++) {
        totals[level] = 1;
        maxCt[level] = 1;
        counts[level] = 1;
        last[level] = retval[0].first.labels[level];
    }

    //
    // From here on, the iteration variable "level" runs from the finest
    // level to the coarsest, i.e. we iterate forward through
    // (*address2os)[].first.labels[] - in the previous loops, we iterated
    // backwards.
    //
    for (proc = 1; (int)proc < nApics; proc++) {
        int level;
        for (level = 0; level < depth; level++) {
            if (retval[proc].first.labels[level] != last[level]) {
                int j;
                for (j = level + 1; j < depth; j++) {
                    totals[j]++;
                    counts[j] = 1;
                    // The line below causes printing of incorrect topology
                    // information in case the max value for some level
                    // (maxCt[level]) is encountered earlier than some smaller
                    // value while going through the array.  For example, if
                    // pkg0 has 4 cores and pkg1 has 2 cores, then maxCt[1]
                    // ends up as 2, whereas it must be 4.
                    // TODO!!! Check if it can be commented safely
                    //maxCt[j] = 1;
                    last[j] = retval[proc].first.labels[j];
                }
                totals[level]++;
                counts[level]++;
                if (counts[level] > maxCt[level]) {
                    maxCt[level] = counts[level];
                }
                last[level] = retval[proc].first.labels[level];
                break;
            }
            else if (level == depth - 1) {
                __kmp_free(last);
                __kmp_free(maxCt);
                __kmp_free(counts);
                __kmp_free(totals);
                __kmp_free(retval);
                KMP_CPU_FREE(oldMask);
                *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
                return -1;
            }
        }
    }

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return if affinity is not enabled.
    //
    if (threadLevel >= 0) {
        __kmp_nThreadsPerCore = maxCt[threadLevel];
    }
    else {
        __kmp_nThreadsPerCore = 1;
    }
    nPackages = totals[pkgLevel];

    if (coreLevel >= 0) {
        __kmp_ncores = totals[coreLevel];
        nCoresPerPkg = maxCt[coreLevel];
    }
    else {
        __kmp_ncores = nPackages;
        nCoresPerPkg = 1;
    }

    //
    // Check to see if the machine topology is uniform
    //
    unsigned prod = maxCt[0];
    for (level = 1; level < depth; level++) {
        prod *= maxCt[level];
    }
    bool uniform = (prod == totals[level - 1]);
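    // Illustrative example: maxCt = {2, 4, 2} gives prod = 16, and the
    // topology is uniform only if totals[depth - 1] - the total number of
    // leaves - is also 16.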

    //
    // Print the machine topology summary.
    //
    if (__kmp_affinity_verbose) {
        char mask[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (uniform) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }

        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);

        __kmp_str_buf_print(&buf, "%d", totals[0]);
        for (level = 1; level <= pkgLevel; level++) {
            __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
        }
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);

        __kmp_str_buf_free(&buf);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(last);
        __kmp_free(maxCt);
        __kmp_free(counts);
        __kmp_free(totals);
        __kmp_free(retval);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Find any levels with radix 1, and remove them from the map
    // (except for the package level).
    //
    int new_depth = 0;
    for (level = 0; level < depth; level++) {
        if ((maxCt[level] == 1) && (level != pkgLevel)) {
            continue;
        }
        new_depth++;
    }

    //
    // If we are removing any levels, allocate a new vector to return,
    // and copy the relevant information to it.
    //
    if (new_depth != depth) {
        AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
          sizeof(AddrUnsPair) * nApics);
        for (proc = 0; (int)proc < nApics; proc++) {
            Address addr(new_depth);
            new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
        }
        int new_level = 0;
        for (level = 0; level < depth; level++) {
            if ((maxCt[level] == 1) && (level != pkgLevel)) {
                if (level == threadLevel) {
                    threadLevel = -1;
                }
                else if ((threadLevel >= 0) && (level < threadLevel)) {
                    threadLevel--;
                }
                if (level == coreLevel) {
                    coreLevel = -1;
                }
                else if ((coreLevel >= 0) && (level < coreLevel)) {
                    coreLevel--;
                }
                if (level < pkgLevel) {
                    pkgLevel--;
                }
                continue;
            }
            for (proc = 0; (int)proc < nApics; proc++) {
                new_retval[proc].first.labels[new_level]
                  = retval[proc].first.labels[level];
            }
            new_level++;
        }

        __kmp_free(retval);
        retval = new_retval;
        depth = new_depth;
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(last);
    __kmp_free(maxCt);
    __kmp_free(counts);
    __kmp_free(totals);
    KMP_CPU_FREE(oldMask);
    *address2os = retval;
    return depth;
}
1775
1776
1777# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1778
1779
1780#define osIdIndex 0
1781#define threadIdIndex 1
1782#define coreIdIndex 2
1783#define pkgIdIndex 3
1784#define nodeIdIndex 4
1785
1786typedef unsigned *ProcCpuInfo;
1787static unsigned maxIndex = pkgIdIndex;
1788
1789
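//
// qsort() comparators for the /proc/cpuinfo records: the first orders
// records by OS proc id; the second orders by the physical ids, from the
// most significant (node) index down to the osId tie-breaker.
//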
1790static int
1791__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
1792{
1793 const unsigned *aa = (const unsigned *)a;
1794 const unsigned *bb = (const unsigned *)b;
1795 if (aa[osIdIndex] < bb[osIdIndex]) return -1;
1796 if (aa[osIdIndex] > bb[osIdIndex]) return 1;
1797 return 0;
1798}
1799
1800
1801static int
1802__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
1803{
1804 unsigned i;
1805 const unsigned *aa = *((const unsigned **)a);
1806 const unsigned *bb = *((const unsigned **)b);
1807 for (i = maxIndex; ; i--) {
1808 if (aa[i] < bb[i]) return -1;
1809 if (aa[i] > bb[i]) return 1;
1810 if (i == osIdIndex) break;
1811 }
1812 return 0;
1813}
1814
1815
1816//
1817// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
1818// affinity map.
1819//
1820static int
1821__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
1822 kmp_i18n_id_t *const msg_id, FILE *f)
1823{
1824 *address2os = NULL;
1825 *msg_id = kmp_i18n_null;
1826
1827 //
1828 // Scan the file, counting the number of "processor" (osId) fields,
Alp Toker8f2d3f02014-02-24 10:40:15 +00001829 // and finding the highest value of <n> for a node_<n> field.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001830 //
1831 char buf[256];
1832 unsigned num_records = 0;
1833 while (! feof(f)) {
1834 buf[sizeof(buf) - 1] = 1;
1835 if (! fgets(buf, sizeof(buf), f)) {
1836 //
1837 // Read error, presumably because of EOF
1838 //
1839 break;
1840 }
1841
1842 char s1[] = "processor";
1843 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1844 num_records++;
1845 continue;
1846 }
1847
1848 //
1849 // FIXME - this will match "node_<n> <garbage>"
1850 //
1851 unsigned level;
1852 if (sscanf(buf, "node_%u id", &level) == 1) {
1853 if (nodeIdIndex + level >= maxIndex) {
1854 maxIndex = nodeIdIndex + level;
1855 }
1856 continue;
1857 }
1858 }
1859
1860 //
1861 // Check for empty file / no valid processor records, or too many.
1862 // The number of records can't exceed the number of valid bits in the
1863 // affinity mask.
1864 //
1865 if (num_records == 0) {
1866 *line = 0;
1867 *msg_id = kmp_i18n_str_NoProcRecords;
1868 return -1;
1869 }
1870 if (num_records > (unsigned)__kmp_xproc) {
1871 *line = 0;
1872 *msg_id = kmp_i18n_str_TooManyProcRecords;
1873 return -1;
1874 }
1875
1876 //
1877 // Set the file pointer back to the beginning, so that we can scan the
1878 // file again, this time performing a full parse of the data.
1879 // Allocate a vector of ProcCpuInfo objects, where we will place the data.
1880 // Adding an extra element at the end allows us to remove a lot of extra
1881 // checks for termination conditions.
1882 //
1883 if (fseek(f, 0, SEEK_SET) != 0) {
1884 *line = 0;
1885 *msg_id = kmp_i18n_str_CantRewindCpuinfo;
1886 return -1;
1887 }
1888
1889 //
1890 // Allocate the array of records to store the proc info in. The dummy
1891 // element at the end makes the logic in filling them out easier to code.
1892 //
1893 unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
1894 * sizeof(unsigned *));
1895 unsigned i;
1896 for (i = 0; i <= num_records; i++) {
1897 threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
1898 * sizeof(unsigned));
1899 }
1900
1901#define CLEANUP_THREAD_INFO \
1902 for (i = 0; i <= num_records; i++) { \
1903 __kmp_free(threadInfo[i]); \
1904 } \
1905 __kmp_free(threadInfo);
1906
1907 //
1908 // A value of UINT_MAX means that we didn't find the field
1909 //
1910 unsigned __index;
1911
1912#define INIT_PROC_INFO(p) \
1913 for (__index = 0; __index <= maxIndex; __index++) { \
1914 (p)[__index] = UINT_MAX; \
1915 }
1916
1917 for (i = 0; i <= num_records; i++) {
1918 INIT_PROC_INFO(threadInfo[i]);
1919 }
1920
1921 unsigned num_avail = 0;
1922 *line = 0;
1923 while (! feof(f)) {
1924 //
1925 // Create an inner scoping level, so that all the goto targets at the
1926 // end of the loop appear in an outer scoping level. This avoids
1927 // warnings about jumping past an initialization to a target in the
1928 // same block.
1929 //
1930 {
1931 buf[sizeof(buf) - 1] = 1;
1932 bool long_line = false;
1933 if (! fgets(buf, sizeof(buf), f)) {
1934 //
1935 // Read errors presumably because of EOF
1936 //
1937 // If there is valid data in threadInfo[num_avail], then fake
1938 // a blank line to ensure that the last address gets parsed.
1939 //
1940 bool valid = false;
1941 for (i = 0; i <= maxIndex; i++) {
1942 if (threadInfo[num_avail][i] != UINT_MAX) {
1943 valid = true;
1944 }
1945 }
1946 if (! valid) {
1947 break;
1948 }
1949 buf[0] = 0;
1950 } else if (!buf[sizeof(buf) - 1]) {
1951 //
1952 // The line is longer than the buffer. Set a flag and don't
1953 // emit an error if we were going to ignore the line, anyway.
1954 //
1955 long_line = true;
1956
1957#define CHECK_LINE \
1958 if (long_line) { \
1959 CLEANUP_THREAD_INFO; \
1960 *msg_id = kmp_i18n_str_LongLineCpuinfo; \
1961 return -1; \
1962 }
1963 }
1964 (*line)++;
1965
1966 char s1[] = "processor";
1967 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1968 CHECK_LINE;
1969 char *p = strchr(buf + sizeof(s1) - 1, ':');
1970 unsigned val;
1971 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
1972 if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
1973 threadInfo[num_avail][osIdIndex] = val;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001974#if KMP_OS_LINUX && USE_SYSFS_INFO
1975 char path[256];
1976 snprintf(path, sizeof(path),
1977 "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
1978 threadInfo[num_avail][osIdIndex]);
1979 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
1980
1981 snprintf(path, sizeof(path),
1982 "/sys/devices/system/cpu/cpu%u/topology/core_id",
1983 threadInfo[num_avail][osIdIndex]);
1984 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001985 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001986#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001987 }
1988 char s2[] = "physical id";
1989 if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
1990 CHECK_LINE;
1991 char *p = strchr(buf + sizeof(s2) - 1, ':');
1992 unsigned val;
1993 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
1994 if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
1995 threadInfo[num_avail][pkgIdIndex] = val;
1996 continue;
1997 }
1998 char s3[] = "core id";
1999 if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
2000 CHECK_LINE;
2001 char *p = strchr(buf + sizeof(s3) - 1, ':');
2002 unsigned val;
2003 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2004 if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
2005 threadInfo[num_avail][coreIdIndex] = val;
2006 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002007#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jim Cownie5e8470a2013-09-27 10:38:44 +00002008 }
2009 char s4[] = "thread id";
2010 if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
2011 CHECK_LINE;
2012 char *p = strchr(buf + sizeof(s4) - 1, ':');
2013 unsigned val;
2014 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2015 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
2016 threadInfo[num_avail][threadIdIndex] = val;
2017 continue;
2018 }
2019 unsigned level;
2020 if (sscanf(buf, "node_%u id", &level) == 1) {
2021 CHECK_LINE;
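// sizeof(s4) - 1 == strlen("node_<d> id") for a single-digit <d>, and the
// ':' can never appear earlier in the line than that, so starting the
// strchr() there is safe even though s4 is the "thread id" prefix.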
2022 char *p = strchr(buf + sizeof(s4) - 1, ':');
2023 unsigned val;
2024 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2025 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
2026 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
2027 threadInfo[num_avail][nodeIdIndex + level] = val;
2028 continue;
2029 }
2030
2031 //
2032 // We didn't recognize the leading token on the line.
2033 // There are lots of leading tokens that we don't recognize -
2034 // if the line isn't empty, go on to the next line.
2035 //
2036 if ((*buf != 0) && (*buf != '\n')) {
2037 //
2038 // If the line is longer than the buffer, read characters
2039 // until we find a newline.
2040 //
2041 if (long_line) {
2042 int ch;
2043 while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
2044 }
2045 continue;
2046 }
2047
2048 //
2049 // A newline has signalled the end of the processor record.
2050 // Check that there aren't too many procs specified.
2051 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002052 if ((int)num_avail == __kmp_xproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002053 CLEANUP_THREAD_INFO;
2054 *msg_id = kmp_i18n_str_TooManyEntries;
2055 return -1;
2056 }
2057
2058 //
2059 // Check for missing fields. The osId field must be there, and we
2060 // currently require that the physical id field is specified, also.
2061 //
2062 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
2063 CLEANUP_THREAD_INFO;
2064 *msg_id = kmp_i18n_str_MissingProcField;
2065 return -1;
2066 }
2067 if (threadInfo[num_avail][pkgIdIndex] == UINT_MAX) {
2068 CLEANUP_THREAD_INFO;
2069 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
2070 return -1;
2071 }
2072
2073 //
2074 // Skip this proc if it is not included in the machine model.
2075 //
2076 if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
2077 INIT_PROC_INFO(threadInfo[num_avail]);
2078 continue;
2079 }
2080
2081 //
2082 // We have a successful parse of this proc's info.
2083 // Increment the counter, and prepare for the next proc.
2084 //
2085 num_avail++;
2086 KMP_ASSERT(num_avail <= num_records);
2087 INIT_PROC_INFO(threadInfo[num_avail]);
2088 }
2089 continue;
2090
2091 no_val:
2092 CLEANUP_THREAD_INFO;
2093 *msg_id = kmp_i18n_str_MissingValCpuinfo;
2094 return -1;
2095
2096 dup_field:
2097 CLEANUP_THREAD_INFO;
2098 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
2099 return -1;
2100 }
2101 *line = 0;
2102
2103# if KMP_MIC && REDUCE_TEAM_SIZE
2104 unsigned teamSize = 0;
2105# endif // KMP_MIC && REDUCE_TEAM_SIZE
2106
2107 // check for num_records == __kmp_xproc ???
2108
2109 //
2110 // If there's only one thread context to bind to, form an Address object
2111 // with depth 1 and return immediately (or, if affinity is off, set
2112 // address2os to NULL and return).
2113 //
2114 // If it is configured to omit the package level when there is only a
2115 // single package, the logic at the end of this routine won't work if
2116 // there is only a single thread - it would try to form an Address
2117 // object with depth 0.
2118 //
2119 KMP_ASSERT(num_avail > 0);
2120 KMP_ASSERT(num_avail <= num_records);
2121 if (num_avail == 1) {
2122 __kmp_ncores = 1;
2123 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002124 if (__kmp_affinity_verbose) {
2125 if (! KMP_AFFINITY_CAPABLE()) {
2126 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2127 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2128 KMP_INFORM(Uniform, "KMP_AFFINITY");
2129 }
2130 else {
2131 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2132 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
2133 fullMask);
2134 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2135 if (__kmp_affinity_respect_mask) {
2136 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2137 } else {
2138 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2139 }
2140 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2141 KMP_INFORM(Uniform, "KMP_AFFINITY");
2142 }
2143 int index;
2144 kmp_str_buf_t buf;
2145 __kmp_str_buf_init(&buf);
2146 __kmp_str_buf_print(&buf, "1");
2147 for (index = maxIndex - 1; index > pkgIdIndex; index--) {
2148 __kmp_str_buf_print(&buf, " x 1");
2149 }
2150 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
2151 __kmp_str_buf_free(&buf);
2152 }
2153
2154 if (__kmp_affinity_type == affinity_none) {
2155 CLEANUP_THREAD_INFO;
2156 return 0;
2157 }
2158
2159 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
2160 Address addr(1);
2161 addr.labels[0] = threadInfo[0][pkgIdIndex];
2162 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
2163
2164 if (__kmp_affinity_gran_levels < 0) {
2165 __kmp_affinity_gran_levels = 0;
2166 }
2167
2168 if (__kmp_affinity_verbose) {
2169 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
2170 }
2171
2172 CLEANUP_THREAD_INFO;
2173 return 1;
2174 }
2175
2176 //
2177 // Sort the threadInfo table by physical Id.
2178 //
2179 qsort(threadInfo, num_avail, sizeof(*threadInfo),
2180 __kmp_affinity_cmp_ProcCpuInfo_phys_id);
2181
2182 //
2183 // The table is now sorted by pkgId / coreId / threadId, but we really
2184 // don't know the radix of any of the fields. pkgId's may be sparsely
2185 // assigned among the chips on a system. Although coreId's are usually
2186 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
2187 // [0..threadsPerCore-1], we don't want to make any such assumptions.
2188 //
2189 // For that matter, we don't know what coresPerPkg and threadsPerCore
2190 // (or the total # packages) are at this point - we want to determine
2191 // that now. We only have an upper bound on the first two figures.
2192 //
2193 unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
2194 * sizeof(unsigned));
2195 unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
2196 * sizeof(unsigned));
2197 unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
2198 * sizeof(unsigned));
2199 unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
2200 * sizeof(unsigned));
2201
2202 bool assign_thread_ids = false;
2203 unsigned threadIdCt;
2204 unsigned index;
2205
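//
// Count the radix (number of distinct ids) at each level. If thread ids
// were listed for only some of the records, we jump back here with
// assign_thread_ids set and number the threads ourselves.
//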
2206 restart_radix_check:
2207 threadIdCt = 0;
2208
2209 //
2210 // Initialize the counter arrays with data from threadInfo[0].
2211 //
2212 if (assign_thread_ids) {
2213 if (threadInfo[0][threadIdIndex] == UINT_MAX) {
2214 threadInfo[0][threadIdIndex] = threadIdCt++;
2215 }
2216 else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
2217 threadIdCt = threadInfo[0][threadIdIndex] + 1;
2218 }
2219 }
2220 for (index = 0; index <= maxIndex; index++) {
2221 counts[index] = 1;
2222 maxCt[index] = 1;
2223 totals[index] = 1;
2224 lastId[index] = threadInfo[0][index];
2225 }
2226
2227 //
2228 // Run through the rest of the OS procs.
2229 //
2230 for (i = 1; i < num_avail; i++) {
2231 //
2232 // Find the most significant index whose id differs
2233 // from the id for the previous OS proc.
2234 //
2235 for (index = maxIndex; index >= threadIdIndex; index--) {
2236 if (assign_thread_ids && (index == threadIdIndex)) {
2237 //
2238 // Auto-assign the thread id field if it wasn't specified.
2239 //
2240 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2241 threadInfo[i][threadIdIndex] = threadIdCt++;
2242 }
2243
2244 //
2245 // Apparently the thread id field was specified for some
2246 // entries and not others. Start the thread id counter
2247 // off at the next higher thread id.
2248 //
2249 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2250 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2251 }
2252 }
2253 if (threadInfo[i][index] != lastId[index]) {
2254 //
2255 // Run through all indices which are less significant,
2256 // and reset the counts to 1.
2257 //
2258 // At all levels up to and including index, we need to
2259 // increment the totals and record the last id.
2260 //
2261 unsigned index2;
2262 for (index2 = threadIdIndex; index2 < index; index2++) {
2263 totals[index2]++;
2264 if (counts[index2] > maxCt[index2]) {
2265 maxCt[index2] = counts[index2];
2266 }
2267 counts[index2] = 1;
2268 lastId[index2] = threadInfo[i][index2];
2269 }
2270 counts[index]++;
2271 totals[index]++;
2272 lastId[index] = threadInfo[i][index];
2273
2274 if (assign_thread_ids && (index > threadIdIndex)) {
2275
2276# if KMP_MIC && REDUCE_TEAM_SIZE
2277 //
2278 // The default team size is the total #threads in the machine
2279 // minus 1 thread for every core that has 3 or more threads.
2280 //
2281 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2282# endif // KMP_MIC && REDUCE_TEAM_SIZE
2283
2284 //
2285 // Restart the thread counter, as we are on a new core.
2286 //
2287 threadIdCt = 0;
2288
2289 //
2290 // Auto-assign the thread id field if it wasn't specified.
2291 //
2292 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2293 threadInfo[i][threadIdIndex] = threadIdCt++;
2294 }
2295
2296 //
2297 // Apparently the thread id field was specified for some
2298 // entries and not others. Start the thread id counter
2299 // off at the next higher thread id.
2300 //
2301 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2302 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2303 }
2304 }
2305 break;
2306 }
2307 }
2308 if (index < threadIdIndex) {
2309 //
2310 // If thread ids were specified, it is an error if they are not
2311 // unique. Also, check that we haven't already restarted the
2312 // loop (to be safe - shouldn't need to).
2313 //
2314 if ((threadInfo[i][threadIdIndex] != UINT_MAX)
2315 || assign_thread_ids) {
2316 __kmp_free(lastId);
2317 __kmp_free(totals);
2318 __kmp_free(maxCt);
2319 __kmp_free(counts);
2320 CLEANUP_THREAD_INFO;
2321 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2322 return -1;
2323 }
2324
2325 //
2326 // If the thread ids were not specified and we see entries
2327 // that are duplicates, start the loop over and
2328 // assign the thread ids manually.
2329 //
2330 assign_thread_ids = true;
2331 goto restart_radix_check;
2332 }
2333 }
2334
2335# if KMP_MIC && REDUCE_TEAM_SIZE
2336 //
2337 // The default team size is the total #threads in the machine
2338 // minus 1 thread for every core that has 3 or more threads.
2339 //
2340 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2341# endif // KMP_MIC && REDUCE_TEAM_SIZE
2342
2343 for (index = threadIdIndex; index <= maxIndex; index++) {
2344 if (counts[index] > maxCt[index]) {
2345 maxCt[index] = counts[index];
2346 }
2347 }
2348
2349 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2350 nCoresPerPkg = maxCt[coreIdIndex];
2351 nPackages = totals[pkgIdIndex];
2352
2353 //
2354 // Check to see if the machine topology is uniform
2355 //
2356 unsigned prod = totals[maxIndex];
2357 for (index = threadIdIndex; index < maxIndex; index++) {
2358 prod *= maxCt[index];
2359 }
2360 bool uniform = (prod == totals[threadIdIndex]);
2361
2362 //
2363 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00002364 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002365 // nCoresPerPkg, & nPackages. Make sure all these vars are set
2366 // correctly, and return now if affinity is not enabled.
2367 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00002368 __kmp_ncores = totals[coreIdIndex];
2369
2370 if (__kmp_affinity_verbose) {
2371 if (! KMP_AFFINITY_CAPABLE()) {
2372 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2373 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2374 if (uniform) {
2375 KMP_INFORM(Uniform, "KMP_AFFINITY");
2376 } else {
2377 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2378 }
2379 }
2380 else {
2381 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2382 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
2383 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2384 if (__kmp_affinity_respect_mask) {
2385 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2386 } else {
2387 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2388 }
2389 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2390 if (uniform) {
2391 KMP_INFORM(Uniform, "KMP_AFFINITY");
2392 } else {
2393 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2394 }
2395 }
2396 kmp_str_buf_t buf;
2397 __kmp_str_buf_init(&buf);
2398
2399 __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
2400 for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2401 __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
2402 }
2403 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
2404 maxCt[threadIdIndex], __kmp_ncores);
2405
2406 __kmp_str_buf_free(&buf);
2407 }
2408
2409# if KMP_MIC && REDUCE_TEAM_SIZE
2410 //
2411 // Set the default team size.
2412 //
2413 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2414 __kmp_dflt_team_nth = teamSize;
2415 KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
2416 __kmp_dflt_team_nth));
2417 }
2418# endif // KMP_MIC && REDUCE_TEAM_SIZE
2419
2420 if (__kmp_affinity_type == affinity_none) {
2421 __kmp_free(lastId);
2422 __kmp_free(totals);
2423 __kmp_free(maxCt);
2424 __kmp_free(counts);
2425 CLEANUP_THREAD_INFO;
2426 return 0;
2427 }
2428
2429 //
2430 // Count the number of levels which have more nodes at that level than
2431 // at the parent's level (with there being an implicit root node of
2432 // the top level). This is equivalent to saying that there is at least
2433 // one node at this level which has a sibling. These levels are in the
2434 // map, and the package level is always in the map.
2435 //
2436 bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
2437 int level = 0;
2438 for (index = threadIdIndex; index < maxIndex; index++) {
2439 KMP_ASSERT(totals[index] >= totals[index + 1]);
2440 inMap[index] = (totals[index] > totals[index + 1]);
2441 }
2442 inMap[maxIndex] = (totals[maxIndex] > 1);
2443 inMap[pkgIdIndex] = true;
2444
2445 int depth = 0;
2446 for (index = threadIdIndex; index <= maxIndex; index++) {
2447 if (inMap[index]) {
2448 depth++;
2449 }
2450 }
2451 KMP_ASSERT(depth > 0);
2452
2453 //
2454 // Construct the data structure that is to be returned.
2455 //
2456 *address2os = (AddrUnsPair*)
2457 __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
2458 int pkgLevel = -1;
2459 int coreLevel = -1;
2460 int threadLevel = -1;
2461
2462 for (i = 0; i < num_avail; ++i) {
2463 Address addr(depth);
2464 unsigned os = threadInfo[i][osIdIndex];
2465 int src_index;
2466 int dst_index = 0;
2467
2468 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
2469 if (! inMap[src_index]) {
2470 continue;
2471 }
2472 addr.labels[dst_index] = threadInfo[i][src_index];
2473 if (src_index == pkgIdIndex) {
2474 pkgLevel = dst_index;
2475 }
2476 else if (src_index == coreIdIndex) {
2477 coreLevel = dst_index;
2478 }
2479 else if (src_index == threadIdIndex) {
2480 threadLevel = dst_index;
2481 }
2482 dst_index++;
2483 }
2484 (*address2os)[i] = AddrUnsPair(addr, os);
2485 }
2486
2487 if (__kmp_affinity_gran_levels < 0) {
2488 //
2489 // Set the granularity level based on what levels are modeled
2490 // in the machine topology map.
2491 //
2492 unsigned src_index;
2493 __kmp_affinity_gran_levels = 0;
2494 for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2495 if (! inMap[src_index]) {
2496 continue;
2497 }
2498 switch (src_index) {
2499 case threadIdIndex:
2500 if (__kmp_affinity_gran > affinity_gran_thread) {
2501 __kmp_affinity_gran_levels++;
2502 }
2503
2504 break;
2505 case coreIdIndex:
2506 if (__kmp_affinity_gran > affinity_gran_core) {
2507 __kmp_affinity_gran_levels++;
2508 }
2509 break;
2510
2511 case pkgIdIndex:
2512 if (__kmp_affinity_gran > affinity_gran_package) {
2513 __kmp_affinity_gran_levels++;
2514 }
2515 break;
2516 }
2517 }
2518 }
2519
2520 if (__kmp_affinity_verbose) {
2521 __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2522 coreLevel, threadLevel);
2523 }
2524
2525 __kmp_free(inMap);
2526 __kmp_free(lastId);
2527 __kmp_free(totals);
2528 __kmp_free(maxCt);
2529 __kmp_free(counts);
2530 CLEANUP_THREAD_INFO;
2531 return depth;
2532}
2533
2534
2535//
2536// Create and return a table of affinity masks, indexed by OS thread ID.
2537// This routine handles OR'ing together all the affinity masks of threads
2538// that are sufficiently close, if granularity > fine.
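// For example, with granularity=core on a machine with two hardware
// threads per core, both OS procs on a core end up with the same
// two-bit mask.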
2539//
2540static kmp_affin_mask_t *
2541__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
2542 AddrUnsPair *address2os, unsigned numAddrs)
2543{
2544 //
2545 // First form a table of affinity masks in order of OS thread id.
2546 //
2547 unsigned depth;
2548 unsigned maxOsId;
2549 unsigned i;
2550
2551 KMP_ASSERT(numAddrs > 0);
2552 depth = address2os[0].first.depth;
2553
2554 maxOsId = 0;
2555 for (i = 0; i < numAddrs; i++) {
2556 unsigned osId = address2os[i].second;
2557 if (osId > maxOsId) {
2558 maxOsId = osId;
2559 }
2560 }
2561 kmp_affin_mask_t *osId2Mask = (kmp_affin_mask_t *)__kmp_allocate(
2562 (maxOsId + 1) * __kmp_affin_mask_size);
2563
2564 //
2565 // Sort the address2os table according to physical order. Doing so
2566 // will put all threads on the same core/package/node in consecutive
2567 // locations.
2568 //
2569 qsort(address2os, numAddrs, sizeof(*address2os),
2570 __kmp_affinity_cmp_Address_labels);
2571
2572 KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2573 if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
2574 KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
2575 }
2576 if (__kmp_affinity_gran_levels >= (int)depth) {
2577 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2578 && (__kmp_affinity_type != affinity_none))) {
2579 KMP_WARNING(AffThreadsMayMigrate);
2580 }
2581 }
2582
2583 //
2584 // Run through the table, forming the masks for all threads on each
2585 // core. Threads on the same core will have identical "Address"
2586 // objects, not considering the last level, which must be the thread
2587 // id. All threads on a core will appear consecutively.
2588 //
2589 unsigned unique = 0;
2590 unsigned j = 0; // index of 1st thread on core
2591 unsigned leader = 0;
2592 Address *leaderAddr = &(address2os[0].first);
2593 kmp_affin_mask_t *sum
2594 = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
2595 KMP_CPU_ZERO(sum);
2596 KMP_CPU_SET(address2os[0].second, sum);
2597 for (i = 1; i < numAddrs; i++) {
2598 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002599 // If this thread is sufficiently close to the leader (within the
Jim Cownie5e8470a2013-09-27 10:38:44 +00002600 // granularity setting), then set the bit for this os thread in the
2601 // affinity mask for this group, and go on to the next thread.
2602 //
2603 if (leaderAddr->isClose(address2os[i].first,
2604 __kmp_affinity_gran_levels)) {
2605 KMP_CPU_SET(address2os[i].second, sum);
2606 continue;
2607 }
2608
2609 //
2610 // For every thread in this group, copy the mask to the thread's
2611 // entry in the osId2Mask table. Mark the first address as a
2612 // leader.
2613 //
2614 for (; j < i; j++) {
2615 unsigned osId = address2os[j].second;
2616 KMP_DEBUG_ASSERT(osId <= maxOsId);
2617 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2618 KMP_CPU_COPY(mask, sum);
2619 address2os[j].first.leader = (j == leader);
2620 }
2621 unique++;
2622
2623 //
2624 // Start a new mask.
2625 //
2626 leader = i;
2627 leaderAddr = &(address2os[i].first);
2628 KMP_CPU_ZERO(sum);
2629 KMP_CPU_SET(address2os[i].second, sum);
2630 }
2631
2632 //
2633 // For every thread in last group, copy the mask to the thread's
2634 // entry in the osId2Mask table.
2635 //
2636 for (; j < i; j++) {
2637 unsigned osId = address2os[j].second;
2638 KMP_DEBUG_ASSERT(osId <= maxOsId);
2639 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2640 KMP_CPU_COPY(mask, sum);
2641 address2os[j].first.leader = (j == leader);
2642 }
2643 unique++;
2644
2645 *maxIndex = maxOsId;
2646 *numUnique = unique;
2647 return osId2Mask;
2648}
2649
2650
2651//
2652// Stuff for the affinity proclist parsers. It's easier to declare these vars
2653// as file-static than to try to pass them through the calling sequence of
2654// the recursive-descent OMP_PLACES parser.
2655//
2656static kmp_affin_mask_t *newMasks;
2657static int numNewMasks;
2658static int nextNewMask;
2659
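//
// ADD_MASK appends a copy of _mask to the newMasks vector, doubling the
// vector's storage whenever it fills up. ADD_MASK_OSID does the same for
// the mask of a single OS proc id, warning about (and skipping) ids that
// are out of range or not in the machine model.
//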
2660#define ADD_MASK(_mask) \
2661 { \
2662 if (nextNewMask >= numNewMasks) { \
2663 numNewMasks *= 2; \
2664 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
2665 numNewMasks * __kmp_affin_mask_size); \
2666 } \
2667 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
2668 nextNewMask++; \
2669 }
2670
2671#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
2672 { \
2673 if (((_osId) > _maxOsId) || \
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002674 (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002675 if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
2676 && (__kmp_affinity_type != affinity_none))) { \
2677 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
2678 } \
2679 } \
2680 else { \
2681 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
2682 } \
2683 }
2684
2685
2686//
2687// Re-parse the proclist (for the explicit affinity type), and form the list
2688// of affinity newMasks indexed by gtid.
2689//
2690static void
2691__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
2692 unsigned int *out_numMasks, const char *proclist,
2693 kmp_affin_mask_t *osId2Mask, int maxOsId)
2694{
2695 const char *scan = proclist;
2696 const char *next = proclist;
2697
2698 //
2699 // We use malloc() for the temporary mask vector,
2700 // so that we can use realloc() to extend it.
2701 //
2702 numNewMasks = 2;
2703 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
2704 * __kmp_affin_mask_size);
2705 nextNewMask = 0;
2706 kmp_affin_mask_t *sumMask = (kmp_affin_mask_t *)__kmp_allocate(
2707 __kmp_affin_mask_size);
2708 int setSize = 0;
2709
2710 for (;;) {
2711 int start, end, stride;
2712
2713 SKIP_WS(scan);
2714 next = scan;
2715 if (*next == '\0') {
2716 break;
2717 }
2718
2719 if (*next == '{') {
2720 int num;
2721 setSize = 0;
2722 next++; // skip '{'
2723 SKIP_WS(next);
2724 scan = next;
2725
2726 //
2727 // Read the first integer in the set.
2728 //
2729 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2730 "bad proclist");
2731 SKIP_DIGITS(next);
2732 num = __kmp_str_to_int(scan, *next);
2733 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2734
2735 //
2736 // Copy the mask for that osId to the sum (union) mask.
2737 //
2738 if ((num > maxOsId) ||
2739 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2740 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2741 && (__kmp_affinity_type != affinity_none))) {
2742 KMP_WARNING(AffIgnoreInvalidProcID, num);
2743 }
2744 KMP_CPU_ZERO(sumMask);
2745 }
2746 else {
2747 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2748 setSize = 1;
2749 }
2750
2751 for (;;) {
2752 //
2753 // Check for end of set.
2754 //
2755 SKIP_WS(next);
2756 if (*next == '}') {
2757 next++; // skip '}'
2758 break;
2759 }
2760
2761 //
2762 // Skip optional comma.
2763 //
2764 if (*next == ',') {
2765 next++;
2766 }
2767 SKIP_WS(next);
2768
2769 //
2770 // Read the next integer in the set.
2771 //
2772 scan = next;
2773 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2774 "bad explicit proc list");
2775
2776 SKIP_DIGITS(next);
2777 num = __kmp_str_to_int(scan, *next);
2778 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2779
2780 //
2781 // Add the mask for that osId to the sum mask.
2782 //
2783 if ((num > maxOsId) ||
2784 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2785 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2786 && (__kmp_affinity_type != affinity_none))) {
2787 KMP_WARNING(AffIgnoreInvalidProcID, num);
2788 }
2789 }
2790 else {
2791 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2792 setSize++;
2793 }
2794 }
2795 if (setSize > 0) {
2796 ADD_MASK(sumMask);
2797 }
2798
2799 SKIP_WS(next);
2800 if (*next == ',') {
2801 next++;
2802 }
2803 scan = next;
2804 continue;
2805 }
2806
2807 //
2808 // Read the first integer.
2809 //
2810 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2811 SKIP_DIGITS(next);
2812 start = __kmp_str_to_int(scan, *next);
2813 KMP_ASSERT2(start >= 0, "bad explicit proc list");
2814 SKIP_WS(next);
2815
2816 //
2817 // If this isn't a range, then add a mask to the list and go on.
2818 //
2819 if (*next != '-') {
2820 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2821
2822 //
2823 // Skip optional comma.
2824 //
2825 if (*next == ',') {
2826 next++;
2827 }
2828 scan = next;
2829 continue;
2830 }
2831
2832 //
2833 // This is a range. Skip over the '-' and read in the 2nd int.
2834 //
2835 next++; // skip '-'
2836 SKIP_WS(next);
2837 scan = next;
2838 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2839 SKIP_DIGITS(next);
2840 end = __kmp_str_to_int(scan, *next);
2841 KMP_ASSERT2(end >= 0, "bad explicit proc list");
2842
2843 //
2844 // Check for a stride parameter
2845 //
2846 stride = 1;
2847 SKIP_WS(next);
2848 if (*next == ':') {
2849 //
2850 // A stride is specified. Skip over the ':' and read the 3rd int.
2851 //
2852 int sign = +1;
2853 next++; // skip ':'
2854 SKIP_WS(next);
2855 scan = next;
2856 if (*next == '-') {
2857 sign = -1;
2858 next++;
2859 SKIP_WS(next);
2860 scan = next;
2861 }
2862 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2863 "bad explicit proc list");
2864 SKIP_DIGITS(next);
2865 stride = __kmp_str_to_int(scan, *next);
2866 KMP_ASSERT2(stride >= 0, "bad explicit proc list");
2867 stride *= sign;
2868 }
2869
2870 //
2871 // Do some range checks.
2872 //
2873 KMP_ASSERT2(stride != 0, "bad explicit proc list");
2874 if (stride > 0) {
2875 KMP_ASSERT2(start <= end, "bad explicit proc list");
2876 }
2877 else {
2878 KMP_ASSERT2(start >= end, "bad explicit proc list");
2879 }
2880 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
2881
2882 //
2883 // Add the mask for each OS proc # to the list.
2884 //
2885 if (stride > 0) {
2886 do {
2887 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2888 start += stride;
2889 } while (start <= end);
2890 }
2891 else {
2892 do {
2893 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2894 start += stride;
2895 } while (start >= end);
2896 }
2897
2898 //
2899 // Skip optional comma.
2900 //
2901 SKIP_WS(next);
2902 if (*next == ',') {
2903 next++;
2904 }
2905 scan = next;
2906 }
2907
2908 *out_numMasks = nextNewMask;
2909 if (nextNewMask == 0) {
2910 *out_masks = NULL;
2911 KMP_INTERNAL_FREE(newMasks);
2912 return;
2913 }
2914 *out_masks
2915 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
2916 memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
2917 __kmp_free(sumMask);
2918 KMP_INTERNAL_FREE(newMasks);
2919}
2920
2921
2922# if OMP_40_ENABLED
2923
2924/*-----------------------------------------------------------------------------
2925
2926Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
2927 places. Again, here is the grammar:
2928
2929place_list := place
2930place_list := place , place_list
2931place := num
2932place := place : num
2933place := place : num : signed
2934place := { subplacelist }
2935place := ! place // (lowest priority)
2936subplace_list := subplace
2937subplace_list := subplace , subplace_list
2938subplace := num
2939subplace := num : num
2940subplace := num : num : signed
2941signed := num
2942signed := + signed
2943signed := - signed
2944
2945-----------------------------------------------------------------------------*/
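//
// For example, OMP_PLACES="{0,1},{2,3},{4,5}" names three explicit places,
// while OMP_PLACES="{0:4}:2:4" is the place {0,1,2,3} replicated twice at a
// stride of 4 OS procs, i.e. the places {0,1,2,3} and {4,5,6,7}.
//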
2946
2947static void
2948__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
2949 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
2950{
2951 const char *next;
2952
2953 for (;;) {
2954 int start, count, stride, i;
2955
2956 //
2957 // Read in the starting proc id
2958 //
2959 SKIP_WS(*scan);
2960 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2961 "bad explicit places list");
2962 next = *scan;
2963 SKIP_DIGITS(next);
2964 start = __kmp_str_to_int(*scan, *next);
2965 KMP_ASSERT(start >= 0);
2966 *scan = next;
2967
2968 //
2969 // valid follow sets are ',' ':' and '}'
2970 //
2971 SKIP_WS(*scan);
2972 if (**scan == '}' || **scan == ',') {
2973 if ((start > maxOsId) ||
2974 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
2975 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2976 && (__kmp_affinity_type != affinity_none))) {
2977 KMP_WARNING(AffIgnoreInvalidProcID, start);
2978 }
2979 }
2980 else {
2981 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
2982 (*setSize)++;
2983 }
2984 if (**scan == '}') {
2985 break;
2986 }
2987 (*scan)++; // skip ','
2988 continue;
2989 }
2990 KMP_ASSERT2(**scan == ':', "bad explicit places list");
2991 (*scan)++; // skip ':'
2992
2993 //
2994 // Read count parameter
2995 //
2996 SKIP_WS(*scan);
2997 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2998 "bad explicit places list");
2999 next = *scan;
3000 SKIP_DIGITS(next);
3001 count = __kmp_str_to_int(*scan, *next);
3002 KMP_ASSERT(count >= 0);
3003 *scan = next;
3004
3005 //
3006 // valid follow sets are ',' ':' and '}'
3007 //
3008 SKIP_WS(*scan);
3009 if (**scan == '}' || **scan == ',') {
3010 for (i = 0; i < count; i++) {
3011 if ((start > maxOsId) ||
3012 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3013 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3014 && (__kmp_affinity_type != affinity_none))) {
3015 KMP_WARNING(AffIgnoreInvalidProcID, start);
3016 }
3017 break; // don't proliferate warnings for large count
3018 }
3019 else {
3020 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3021 start++;
3022 (*setSize)++;
3023 }
3024 }
3025 if (**scan == '}') {
3026 break;
3027 }
3028 (*scan)++; // skip ','
3029 continue;
3030 }
3031 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3032 (*scan)++; // skip ':'
3033
3034 //
3035 // Read stride parameter
3036 //
3037 int sign = +1;
3038 for (;;) {
3039 SKIP_WS(*scan);
3040 if (**scan == '+') {
3041 (*scan)++; // skip '+'
3042 continue;
3043 }
3044 if (**scan == '-') {
3045 sign *= -1;
3046 (*scan)++; // skip '-'
3047 continue;
3048 }
3049 break;
3050 }
3051 SKIP_WS(*scan);
3052 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3053 "bad explicit places list");
3054 next = *scan;
3055 SKIP_DIGITS(next);
3056 stride = __kmp_str_to_int(*scan, *next);
3057 KMP_ASSERT(stride >= 0);
3058 *scan = next;
3059 stride *= sign;
3060
3061 //
3062 // valid follow sets are ',' and '}'
3063 //
3064 SKIP_WS(*scan);
3065 if (**scan == '}' || **scan == ',') {
3066 for (i = 0; i < count; i++) {
3067 if ((start > maxOsId) ||
3068 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3069 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3070 && (__kmp_affinity_type != affinity_none))) {
3071 KMP_WARNING(AffIgnoreInvalidProcID, start);
3072 }
3073 break; // don't proliferate warnings for large count
3074 }
3075 else {
3076 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3077 start += stride;
3078 (*setSize)++;
3079 }
3080 }
3081 if (**scan == '}') {
3082 break;
3083 }
3084 (*scan)++; // skip ','
3085 continue;
3086 }
3087
3088 KMP_ASSERT2(0, "bad explicit places list");
3089 }
3090}
3091
3092
3093static void
3094__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
3095 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3096{
3097 const char *next;
3098
3099 //
3100 // valid follow sets are '{' '!' and num
3101 //
3102 SKIP_WS(*scan);
3103 if (**scan == '{') {
3104 (*scan)++; // skip '{'
3105 __kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask,
3106 setSize);
3107 KMP_ASSERT2(**scan == '}', "bad explicit places list");
3108 (*scan)++; // skip '}'
3109 }
3110 else if (**scan == '!') {
3111 (*scan)++; // skip '!' before recursing, or we would recurse on it forever
3112 __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
3113 KMP_CPU_COMPLEMENT(tempMask);
3114 }
3115 else if ((**scan >= '0') && (**scan <= '9')) {
3116 next = *scan;
3117 SKIP_DIGITS(next);
3118 int num = __kmp_str_to_int(*scan, *next);
3119 KMP_ASSERT(num >= 0);
3120 if ((num > maxOsId) ||
3121 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3122 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3123 && (__kmp_affinity_type != affinity_none))) {
3124 KMP_WARNING(AffIgnoreInvalidProcID, num);
3125 }
3126 }
3127 else {
3128 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
3129 (*setSize)++;
3130 }
3131 *scan = next; // skip num
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003132 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003133 else {
3134 KMP_ASSERT2(0, "bad explicit places list");
3135 }
3136}
3137
3138
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003139//static void
3140void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003141__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
3142 unsigned int *out_numMasks, const char *placelist,
3143 kmp_affin_mask_t *osId2Mask, int maxOsId)
3144{
3145 const char *scan = placelist;
3146 const char *next = placelist;
3147
3148 numNewMasks = 2;
3149 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
3150 * __kmp_affin_mask_size);
3151 nextNewMask = 0;
3152
3153 kmp_affin_mask_t *tempMask = (kmp_affin_mask_t *)__kmp_allocate(
3154 __kmp_affin_mask_size);
3155 KMP_CPU_ZERO(tempMask);
3156 int setSize = 0;
3157
3158 for (;;) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003159 __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
3160
3161 //
3162 // valid follow sets are ',' ':' and EOL
3163 //
3164 SKIP_WS(scan);
3165 if (*scan == '\0' || *scan == ',') {
3166 if (setSize > 0) {
3167 ADD_MASK(tempMask);
3168 }
3169 KMP_CPU_ZERO(tempMask);
3170 setSize = 0;
3171 if (*scan == '\0') {
3172 break;
3173 }
3174 scan++; // skip ','
3175 continue;
3176 }
3177
3178 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3179 scan++; // skip ':'
3180
3181 //
3182 // Read count parameter
3183 //
3184 SKIP_WS(scan);
3185 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3186 "bad explicit places list");
3187 next = scan;
3188 SKIP_DIGITS(next);
Jim Cownie181b4bb2013-12-23 17:28:57 +00003189 int count = __kmp_str_to_int(scan, *next);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003190 KMP_ASSERT(count >= 0);
3191 scan = next;
3192
3193 //
3194 // valid follow sets are ',' ':' and EOL
3195 //
3196 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003197 int stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003198 if (*scan == '\0' || *scan == ',') {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003199 stride = +1;
3200 }
3201 else {
3202 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3203 scan++; // skip ':'
Jim Cownie5e8470a2013-09-27 10:38:44 +00003204
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003205 //
3206 // Read stride parameter
3207 //
3208 int sign = +1;
3209 for (;;) {
3210 SKIP_WS(scan);
3211 if (*scan == '+') {
3212 scan++; // skip '+'
3213 continue;
3214 }
3215 if (*scan == '-') {
3216 sign *= -1;
3217 scan++; // skip '-'
3218 continue;
3219 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003220 break;
3221 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003222 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003223 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3224 "bad explicit places list");
3225 next = scan;
3226 SKIP_DIGITS(next);
3227 stride = __kmp_str_to_int(scan, *next);
3228 KMP_DEBUG_ASSERT(stride >= 0);
3229 scan = next;
3230 stride *= sign;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003231 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003232
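//
// Replicate the place <count> times, shifting the bits in tempMask by
// <stride> OS proc ids each iteration (up for a positive stride, down
// for a negative one), dropping any procs that fall outside the
// machine model.
//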
3233 if (stride > 0) {
3234 int i;
3235 for (i = 0; i < count; i++) {
3236 int j;
3237 if (setSize == 0) {
3238 break;
3239 }
3240 ADD_MASK(tempMask);
3241 setSize = 0;
3242 for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003243 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3244 KMP_CPU_CLR(j, tempMask);
3245 }
3246 else if ((j > maxOsId) ||
3247 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
3248 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3249 && (__kmp_affinity_type != affinity_none))) {
3250 KMP_WARNING(AffIgnoreInvalidProcID, j);
3251 }
3252 KMP_CPU_CLR(j, tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003253 }
3254 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003255 KMP_CPU_SET(j, tempMask);
3256 setSize++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003257 }
3258 }
3259 for (; j >= 0; j--) {
3260 KMP_CPU_CLR(j, tempMask);
3261 }
3262 }
3263 }
3264 else {
3265 int i;
3266 for (i = 0; i < count; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003267 int j;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003268 if (setSize == 0) {
3269 break;
3270 }
3271 ADD_MASK(tempMask);
3272 setSize = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003273 for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003274 j++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003275 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3276 KMP_CPU_CLR(j, tempMask);
3277 }
3278 else if ((j > maxOsId) ||
3279 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
3280 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3281 && (__kmp_affinity_type != affinity_none))) {
3282 KMP_WARNING(AffIgnoreInvalidProcID, j);
3283 }
3284 KMP_CPU_CLR(j, tempMask);
3285 }
3286 else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003287 KMP_CPU_SET(j, tempMask);
3288 setSize++;
3289 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003290 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003291 for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003292 KMP_CPU_CLR(j, tempMask);
3293 }
3294 }
3295 }
3296 KMP_CPU_ZERO(tempMask);
3297 setSize = 0;
3298
3299 //
3300 // valid follow sets are ',' and EOL
3301 //
3302 SKIP_WS(scan);
3303 if (*scan == '\0') {
3304 break;
3305 }
3306 if (*scan == ',') {
3307 scan++; // skip ','
3308 continue;
3309 }
3310
3311 KMP_ASSERT2(0, "bad explicit places list");
3312 }
3313
3314 *out_numMasks = nextNewMask;
3315 if (nextNewMask == 0) {
3316 *out_masks = NULL;
3317 KMP_INTERNAL_FREE(newMasks);
3318 return;
3319 }
3320 *out_masks
3321 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
3322 memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
3323 __kmp_free(tempMask);
3324 KMP_INTERNAL_FREE(newMasks);
3325}
3326
3327# endif /* OMP_40_ENABLED */
3328
3329#undef ADD_MASK
3330#undef ADD_MASK_OSID
3331
3332
3333# if KMP_MIC
3334
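//
// Trim the topology map according to the __kmp_place_* settings: keep only
// the requested number of cores per package (starting at the core offset)
// and HW threads per core, then correct the global counts to match.
//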
3335static void
3336__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
3337{
3338 if ( __kmp_place_num_cores == 0 ) {
3339 if ( __kmp_place_num_threads_per_core == 0 ) {
3340 return; // no cores limiting actions requested, exit
3341 }
3342 __kmp_place_num_cores = nCoresPerPkg; // use all available cores
3343 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003344 if ( !__kmp_affinity_uniform_topology() ) {
3345 KMP_WARNING( AffThrPlaceNonUniform );
3346 return; // don't support non-uniform topology
3347 }
3348 if ( depth != 3 ) {
3349 KMP_WARNING( AffThrPlaceNonThreeLevel );
3350 return; // don't support non-3-level topology
Jim Cownie5e8470a2013-09-27 10:38:44 +00003351 }
3352 if ( __kmp_place_num_threads_per_core == 0 ) {
3353 __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
3354 }
3355 if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
3356 KMP_WARNING( AffThrPlaceManyCores );
3357 return;
3358 }
3359
3360 AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
3361 nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
3362 int i, j, k, n_old = 0, n_new = 0;
3363 for ( i = 0; i < nPackages; ++i ) {
3364 for ( j = 0; j < nCoresPerPkg; ++j ) {
3365 if ( j < __kmp_place_core_offset || j >= __kmp_place_core_offset + __kmp_place_num_cores ) {
3366 n_old += __kmp_nThreadsPerCore; // skip not-requested core
3367 } else {
3368 for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) {
3369 if ( k < __kmp_place_num_threads_per_core ) {
3370 newAddr[n_new] = (*pAddr)[n_old]; // copy requested core's data to new location
3371 n_new++;
3372 }
3373 n_old++;
3374 }
3375 }
3376 }
3377 }
3378 nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
3379 __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
3380 __kmp_avail_proc = n_new; // correct avail_proc
3381 __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
3382
3383 __kmp_free( *pAddr );
3384 *pAddr = newAddr; // replace old topology with new one
3385}
3386
3387# endif /* KMP_MIC */
3388
3389
3390static AddrUnsPair *address2os = NULL;
3391static int * procarr = NULL;
3392static int __kmp_aff_depth = 0;
3393
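//
// Top-level topology discovery: build the address2os map using the first
// method that succeeds (x2APIC ids, legacy APIC ids, /proc/cpuinfo, the
// Windows* processor groups, or a flat OS-proc map), then derive the
// per-proc affinity masks from it.
//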
3394static void
3395__kmp_aux_affinity_initialize(void)
3396{
3397 if (__kmp_affinity_masks != NULL) {
3398 KMP_ASSERT(fullMask != NULL);
3399 return;
3400 }
3401
3402 //
3403 // Create the "full" mask - this defines all of the processors that we
3404 // consider to be in the machine model. If respect is set, then it is
3405 // the initialization thread's affinity mask. Otherwise, it is all
3406 // processors that we know about on the machine.
3407 //
3408 if (fullMask == NULL) {
3409 fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
3410 }
3411 if (KMP_AFFINITY_CAPABLE()) {
3412 if (__kmp_affinity_respect_mask) {
3413 __kmp_get_system_affinity(fullMask, TRUE);
3414
3415 //
3416 // Count the number of available processors.
3417 //
3418 unsigned i;
3419 __kmp_avail_proc = 0;
3420 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
3421 if (! KMP_CPU_ISSET(i, fullMask)) {
3422 continue;
3423 }
3424 __kmp_avail_proc++;
3425 }
3426 if (__kmp_avail_proc > __kmp_xproc) {
3427 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3428 && (__kmp_affinity_type != affinity_none))) {
3429 KMP_WARNING(ErrorInitializeAffinity);
3430 }
3431 __kmp_affinity_type = affinity_none;
3432 __kmp_affin_mask_size = 0;
3433 return;
3434 }
3435 }
3436 else {
3437 __kmp_affinity_entire_machine_mask(fullMask);
3438 __kmp_avail_proc = __kmp_xproc;
3439 }
3440 }
3441
3442 int depth = -1;
3443 kmp_i18n_id_t msg_id = kmp_i18n_null;
3444
3445 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00003446 // For backward compatibility, setting KMP_CPUINFO_FILE =>
Jim Cownie5e8470a2013-09-27 10:38:44 +00003447 // KMP_TOPOLOGY_METHOD=cpuinfo
3448 //
3449 if ((__kmp_cpuinfo_file != NULL) &&
3450 (__kmp_affinity_top_method == affinity_top_method_all)) {
3451 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
3452 }
3453
3454 if (__kmp_affinity_top_method == affinity_top_method_all) {
3455 //
3456 // In the default code path, errors are not fatal - we just try using
3457 // another method. We only emit a warning message if affinity is on,
3458 // or the verbose flag is set, and the nowarnings flag was not set.
3459 //
3460 const char *file_name = NULL;
3461 int line = 0;
3462
3463# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3464
3465 if (__kmp_affinity_verbose) {
3466 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
3467 }
3468
3469 file_name = NULL;
3470 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3471 if (depth == 0) {
3472 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3473 KMP_ASSERT(address2os == NULL);
3474 return;
3475 }
3476
3477 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003478 if (__kmp_affinity_verbose) {
3479 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003480 KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
3481 KMP_I18N_STR(DecodingLegacyAPIC));
3482 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003483 else {
3484 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
3485 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003486 }
3487
3488 file_name = NULL;
3489 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3490 if (depth == 0) {
3491 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3492 KMP_ASSERT(address2os == NULL);
3493 return;
3494 }
3495 }
3496
3497# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3498
3499# if KMP_OS_LINUX
3500
3501 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003502 if (__kmp_affinity_verbose) {
3503 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003504 KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
3505 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003506 else {
3507 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
3508 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003509 }
3510
3511 FILE *f = fopen("/proc/cpuinfo", "r");
3512 if (f == NULL) {
3513 msg_id = kmp_i18n_str_CantOpenCpuinfo;
3514 }
3515 else {
3516 file_name = "/proc/cpuinfo";
3517 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3518 fclose(f);
3519 if (depth == 0) {
3520 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3521 KMP_ASSERT(address2os == NULL);
3522 return;
3523 }
3524 }
3525 }
3526
3527# endif /* KMP_OS_LINUX */
3528
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003529# if KMP_GROUP_AFFINITY
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003530
3531 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
3532 if (__kmp_affinity_verbose) {
3533 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3534 }
3535
3536 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3537 KMP_ASSERT(depth != 0);
3538 }
3539
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003540# endif /* KMP_GROUP_AFFINITY */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003541
Jim Cownie5e8470a2013-09-27 10:38:44 +00003542 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003543 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003544 if (file_name == NULL) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003545 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003546 }
3547 else if (line == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003548 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003549 }
3550 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003551 KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003552 }
3553 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003554 // FIXME - print msg if msg_id = kmp_i18n_null ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00003555
3556 file_name = "";
3557 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3558 if (depth == 0) {
3559 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3560 KMP_ASSERT(address2os == NULL);
3561 return;
3562 }
3563 KMP_ASSERT(depth > 0);
3564 KMP_ASSERT(address2os != NULL);
3565 }
3566 }
3567
3568 //
3569 // If the user has specified that a particular topology discovery method
3570 // is to be used, then we abort if that method fails. The exception is
3571 // group affinity, which might have been implicitly set.
3572 //
3573
3574# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3575
3576 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
3577 if (__kmp_affinity_verbose) {
3578 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3579 KMP_I18N_STR(Decodingx2APIC));
3580 }
3581
3582 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3583 if (depth == 0) {
3584 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3585 KMP_ASSERT(address2os == NULL);
3586 return;
3587 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003588 if (depth < 0) {
3589 KMP_ASSERT(msg_id != kmp_i18n_null);
3590 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3591 }
3592 }
3593 else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
3594 if (__kmp_affinity_verbose) {
3595 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3596 KMP_I18N_STR(DecodingLegacyAPIC));
3597 }
3598
3599 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3600 if (depth == 0) {
3601 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3602 KMP_ASSERT(address2os == NULL);
3603 return;
3604 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003605 if (depth < 0) {
3606 KMP_ASSERT(msg_id != kmp_i18n_null);
3607 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3608 }
3609 }
3610
3611# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3612
3613 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
3614 const char *filename;
3615 if (__kmp_cpuinfo_file != NULL) {
3616 filename = __kmp_cpuinfo_file;
3617 }
3618 else {
3619 filename = "/proc/cpuinfo";
3620 }
3621
3622 if (__kmp_affinity_verbose) {
3623 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
3624 }
3625
3626 FILE *f = fopen(filename, "r");
3627 if (f == NULL) {
3628 int code = errno;
3629 if (__kmp_cpuinfo_file != NULL) {
3630 __kmp_msg(
3631 kmp_ms_fatal,
3632 KMP_MSG(CantOpenFileForReading, filename),
3633 KMP_ERR(code),
3634 KMP_HNT(NameComesFrom_CPUINFO_FILE),
3635 __kmp_msg_null
3636 );
3637 }
3638 else {
3639 __kmp_msg(
3640 kmp_ms_fatal,
3641 KMP_MSG(CantOpenFileForReading, filename),
3642 KMP_ERR(code),
3643 __kmp_msg_null
3644 );
3645 }
3646 }
3647 int line = 0;
3648 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3649 fclose(f);
3650 if (depth < 0) {
3651 KMP_ASSERT(msg_id != kmp_i18n_null);
3652 if (line > 0) {
3653 KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
3654 }
3655 else {
3656 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
3657 }
3658 }
3659 if (__kmp_affinity_type == affinity_none) {
3660 KMP_ASSERT(depth == 0);
3661 KMP_ASSERT(address2os == NULL);
3662 return;
3663 }
3664 }
3665
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003666# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00003667
3668 else if (__kmp_affinity_top_method == affinity_top_method_group) {
3669 if (__kmp_affinity_verbose) {
3670 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3671 }
3672
3673 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3674 KMP_ASSERT(depth != 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003675 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003676 KMP_ASSERT(msg_id != kmp_i18n_null);
3677 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003678 }
3679 }
3680
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003681# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003682
3683 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
3684 if (__kmp_affinity_verbose) {
3685 KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
3686 }
3687
3688 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3689 if (depth == 0) {
3690 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3691 KMP_ASSERT(address2os == NULL);
3692 return;
3693 }
3694 // should not fail
3695 KMP_ASSERT(depth > 0);
3696 KMP_ASSERT(address2os != NULL);
3697 }
3698
3699 if (address2os == NULL) {
3700 if (KMP_AFFINITY_CAPABLE()
3701 && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3702 && (__kmp_affinity_type != affinity_none)))) {
3703 KMP_WARNING(ErrorInitializeAffinity);
3704 }
3705 __kmp_affinity_type = affinity_none;
3706 __kmp_affin_mask_size = 0;
3707 return;
3708 }
3709
3710# if KMP_MIC
3711 __kmp_apply_thread_places(&address2os, depth);
3712# endif
3713
3714 //
3715 // Create the table of masks, indexed by thread Id.
3716 //
3717 unsigned maxIndex;
3718 unsigned numUnique;
3719 kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
3720 address2os, __kmp_avail_proc);
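    // numUnique is the number of distinct masks produced at the chosen
    // granularity. For example (assumed machine): with granularity == core on
    // a 2-way SMT system, each mask covers two OS procs, so numUnique would
    // be __kmp_avail_proc / 2; at the finest granularity every OS proc gets
    // its own mask, which is what the assertion below checks.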
    if (__kmp_affinity_gran_levels == 0) {
        KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
    }

    //
    // Set the childNums vector in all Address objects. This must be done
    // before we can sort using __kmp_affinity_cmp_Address_child_num(),
    // which takes into account the setting of __kmp_affinity_compact.
    //
    __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);

    switch (__kmp_affinity_type) {

    case affinity_explicit:
        KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
# if OMP_40_ENABLED
        if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
# endif
        {
            __kmp_affinity_process_proclist(&__kmp_affinity_masks,
              &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
              maxIndex);
        }
# if OMP_40_ENABLED
        else {
            __kmp_affinity_process_placelist(&__kmp_affinity_masks,
              &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
              maxIndex);
        }
# endif
        if (__kmp_affinity_num_masks == 0) {
            if (__kmp_affinity_verbose || (__kmp_affinity_warnings
              && (__kmp_affinity_type != affinity_none))) {
                KMP_WARNING(AffNoValidProcID);
            }
            __kmp_affinity_type = affinity_none;
            return;
        }
        break;

    //
    // The other affinity types rely on sorting the Addresses according
    // to some permutation of the machine topology tree. Set
    // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
    // then jump to a common code fragment to do the sort and create
    // the array of affinity masks.
    //

    case affinity_logical:
        __kmp_affinity_compact = 0;
        if (__kmp_affinity_offset) {
            __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
              % __kmp_avail_proc;
        }
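        // The user offset is scaled by the number of hardware threads per
        // core and wrapped modulo the machine size; e.g. (assumed values)
        // with __kmp_nThreadsPerCore == 2 and offset == 3, the effective
        // offset becomes 6 % __kmp_avail_proc, so it always lands inside
        // the machine.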
        goto sortAddresses;

    case affinity_physical:
        if (__kmp_nThreadsPerCore > 1) {
            __kmp_affinity_compact = 1;
            if (__kmp_affinity_compact >= depth) {
                __kmp_affinity_compact = 0;
            }
        } else {
            __kmp_affinity_compact = 0;
        }
        if (__kmp_affinity_offset) {
            __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
              % __kmp_avail_proc;
        }
        goto sortAddresses;

    case affinity_scatter:
        if (__kmp_affinity_compact >= depth) {
            __kmp_affinity_compact = 0;
        }
        else {
            __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
        }
        goto sortAddresses;

    case affinity_compact:
        if (__kmp_affinity_compact >= depth) {
            __kmp_affinity_compact = depth - 1;
        }
        goto sortAddresses;

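    // Illustration of the scatter/compact mapping above: on a depth-3 tree
    // (package, core, thread), compact with permute value 0 packs
    // consecutive threads onto neighboring thread contexts, while scatter
    // with permute value 0 becomes compact = depth - 1 - 0 = 2 internally,
    // spreading consecutive threads across packages first.
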
    case affinity_balanced:
        // Balanced works only for the case of a single package
        if( nPackages > 1 ) {
            if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
                KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
            }
            __kmp_affinity_type = affinity_none;
            return;
        } else if( __kmp_affinity_uniform_topology() ) {
            break;
        } else { // Non-uniform topology

            // Save the depth for further usage
            __kmp_aff_depth = depth;

            // Number of hyperthreads per core in an HT machine
            int nth_per_core = __kmp_nThreadsPerCore;

            int core_level;
            if( nth_per_core > 1 ) {
                core_level = depth - 2;
            } else {
                core_level = depth - 1;
            }
            int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
            int nproc = nth_per_core * ncores;

            procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
            for( int i = 0; i < nproc; i++ ) {
                procarr[ i ] = -1;
            }

            for( int i = 0; i < __kmp_avail_proc; i++ ) {
                int proc = address2os[ i ].second;
                // If depth == 3 then level=0 - package, level=1 - core, level=2 - thread.
                // If there is only one thread per core then depth == 2: level 0 - package,
                // level 1 - core.
                int level = depth - 1;

                // __kmp_nth_per_core == 1
                int thread = 0;
                int core = address2os[ i ].first.labels[ level ];
                // If the thread level exists, that is we have more than one thread context per core
                if( nth_per_core > 1 ) {
                    thread = address2os[ i ].first.labels[ level ] % nth_per_core;
                    core = address2os[ i ].first.labels[ level - 1 ];
                }
                procarr[ core * nth_per_core + thread ] = proc;
            }

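            // procarr[] now maps each (core, thread-context) slot to the OS
            // proc id bound there, or -1 where the non-uniform machine has no
            // processor. A hypothetical layout with nth_per_core == 2 and 3
            // cores, one of which lost a context, would be
            // { 0, 1, 2, -1, 3, 4 }.
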
            break;
        }

    sortAddresses:
        //
        // Allocate the gtid->affinity mask table.
        //
        if (__kmp_affinity_dups) {
            __kmp_affinity_num_masks = __kmp_avail_proc;
        }
        else {
            __kmp_affinity_num_masks = numUnique;
        }

# if OMP_40_ENABLED
        if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
          && ( __kmp_affinity_num_places > 0 )
          && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
            __kmp_affinity_num_masks = __kmp_affinity_num_places;
        }
# endif

        __kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
          __kmp_affinity_num_masks * __kmp_affin_mask_size);

        //
        // Sort the address2os table according to the current setting of
        // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
        //
        qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
          __kmp_affinity_cmp_Address_child_num);
        {
            int i;
            unsigned j;
            for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
                if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
                    continue;
                }
                unsigned osId = address2os[i].second;
                kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
                kmp_affin_mask_t *dest
                  = KMP_CPU_INDEX(__kmp_affinity_masks, j);
                KMP_ASSERT(KMP_CPU_ISSET(osId, src));
                KMP_CPU_COPY(dest, src);
                if (++j >= __kmp_affinity_num_masks) {
                    break;
                }
            }
            KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
        }
        break;

    default:
        KMP_ASSERT2(0, "Unexpected affinity setting");
    }

    __kmp_free(osId2Mask);
    machine_hierarchy.init(address2os, __kmp_avail_proc);
}


void
__kmp_affinity_initialize(void)
{
    //
    // Much of the code above was written assuming that if a machine was not
    // affinity capable, then __kmp_affinity_type == affinity_none. We now
    // explicitly represent this as __kmp_affinity_type == affinity_disabled.
    //
    // There are too many checks for __kmp_affinity_type == affinity_none
    // in this code. Instead of trying to change them all, check if
    // __kmp_affinity_type == affinity_disabled, and if so, slam it with
    // affinity_none, call the real initialization routine, then restore
    // __kmp_affinity_type to affinity_disabled.
    //
    int disabled = (__kmp_affinity_type == affinity_disabled);
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(disabled);
    }
    if (disabled) {
        __kmp_affinity_type = affinity_none;
    }
    __kmp_aux_affinity_initialize();
    if (disabled) {
        __kmp_affinity_type = affinity_disabled;
    }
}


void
__kmp_affinity_uninitialize(void)
{
    if (__kmp_affinity_masks != NULL) {
        __kmp_free(__kmp_affinity_masks);
        __kmp_affinity_masks = NULL;
    }
    if (fullMask != NULL) {
        KMP_CPU_FREE(fullMask);
        fullMask = NULL;
    }
    __kmp_affinity_num_masks = 0;
# if OMP_40_ENABLED
    __kmp_affinity_num_places = 0;
# endif
    if (__kmp_affinity_proclist != NULL) {
        __kmp_free(__kmp_affinity_proclist);
        __kmp_affinity_proclist = NULL;
    }
    if( address2os != NULL ) {
        __kmp_free( address2os );
        address2os = NULL;
    }
    if( procarr != NULL ) {
        __kmp_free( procarr );
        procarr = NULL;
    }
}


void
__kmp_affinity_set_init_mask(int gtid, int isa_root)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return;
    }

    kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
    if (th->th.th_affin_mask == NULL) {
        KMP_CPU_ALLOC(th->th.th_affin_mask);
    }
    else {
        KMP_CPU_ZERO(th->th.th_affin_mask);
    }

    //
    // Copy the thread mask to the kmp_info_t structure.
    // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
    // that has all of the OS proc ids set, or if __kmp_affinity_respect_mask
    // is set, then the full mask is the same as the mask of the initialization
    // thread.
    //
    kmp_affin_mask_t *mask;
    int i;

# if OMP_40_ENABLED
    if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
# endif
    {
        if ((__kmp_affinity_type == affinity_none) || (__kmp_affinity_type == affinity_balanced)) {
# if KMP_GROUP_AFFINITY
            if (__kmp_num_proc_groups > 1) {
                return;
            }
# endif
            KMP_ASSERT(fullMask != NULL);
            i = KMP_PLACE_ALL;
            mask = fullMask;
        }
        else {
            KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
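            // Round-robin the thread over the place list; e.g. (assumed
            // values) with __kmp_affinity_num_masks == 8 and
            // __kmp_affinity_offset == 2, gtid 0 gets mask 2 and gtid 6
            // wraps around to mask 0.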
            i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
            mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
        }
    }
# if OMP_40_ENABLED
    else {
        if ((! isa_root)
          || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
# if KMP_GROUP_AFFINITY
            if (__kmp_num_proc_groups > 1) {
                return;
            }
# endif
            KMP_ASSERT(fullMask != NULL);
            i = KMP_PLACE_ALL;
            mask = fullMask;
        }
        else {
            //
            // int i = some hash function or just a counter that doesn't
            // always start at 0. Use gtid for now.
            //
            KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
            i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
            mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
        }
    }
# endif

# if OMP_40_ENABLED
    th->th.th_current_place = i;
    if (isa_root) {
        th->th.th_new_place = i;
        th->th.th_first_place = 0;
        th->th.th_last_place = __kmp_affinity_num_masks - 1;
    }

    if (i == KMP_PLACE_ALL) {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
          gtid));
    }
    else {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
          gtid, i));
    }
# else
    if (i == -1) {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
          gtid));
    }
    else {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
          gtid, i));
    }
# endif /* OMP_40_ENABLED */

    KMP_CPU_COPY(th->th.th_affin_mask, mask);

    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          th->th.th_affin_mask);
        KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
          buf);
    }

# if KMP_OS_WINDOWS
    //
    // On Windows* OS, the process affinity mask might have changed.
    // If the user didn't request affinity and this call fails,
    // just continue silently. See CQ171393.
    //
    if ( __kmp_affinity_type == affinity_none ) {
        __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
    }
    else
# endif
        __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}


# if OMP_40_ENABLED

void
__kmp_affinity_set_place(int gtid)
{
    int retval;

    if (! KMP_AFFINITY_CAPABLE()) {
        return;
    }

    kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);

    KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
      gtid, th->th.th_new_place, th->th.th_current_place));

    //
    // Check that the new place is within this thread's partition.
    //
    KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
    KMP_ASSERT(th->th.th_new_place >= 0);
    KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
    if (th->th.th_first_place <= th->th.th_last_place) {
        KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
          && (th->th.th_new_place <= th->th.th_last_place));
    }
    else {
        KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
          || (th->th.th_new_place >= th->th.th_last_place));
    }

    //
    // Copy the thread mask to the kmp_info_t structure,
    // and set this thread's affinity.
    //
    kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
      th->th.th_new_place);
    KMP_CPU_COPY(th->th.th_affin_mask, mask);
    th->th.th_current_place = th->th.th_new_place;

    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          th->th.th_affin_mask);
        KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
          gtid, buf);
    }
    __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}

# endif /* OMP_40_ENABLED */


int
__kmp_aux_set_affinity(void **mask)
{
    int gtid;
    kmp_info_t *th;
    int retval;

    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    gtid = __kmp_entry_gtid();
    KA_TRACE(1000, ;{
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
          gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
        }
        else {
            unsigned proc;
            int num_procs = 0;

            for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
                if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
                    continue;
                }
                num_procs++;
                if (! KMP_CPU_ISSET(proc, fullMask)) {
                    KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
                    break;
                }
            }
            if (num_procs == 0) {
                KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
            }

# if KMP_GROUP_AFFINITY
            if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
                KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
            }
# endif /* KMP_GROUP_AFFINITY */

        }
    }

    th = __kmp_threads[gtid];
    KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
    retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
    if (retval == 0) {
        KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
    }

# if OMP_40_ENABLED
    th->th.th_current_place = KMP_PLACE_UNDEFINED;
    th->th.th_new_place = KMP_PLACE_UNDEFINED;
    th->th.th_first_place = 0;
    th->th.th_last_place = __kmp_affinity_num_masks - 1;

    //
    // Turn off 4.0 affinity for the current thread at this parallel level.
    //
    th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
# endif

    return retval;
}


int
__kmp_aux_get_affinity(void **mask)
{
    int gtid;
    int retval;
    kmp_info_t *th;

    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    gtid = __kmp_entry_gtid();
    th = __kmp_threads[gtid];
    KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);

    KA_TRACE(1000, ;{
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          th->th.th_affin_mask);
        __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
        }
    }

# if !KMP_OS_WINDOWS

    retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
    KA_TRACE(1000, ;{
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
    });
    return retval;

# else

    KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
    return 0;

# endif /* KMP_OS_WINDOWS */

}

int
__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
{
    int retval;

    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    KA_TRACE(1000, ;{
        int gtid = __kmp_entry_gtid();
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
          proc, gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
        }
    }

    if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
        return -1;
    }
    if (! KMP_CPU_ISSET(proc, fullMask)) {
        return -2;
    }

    KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
    return 0;
}


int
__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
{
    int retval;

    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    KA_TRACE(1000, ;{
        int gtid = __kmp_entry_gtid();
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
          proc, gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
        }
    }

    if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
        return -1;
    }
    if (! KMP_CPU_ISSET(proc, fullMask)) {
        return -2;
    }

    KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
    return 0;
}


int
__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
{
    int retval;

    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    KA_TRACE(1000, ;{
        int gtid = __kmp_entry_gtid();
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
          proc, gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
        }
    }

    if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
        return 0;
    }
    if (! KMP_CPU_ISSET(proc, fullMask)) {
        return 0;
    }

    return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
}


// Dynamic affinity settings - Affinity balanced
void __kmp_balanced_affinity( int tid, int nthreads )
{
    if( __kmp_affinity_uniform_topology() ) {
        int coreID;
        int threadID;
        // Number of hyperthreads per core in an HT machine
        int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
        // Number of cores
        int ncores = __kmp_ncores;
        // How many threads will be bound to each core
        int chunk = nthreads / ncores;
        // How many cores will have an additional thread bound to them - "big cores"
        int big_cores = nthreads % ncores;
        // Number of threads on the big cores
        int big_nth = ( chunk + 1 ) * big_cores;
        if( tid < big_nth ) {
            coreID = tid / (chunk + 1 );
            threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
        } else { //tid >= big_nth
            coreID = ( tid - big_cores ) / chunk;
            threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
        }

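        // Worked example (assumed machine): nthreads == 10 on 4 cores gives
        // chunk == 2, big_cores == 2, big_nth == 6; tids 0-5 land on the two
        // "big" cores (3 threads each), tids 6-9 on the remaining two cores
        // (2 threads each).
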
        KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
          "Illegal set affinity operation when not capable");

        kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
        KMP_CPU_ZERO(mask);

        // Granularity == thread
        if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
            int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
            KMP_CPU_SET( osID, mask);
        } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
            for( int i = 0; i < __kmp_nth_per_core; i++ ) {
                int osID;
                osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
                KMP_CPU_SET( osID, mask);
            }
        }
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
            KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
              tid, buf);
        }
        __kmp_set_system_affinity( mask, TRUE );
    } else { // Non-uniform topology

        kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
        KMP_CPU_ZERO(mask);

        // Number of hyperthreads per core in an HT machine
        int nth_per_core = __kmp_nThreadsPerCore;
        int core_level;
        if( nth_per_core > 1 ) {
            core_level = __kmp_aff_depth - 2;
        } else {
            core_level = __kmp_aff_depth - 1;
        }

        // Number of cores - maximum value; it does not count trailing cores with 0 processors
        int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;

        // For a performance gain, consider the special case nthreads == __kmp_avail_proc
        if( nthreads == __kmp_avail_proc ) {
            if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
                int osID = address2os[ tid ].second;
                KMP_CPU_SET( osID, mask);
            } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
                int coreID = address2os[ tid ].first.labels[ core_level ];
                // Count the osIDs found for the current core; there can be no more
                // than nth_per_core of them, and since address2os is sorted we can
                // stop when cnt == nth_per_core.
                int cnt = 0;
                for( int i = 0; i < __kmp_avail_proc; i++ ) {
                    int osID = address2os[ i ].second;
                    int core = address2os[ i ].first.labels[ core_level ];
                    if( core == coreID ) {
                        KMP_CPU_SET( osID, mask);
                        cnt++;
                        if( cnt == nth_per_core ) {
                            break;
                        }
                    }
                }
            }
        } else if( nthreads <= __kmp_ncores ) {

            int core = 0;
            for( int i = 0; i < ncores; i++ ) {
                // Check if this core from procarr[] is in the mask
                int in_mask = 0;
                for( int j = 0; j < nth_per_core; j++ ) {
                    if( procarr[ i * nth_per_core + j ] != -1 ) {
                        in_mask = 1;
                        break;
                    }
                }
                if( in_mask ) {
                    if( tid == core ) {
                        for( int j = 0; j < nth_per_core; j++ ) {
                            int osID = procarr[ i * nth_per_core + j ];
                            if( osID != -1 ) {
                                KMP_CPU_SET( osID, mask );
                                // For granularity=thread it is enough to set the first
                                // available osID for this core
                                if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
                                    break;
                                }
                            }
                        }
                        break;
                    } else {
                        core++;
                    }
                }
            }

        } else { // nthreads > __kmp_ncores

            // Array to save the number of processors at each core
            int nproc_at_core[ ncores ];
            // Array to save the number of cores with "x" available processors;
            int ncores_with_x_procs[ nth_per_core + 1 ];
            // Array to save the number of cores with # procs from x to nth_per_core
            int ncores_with_x_to_max_procs[ nth_per_core + 1 ];

            for( int i = 0; i <= nth_per_core; i++ ) {
                ncores_with_x_procs[ i ] = 0;
                ncores_with_x_to_max_procs[ i ] = 0;
            }

            for( int i = 0; i < ncores; i++ ) {
                int cnt = 0;
                for( int j = 0; j < nth_per_core; j++ ) {
                    if( procarr[ i * nth_per_core + j ] != -1 ) {
                        cnt++;
                    }
                }
                nproc_at_core[ i ] = cnt;
                ncores_with_x_procs[ cnt ]++;
            }

            for( int i = 0; i <= nth_per_core; i++ ) {
                for( int j = i; j <= nth_per_core; j++ ) {
                    ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
                }
            }

            // Max number of processors
            int nproc = nth_per_core * ncores;
            // An array to keep the number of threads per each context
            int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
            for( int i = 0; i < nproc; i++ ) {
                newarr[ i ] = 0;
            }

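            // Deal the nthreads out one per available hardware context first
            // (flag == 0 permits only 0 -> 1 transitions in newarr); once
            // every context holds a thread, later passes (flag != 0) stack
            // the remaining threads onto the contexts round-robin.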
            int nth = nthreads;
            int flag = 0;
            while( nth > 0 ) {
                for( int j = 1; j <= nth_per_core; j++ ) {
                    int cnt = ncores_with_x_to_max_procs[ j ];
                    for( int i = 0; i < ncores; i++ ) {
                        // Skip the core with 0 processors
                        if( nproc_at_core[ i ] == 0 ) {
                            continue;
                        }
                        for( int k = 0; k < nth_per_core; k++ ) {
                            if( procarr[ i * nth_per_core + k ] != -1 ) {
                                if( newarr[ i * nth_per_core + k ] == 0 ) {
                                    newarr[ i * nth_per_core + k ] = 1;
                                    cnt--;
                                    nth--;
                                    break;
                                } else {
                                    if( flag != 0 ) {
                                        newarr[ i * nth_per_core + k ]++;
                                        cnt--;
                                        nth--;
                                        break;
                                    }
                                }
                            }
                        }
                        if( cnt == 0 || nth == 0 ) {
                            break;
                        }
                    }
                    if( nth == 0 ) {
                        break;
                    }
                }
                flag = 1;
            }
            int sum = 0;
            for( int i = 0; i < nproc; i++ ) {
                sum += newarr[ i ];
                if( sum > tid ) {
                    // Granularity == thread
                    if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
                        int osID = procarr[ i ];
                        KMP_CPU_SET( osID, mask);
                    } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
                        int coreID = i / nth_per_core;
                        for( int ii = 0; ii < nth_per_core; ii++ ) {
                            int osID = procarr[ coreID * nth_per_core + ii ];
                            if( osID != -1 ) {
                                KMP_CPU_SET( osID, mask);
                            }
                        }
                    }
                    break;
                }
            }
            __kmp_free( newarr );
        }

        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
            KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
              tid, buf);
        }
        __kmp_set_system_affinity( mask, TRUE );
    }
}

#else
    // affinity not supported

kmp_uint32 mac_skipPerLevel[7];
kmp_uint32 mac_depth;
kmp_uint8 mac_leaf_kids;
void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
    static int first = 1;
    if (first) {
        const kmp_uint32 maxLevels = 7;
        kmp_uint32 numPerLevel[maxLevels];

        for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
            numPerLevel[i] = 1;
            mac_skipPerLevel[i] = 1;
        }

        mac_depth = 2;
        numPerLevel[0] = nproc;

        kmp_uint32 branch = 4;
        if (numPerLevel[0] == 1) branch = nproc/4;
        if (branch<4) branch=4;
        for (kmp_uint32 d=0; d<mac_depth-1; ++d) { // optimize hierarchy width
            while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
                if (numPerLevel[d] & 1) numPerLevel[d]++;
                numPerLevel[d] = numPerLevel[d] >> 1;
                if (numPerLevel[d+1] == 1) mac_depth++;
                numPerLevel[d+1] = numPerLevel[d+1] << 1;
            }
            if(numPerLevel[0] == 1) {
                branch = branch >> 1;
                if (branch<4) branch = 4;
            }
        }

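        // skip_per_level[i] is the stride, in leaves, between siblings at
        // level i: skip_per_level[0] == 1, and each higher level multiplies
        // by the fan-out of the level below it. E.g. (assumed values)
        // numPerLevel == {4, 2} yields skipPerLevel == {1, 4}.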
        for (kmp_uint32 i=1; i<mac_depth; ++i)
            mac_skipPerLevel[i] = numPerLevel[i-1] * mac_skipPerLevel[i-1];
        mac_leaf_kids = (kmp_uint8)numPerLevel[0]-1;
        first=0;
    }
    thr_bar->depth = mac_depth;
    thr_bar->base_leaf_kids = mac_leaf_kids;
    thr_bar->skip_per_level = mac_skipPerLevel;
}

#endif // KMP_AFFINITY_SUPPORTED