  1/*
  2 * kmp_affinity.cpp -- affinity management
  3 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15
16#include "kmp.h"
17#include "kmp_i18n.h"
18#include "kmp_io.h"
19#include "kmp_str.h"
 20#include "kmp_wrapper_getpid.h"
 21
 22#if KMP_AFFINITY_SUPPORTED
 23
24//
25// Print the affinity mask to the character array in a pretty format.
26//
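// For example (illustrative), a mask containing procs 0, 1, 2 and 5 prints as
// "{0,1,2,5}", an empty mask prints as "{<empty>}", and a set too large for
// the buffer is truncated so that it ends with ",...}".
//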
27char *
28__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
29{
30 KMP_ASSERT(buf_len >= 40);
31 char *scan = buf;
32 char *end = buf + buf_len - 1;
33
34 //
35 // Find first element / check for empty set.
36 //
37 size_t i;
38 for (i = 0; i < KMP_CPU_SETSIZE; i++) {
39 if (KMP_CPU_ISSET(i, mask)) {
40 break;
41 }
42 }
43 if (i == KMP_CPU_SETSIZE) {
44 sprintf(scan, "{<empty>}");
45 while (*scan != '\0') scan++;
46 KMP_ASSERT(scan <= end);
47 return buf;
48 }
49
 50    sprintf(scan, "{%ld", (long)i);
 51    while (*scan != '\0') scan++;
52 i++;
53 for (; i < KMP_CPU_SETSIZE; i++) {
54 if (! KMP_CPU_ISSET(i, mask)) {
55 continue;
56 }
57
58 //
59 // Check for buffer overflow. A string of the form ",<n>" will have
60 // at most 10 characters, plus we want to leave room to print ",...}"
61 // if the set is too large to print for a total of 15 characters.
62 // We already left room for '\0' in setting end.
63 //
64 if (end - scan < 15) {
65 break;
66 }
 67        sprintf(scan, ",%-ld", (long)i);
 68        while (*scan != '\0') scan++;
69 }
70 if (i < KMP_CPU_SETSIZE) {
71 sprintf(scan, ",...");
72 while (*scan != '\0') scan++;
73 }
74 sprintf(scan, "}");
75 while (*scan != '\0') scan++;
76 KMP_ASSERT(scan <= end);
77 return buf;
78}
79
80
81void
82__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
83{
84 KMP_CPU_ZERO(mask);
85
 86# if KMP_GROUP_AFFINITY
 87
88 if (__kmp_num_proc_groups > 1) {
89 int group;
 90        KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
91 for (group = 0; group < __kmp_num_proc_groups; group++) {
92 int i;
93 int num = __kmp_GetActiveProcessorCount(group);
94 for (i = 0; i < num; i++) {
95 KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
96 }
97 }
98 }
99 else
100
101# endif /* KMP_GROUP_AFFINITY */
102
103 {
104 int proc;
105 for (proc = 0; proc < __kmp_xproc; proc++) {
106 KMP_CPU_SET(proc, mask);
107 }
108 }
109}
110
111
112//
113// In Linux* OS debug & cover (-O0) builds, we need to avoid inline member
114// functions.
115//
116// The icc codegen emits sections with extremely long names, of the form
117// ".gnu.linkonce.<mangled_name>". There seems to have been a linker bug
118// introduced between GNU ld version 2.14.90.0.4 and 2.15.92.0.2 involving
119// some sort of memory corruption or table overflow that is triggered by
120// these long strings. I checked the latest version of the linker -
121// GNU ld (Linux* OS/GNU Binutils) 2.18.50.0.7.20080422 - and the bug is not
122// fixed.
123//
124// Unfortunately, my attempts to reproduce it in a smaller example have
125// failed - I'm not sure what the prospects are of getting it fixed
126// properly - but we need a reproducer smaller than all of libiomp.
127//
128// Work around the problem by avoiding inline constructors in such builds.
129// We do this for all platforms, not just Linux* OS - non-inline functions are
130// more debuggable and provide better coverage than inline functions.
131// Use inline functions in shipping libs, for performance.
132//
133
134# if !defined(KMP_DEBUG) && !defined(COVER)
135
136class Address {
137public:
138 static const unsigned maxDepth = 32;
139 unsigned labels[maxDepth];
140 unsigned childNums[maxDepth];
141 unsigned depth;
142 unsigned leader;
143 Address(unsigned _depth)
144 : depth(_depth), leader(FALSE) {
145 }
146 Address &operator=(const Address &b) {
147 depth = b.depth;
148 for (unsigned i = 0; i < depth; i++) {
149 labels[i] = b.labels[i];
150 childNums[i] = b.childNums[i];
151 }
152 leader = FALSE;
153 return *this;
154 }
155 bool operator==(const Address &b) const {
156 if (depth != b.depth)
157 return false;
158 for (unsigned i = 0; i < depth; i++)
159 if(labels[i] != b.labels[i])
160 return false;
161 return true;
162 }
163 bool isClose(const Address &b, int level) const {
164 if (depth != b.depth)
165 return false;
166 if ((unsigned)level >= depth)
167 return true;
168 for (unsigned i = 0; i < (depth - level); i++)
169 if(labels[i] != b.labels[i])
170 return false;
171 return true;
172 }
173 bool operator!=(const Address &b) const {
174 return !operator==(b);
175 }
176};
177
178class AddrUnsPair {
179public:
180 Address first;
181 unsigned second;
182 AddrUnsPair(Address _first, unsigned _second)
183 : first(_first), second(_second) {
184 }
185 AddrUnsPair &operator=(const AddrUnsPair &b)
186 {
187 first = b.first;
188 second = b.second;
189 return *this;
190 }
191};
192
193# else
194
195class Address {
196public:
197 static const unsigned maxDepth = 32;
198 unsigned labels[maxDepth];
199 unsigned childNums[maxDepth];
200 unsigned depth;
201 unsigned leader;
202 Address(unsigned _depth);
203 Address &operator=(const Address &b);
204 bool operator==(const Address &b) const;
205 bool isClose(const Address &b, int level) const;
206 bool operator!=(const Address &b) const;
207};
208
209Address::Address(unsigned _depth)
210{
211 depth = _depth;
212 leader = FALSE;
213}
214
215Address &Address::operator=(const Address &b) {
216 depth = b.depth;
217 for (unsigned i = 0; i < depth; i++) {
218 labels[i] = b.labels[i];
219 childNums[i] = b.childNums[i];
220 }
221 leader = FALSE;
222 return *this;
223}
224
225bool Address::operator==(const Address &b) const {
226 if (depth != b.depth)
227 return false;
228 for (unsigned i = 0; i < depth; i++)
229 if(labels[i] != b.labels[i])
230 return false;
231 return true;
232}
233
234bool Address::isClose(const Address &b, int level) const {
235 if (depth != b.depth)
236 return false;
237 if ((unsigned)level >= depth)
238 return true;
239 for (unsigned i = 0; i < (depth - level); i++)
240 if(labels[i] != b.labels[i])
241 return false;
242 return true;
243}
244
245bool Address::operator!=(const Address &b) const {
246 return !operator==(b);
247}
248
249class AddrUnsPair {
250public:
251 Address first;
252 unsigned second;
253 AddrUnsPair(Address _first, unsigned _second);
254 AddrUnsPair &operator=(const AddrUnsPair &b);
255};
256
257AddrUnsPair::AddrUnsPair(Address _first, unsigned _second)
258 : first(_first), second(_second)
259{
260}
261
262AddrUnsPair &AddrUnsPair::operator=(const AddrUnsPair &b)
263{
264 first = b.first;
265 second = b.second;
266 return *this;
267}
268
269# endif /* !defined(KMP_DEBUG) && !defined(COVER) */
270
271
272static int
273__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
274{
275 const Address *aa = (const Address *)&(((AddrUnsPair *)a)
276 ->first);
277 const Address *bb = (const Address *)&(((AddrUnsPair *)b)
278 ->first);
279 unsigned depth = aa->depth;
280 unsigned i;
281 KMP_DEBUG_ASSERT(depth == bb->depth);
282 for (i = 0; i < depth; i++) {
283 if (aa->labels[i] < bb->labels[i]) return -1;
284 if (aa->labels[i] > bb->labels[i]) return 1;
285 }
286 return 0;
287}
288
289
290static int
291__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
292{
293 const Address *aa = (const Address *)&(((AddrUnsPair *)a)
294 ->first);
295 const Address *bb = (const Address *)&(((AddrUnsPair *)b)
296 ->first);
297 unsigned depth = aa->depth;
298 unsigned i;
299 KMP_DEBUG_ASSERT(depth == bb->depth);
300 KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
301 KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
302 for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
303 int j = depth - i - 1;
304 if (aa->childNums[j] < bb->childNums[j]) return -1;
305 if (aa->childNums[j] > bb->childNums[j]) return 1;
306 }
307 for (; i < depth; i++) {
308 int j = i - __kmp_affinity_compact;
309 if (aa->childNums[j] < bb->childNums[j]) return -1;
310 if (aa->childNums[j] > bb->childNums[j]) return 1;
311 }
312 return 0;
313}
314
315/** A structure for holding machine-specific hierarchy info to be computed once at init. */
316class hierarchy_info {
317public:
318 /** Typical levels are threads/core, cores/package or socket, packages/node, nodes/machine,
319 etc. We don't want to get specific with nomenclature */
320 static const kmp_uint32 maxLevels=7;
321
322 /** This is specifically the depth of the machine configuration hierarchy, in terms of the
323 number of levels along the longest path from root to any leaf. It corresponds to the
324 number of entries in numPerLevel if we exclude all but one trailing 1. */
325 kmp_uint32 depth;
326 kmp_uint32 base_depth;
327 kmp_uint32 base_num_threads;
328 bool uninitialized;
329
330 /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
331 node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
332 and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
333 kmp_uint32 numPerLevel[maxLevels];
334 kmp_uint32 skipPerLevel[maxLevels];
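    /** Note: skipPerLevel[i] is the number of leaves spanned by a subtree rooted at level i,
        i.e. the running product of numPerLevel[0..i-1] (computed in init() below). For the
        example above with numPerLevel = {2, 4, 4, 1, 1}, skipPerLevel = {1, 2, 8, 32, 32}. */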
335
336 void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
337 int hier_depth = adr2os[0].first.depth;
338 int level = 0;
339 for (int i=hier_depth-1; i>=0; --i) {
340 int max = -1;
341 for (int j=0; j<num_addrs; ++j) {
342 int next = adr2os[j].first.childNums[i];
343 if (next > max) max = next;
344 }
345 numPerLevel[level] = max+1;
346 ++level;
347 }
348 }
349
350 hierarchy_info() : depth(1), uninitialized(true) {}
351 void init(AddrUnsPair *adr2os, int num_addrs)
352 {
353 uninitialized = false;
354 for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
355 numPerLevel[i] = 1;
356 skipPerLevel[i] = 1;
357 }
358
359 // Sort table by physical ID
360 if (adr2os) {
361 qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
362 deriveLevels(adr2os, num_addrs);
363 }
364 else {
365 numPerLevel[0] = 4;
366 numPerLevel[1] = num_addrs/4;
367 if (num_addrs%4) numPerLevel[1]++;
368 }
369
370 base_num_threads = num_addrs;
371 for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
372 if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
373 depth++;
374
375 kmp_uint32 branch = 4;
376 if (numPerLevel[0] == 1) branch = num_addrs/4;
377 if (branch<4) branch=4;
378 for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
379 while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
380 if (numPerLevel[d] & 1) numPerLevel[d]++;
381 numPerLevel[d] = numPerLevel[d] >> 1;
382 if (numPerLevel[d+1] == 1) depth++;
383 numPerLevel[d+1] = numPerLevel[d+1] << 1;
384 }
385 if(numPerLevel[0] == 1) {
386 branch = branch >> 1;
387 if (branch<4) branch = 4;
388 }
389 }
390
391 for (kmp_uint32 i=1; i<depth; ++i)
392 skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
393
394 base_depth = depth;
395 }
396};
397
398static hierarchy_info machine_hierarchy;
399
400void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
401 if (machine_hierarchy.uninitialized)
402 machine_hierarchy.init(NULL, nproc);
403
404 if (nproc <= machine_hierarchy.base_num_threads)
405 machine_hierarchy.depth = machine_hierarchy.base_depth;
406 KMP_DEBUG_ASSERT(machine_hierarchy.depth > 0);
407 while (nproc > machine_hierarchy.skipPerLevel[machine_hierarchy.depth-1]) {
408 machine_hierarchy.depth++;
409 machine_hierarchy.skipPerLevel[machine_hierarchy.depth-1] = 2*machine_hierarchy.skipPerLevel[machine_hierarchy.depth-2];
410 }
411 thr_bar->depth = machine_hierarchy.depth;
412 thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
413 thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
414}
415
416//
417// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
418// called to renumber the labels from [0..n] and place them into the child_num
419// vector of the address object. This is done in case the labels used for
420// the children at one node of the hierarchy differ from those used for
421// another node at the same level.  Example: suppose the machine has 2 nodes
422// with 2 packages each. The first node contains packages 601 and 602, and
423 // the second node contains packages 603 and 604.  If we try to sort the table
424// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
425// because we are paying attention to the labels themselves, not the ordinal
426// child numbers. By using the child numbers in the sort, the result is
427// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
428//
429static void
430__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
431 int numAddrs)
432{
433 KMP_DEBUG_ASSERT(numAddrs > 0);
434 int depth = address2os->first.depth;
435 unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
436 unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
437 * sizeof(unsigned));
438 int labCt;
439 for (labCt = 0; labCt < depth; labCt++) {
440 address2os[0].first.childNums[labCt] = counts[labCt] = 0;
441 lastLabel[labCt] = address2os[0].first.labels[labCt];
442 }
443 int i;
444 for (i = 1; i < numAddrs; i++) {
445 for (labCt = 0; labCt < depth; labCt++) {
446 if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
447 int labCt2;
448 for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
449 counts[labCt2] = 0;
450 lastLabel[labCt2] = address2os[i].first.labels[labCt2];
451 }
452 counts[labCt]++;
453 lastLabel[labCt] = address2os[i].first.labels[labCt];
454 break;
455 }
456 }
457 for (labCt = 0; labCt < depth; labCt++) {
458 address2os[i].first.childNums[labCt] = counts[labCt];
459 }
460 for (; labCt < (int)Address::maxDepth; labCt++) {
461 address2os[i].first.childNums[labCt] = 0;
462 }
463 }
464}
465
466
467//
468// All of the __kmp_affinity_create_*_map() routines should set
469// __kmp_affinity_masks to a vector of affinity mask objects of length
470// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
471// return the number of levels in the machine topology tree (zero if
472// __kmp_affinity_type == affinity_none).
473//
474// All of the __kmp_affinity_create_*_map() routines should set *fullMask
475// to the affinity mask for the initialization thread. They need to save and
476// restore the mask, and it could be needed later, so saving it is just an
477// optimization to avoid calling kmp_get_system_affinity() again.
478//
479static kmp_affin_mask_t *fullMask = NULL;
480
481kmp_affin_mask_t *
482__kmp_affinity_get_fullMask() { return fullMask; }
483
484
485static int nCoresPerPkg, nPackages;
486static int __kmp_nThreadsPerCore;
487#ifndef KMP_DFLT_NTH_CORES
488static int __kmp_ncores;
489#endif
490
491//
492// __kmp_affinity_uniform_topology() doesn't work when called from
493// places which support arbitrarily many levels in the machine topology
494// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
495 // and __kmp_affinity_create_x2apicid_map().
496//
497inline static bool
498__kmp_affinity_uniform_topology()
499{
500 return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
501}
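// For example (illustrative numbers): with 2 packages x 4 cores/package x 2
// threads/core, the topology is reported as uniform when __kmp_avail_proc == 16;
// if some procs are excluded from the machine model, the product no longer
// matches and this returns false.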
502
503
504//
505// Print out the detailed machine topology map, i.e. the physical locations
506// of each OS proc.
507//
508static void
509__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
510 int pkgLevel, int coreLevel, int threadLevel)
511{
512 int proc;
513
514 KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
515 for (proc = 0; proc < len; proc++) {
516 int level;
517 kmp_str_buf_t buf;
518 __kmp_str_buf_init(&buf);
519 for (level = 0; level < depth; level++) {
520 if (level == threadLevel) {
521 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
522 }
523 else if (level == coreLevel) {
524 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
525 }
526 else if (level == pkgLevel) {
527 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
528 }
529 else if (level > pkgLevel) {
530 __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
531 level - pkgLevel - 1);
532 }
533 else {
534 __kmp_str_buf_print(&buf, "L%d ", level);
535 }
536 __kmp_str_buf_print(&buf, "%d ",
537 address2os[proc].first.labels[level]);
538 }
539 KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
540 buf.str);
541 __kmp_str_buf_free(&buf);
542 }
543}
544
545
546//
547// If we don't know how to retrieve the machine's processor topology, or
548// encounter an error in doing so, this routine is called to form a "flat"
549// mapping of os thread id's <-> processor id's.
550//
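// Each OS proc then becomes its own depth-1 Address; e.g. if procs {0,1,2,3} are
// in the machine model, the map is simply labels[0]=0 -> 0, labels[0]=1 -> 1, etc.
//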
551static int
552__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
553 kmp_i18n_id_t *const msg_id)
554{
555 *address2os = NULL;
556 *msg_id = kmp_i18n_null;
557
558 //
559    // Even if __kmp_affinity_type == affinity_none, this routine might still
560    // be called to set __kmp_ncores, as well as
561    // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
562 //
563 if (! KMP_AFFINITY_CAPABLE()) {
564 KMP_ASSERT(__kmp_affinity_type == affinity_none);
565 __kmp_ncores = nPackages = __kmp_xproc;
566 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
567    if (__kmp_affinity_verbose) {
568 KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
569 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
570 KMP_INFORM(Uniform, "KMP_AFFINITY");
571 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
572 __kmp_nThreadsPerCore, __kmp_ncores);
573 }
574 return 0;
575 }
576
577 //
578 // When affinity is off, this routine will still be called to set
579    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
580    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
581 // correctly, and return now if affinity is not enabled.
582 //
583 __kmp_ncores = nPackages = __kmp_avail_proc;
584 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
585    if (__kmp_affinity_verbose) {
586 char buf[KMP_AFFIN_MASK_PRINT_LEN];
587 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
588
589 KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
590 if (__kmp_affinity_respect_mask) {
591 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
592 } else {
593 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
594 }
595 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
596 KMP_INFORM(Uniform, "KMP_AFFINITY");
597 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
598 __kmp_nThreadsPerCore, __kmp_ncores);
599 }
600 if (__kmp_affinity_type == affinity_none) {
601 return 0;
602 }
603
604 //
605    // Construct the data structure to be returned.
606 //
607 *address2os = (AddrUnsPair*)
608 __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
609 int avail_ct = 0;
610 unsigned int i;
611 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
612 //
613 // Skip this proc if it is not included in the machine model.
614 //
615 if (! KMP_CPU_ISSET(i, fullMask)) {
616 continue;
617 }
618
619 Address addr(1);
620 addr.labels[0] = i;
621 (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
622 }
623 if (__kmp_affinity_verbose) {
624 KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
625 }
626
627 if (__kmp_affinity_gran_levels < 0) {
628 //
629 // Only the package level is modeled in the machine topology map,
630 // so the #levels of granularity is either 0 or 1.
631 //
632 if (__kmp_affinity_gran > affinity_gran_package) {
633 __kmp_affinity_gran_levels = 1;
634 }
635 else {
636 __kmp_affinity_gran_levels = 0;
637 }
638 }
639 return 1;
640}
641
642
643# if KMP_GROUP_AFFINITY
644
645//
646// If multiple Windows* OS processor groups exist, we can create a 2-level
647// topology map with the groups at level 0 and the individual procs at
648// level 1.
649//
650// This facilitates letting the threads float among all procs in a group,
651// if granularity=group (the default when there are multiple groups).
652//
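// For example, on a 64-bit Windows* OS system (64 procs per group), OS proc 70
// would get the depth-2 address {group 1, proc-in-group 6}.
//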
653static int
654__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
655 kmp_i18n_id_t *const msg_id)
656{
657 *address2os = NULL;
658 *msg_id = kmp_i18n_null;
659
660 //
661 // If we don't have multiple processor groups, return now.
662 // The flat mapping will be used.
663 //
664 if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
665 // FIXME set *msg_id
666 return -1;
667 }
668
669 //
670    // Construct the data structure to be returned.
671 //
672 *address2os = (AddrUnsPair*)
673 __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
674 int avail_ct = 0;
675 int i;
676 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
677 //
678 // Skip this proc if it is not included in the machine model.
679 //
680 if (! KMP_CPU_ISSET(i, fullMask)) {
681 continue;
682 }
683
684 Address addr(2);
685 addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
686 addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
687 (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
688
689 if (__kmp_affinity_verbose) {
690 KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
691 addr.labels[1]);
692 }
693 }
694
695 if (__kmp_affinity_gran_levels < 0) {
696 if (__kmp_affinity_gran == affinity_gran_group) {
697 __kmp_affinity_gran_levels = 1;
698 }
699 else if ((__kmp_affinity_gran == affinity_gran_fine)
700 || (__kmp_affinity_gran == affinity_gran_thread)) {
701 __kmp_affinity_gran_levels = 0;
702 }
703 else {
704 const char *gran_str = NULL;
705 if (__kmp_affinity_gran == affinity_gran_core) {
706 gran_str = "core";
707 }
708 else if (__kmp_affinity_gran == affinity_gran_package) {
709 gran_str = "package";
710 }
711 else if (__kmp_affinity_gran == affinity_gran_node) {
712 gran_str = "node";
713 }
714 else {
715 KMP_ASSERT(0);
716 }
717
718 // Warning: can't use affinity granularity \"gran\" with group topology method, using "thread"
719 __kmp_affinity_gran_levels = 0;
720 }
721 }
722 return 2;
723}
724
725# endif /* KMP_GROUP_AFFINITY */
726
727
728# if KMP_ARCH_X86 || KMP_ARCH_X86_64
729
730static int
731__kmp_cpuid_mask_width(int count) {
732 int r = 0;
733
734 while((1<<r) < count)
735 ++r;
736 return r;
737}
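// __kmp_cpuid_mask_width(count) returns the smallest r such that (1 << r) >= count,
// i.e. the number of bits needed to encode "count" distinct ids; e.g. 1 -> 0,
// 2 -> 1, 3 -> 2, 8 -> 3.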
738
739
740class apicThreadInfo {
741public:
742 unsigned osId; // param to __kmp_affinity_bind_thread
743 unsigned apicId; // from cpuid after binding
744 unsigned maxCoresPerPkg; // ""
745 unsigned maxThreadsPerPkg; // ""
746 unsigned pkgId; // inferred from above values
747 unsigned coreId; // ""
748 unsigned threadId; // ""
749};
750
751
752static int
753__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
754{
755 const apicThreadInfo *aa = (const apicThreadInfo *)a;
756 const apicThreadInfo *bb = (const apicThreadInfo *)b;
757 if (aa->osId < bb->osId) return -1;
758 if (aa->osId > bb->osId) return 1;
759 return 0;
760}
761
762
763static int
764__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
765{
766 const apicThreadInfo *aa = (const apicThreadInfo *)a;
767 const apicThreadInfo *bb = (const apicThreadInfo *)b;
768 if (aa->pkgId < bb->pkgId) return -1;
769 if (aa->pkgId > bb->pkgId) return 1;
770 if (aa->coreId < bb->coreId) return -1;
771 if (aa->coreId > bb->coreId) return 1;
772 if (aa->threadId < bb->threadId) return -1;
773 if (aa->threadId > bb->threadId) return 1;
774 return 0;
775}
776
777
778//
779// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
780// an algorithm which cycles through the available os threads, setting
781// the current thread's affinity mask to that thread, and then retrieves
782// the Apic Id for each thread context using the cpuid instruction.
783//
784static int
785__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
786 kmp_i18n_id_t *const msg_id)
787{
788    kmp_cpuid buf;
789    int rc;
790 *address2os = NULL;
791 *msg_id = kmp_i18n_null;
792
793    //
794    // Check if cpuid leaf 4 is supported.
795    //
796    __kmp_x86_cpuid(0, 0, &buf);
797 if (buf.eax < 4) {
798 *msg_id = kmp_i18n_str_NoLeaf4Support;
799 return -1;
800 }
801
802 //
803    // The algorithm used starts by setting the affinity to each available
804    // thread and retrieving info from the cpuid instruction, so if we are
805    // not capable of calling __kmp_get_system_affinity() and
806    // __kmp_set_system_affinity(), then we need to do something else - use
807 // the defaults that we calculated from issuing cpuid without binding
808 // to each proc.
809    //
810 if (! KMP_AFFINITY_CAPABLE()) {
811 //
812 // Hack to try and infer the machine topology using only the data
813 // available from cpuid on the current thread, and __kmp_xproc.
814 //
815 KMP_ASSERT(__kmp_affinity_type == affinity_none);
816
817 //
818 // Get an upper bound on the number of threads per package using
819 // cpuid(1).
820 //
821        // On some OS/chip combinations where HT is supported by the chip
822 // but is disabled, this value will be 2 on a single core chip.
823 // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
824 //
825        __kmp_x86_cpuid(1, 0, &buf);
826 int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
827 if (maxThreadsPerPkg == 0) {
828 maxThreadsPerPkg = 1;
829 }
830
831 //
832 // The num cores per pkg comes from cpuid(4).
833 // 1 must be added to the encoded value.
834 //
835        // The author of cpu_count.cpp treated this as only an upper bound
836 // on the number of cores, but I haven't seen any cases where it
837 // was greater than the actual number of cores, so we will treat
838 // it as exact in this block of code.
839 //
840 // First, we need to check if cpuid(4) is supported on this chip.
841 // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
842 // has the value n or greater.
843 //
844 __kmp_x86_cpuid(0, 0, &buf);
845 if (buf.eax >= 4) {
846 __kmp_x86_cpuid(4, 0, &buf);
847 nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
848 }
849 else {
850 nCoresPerPkg = 1;
851 }
852
853 //
854 // There is no way to reliably tell if HT is enabled without issuing
855        // the cpuid instruction from every thread, and correlating the cpuid
856 // info, so if the machine is not affinity capable, we assume that HT
857 // is off. We have seen quite a few machines where maxThreadsPerPkg
858 // is 2, yet the machine does not support HT.
859 //
860 // - Older OSes are usually found on machines with older chips, which
861 // do not support HT.
862 //
863 // - The performance penalty for mistakenly identifying a machine as
864        //   HT when it isn't (which results in blocktime being incorrectly set
865        //   to 0) is greater than the penalty for mistakenly identifying
866 // a machine as being 1 thread/core when it is really HT enabled
867 // (which results in blocktime being incorrectly set to a positive
868 // value).
869 //
870 __kmp_ncores = __kmp_xproc;
871 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
872 __kmp_nThreadsPerCore = 1;
873        if (__kmp_affinity_verbose) {
874 KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
875 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
876 if (__kmp_affinity_uniform_topology()) {
877 KMP_INFORM(Uniform, "KMP_AFFINITY");
878 } else {
879 KMP_INFORM(NonUniform, "KMP_AFFINITY");
880 }
881 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
882 __kmp_nThreadsPerCore, __kmp_ncores);
883 }
884 return 0;
885 }
886
887 //
888 //
889 // From here on, we can assume that it is safe to call
890 // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
891 // even if __kmp_affinity_type = affinity_none.
892 //
893
894 //
895 // Save the affinity mask for the current thread.
896 //
897 kmp_affin_mask_t *oldMask;
898 KMP_CPU_ALLOC(oldMask);
899 KMP_ASSERT(oldMask != NULL);
900 __kmp_get_system_affinity(oldMask, TRUE);
901
902 //
903 // Run through each of the available contexts, binding the current thread
904 // to it, and obtaining the pertinent information using the cpuid instr.
905 //
906 // The relevant information is:
907 //
908 // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
909    //   has a unique Apic Id, which is of the form pkg# : core# : thread#.
910 //
911 // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The
912 // value of this field determines the width of the core# + thread#
913 // fields in the Apic Id. It is also an upper bound on the number
914 // of threads per package, but it has been verified that situations
915    //   happen where it is not exact.  In particular, on certain OS/chip
916 // combinations where Intel(R) Hyper-Threading Technology is supported
917 // by the chip but has
918 // been disabled, the value of this field will be 2 (for a single core
919 // chip). On other OS/chip combinations supporting
920 // Intel(R) Hyper-Threading Technology, the value of
921 // this field will be 1 when Intel(R) Hyper-Threading Technology is
922 // disabled and 2 when it is enabled.
923 //
924 // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The
925 // value of this field (+1) determines the width of the core# field in
926 // the Apic Id. The comments in "cpucount.cpp" say that this value is
927 // an upper bound, but the IA-32 architecture manual says that it is
928 // exactly the number of cores per package, and I haven't seen any
929 // case where it wasn't.
930 //
931 // From this information, deduce the package Id, core Id, and thread Id,
932 // and set the corresponding fields in the apicThreadInfo struct.
933 //
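    // Illustrative example (hypothetical values): with maxThreadsPerPkg = 16
    // (widthCT = 4) and maxCoresPerPkg = 8 (widthC = 3, so widthT = 1), an
    // Apic Id of 0x35 decodes to pkgId = 0x35 >> 4 = 3,
    // coreId = (0x35 >> 1) & 0x7 = 2, and threadId = 0x35 & 0x1 = 1.
    //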
934 unsigned i;
935 apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
936 __kmp_avail_proc * sizeof(apicThreadInfo));
937 unsigned nApics = 0;
938 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
939 //
940 // Skip this proc if it is not included in the machine model.
941 //
942 if (! KMP_CPU_ISSET(i, fullMask)) {
943 continue;
944 }
945 KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
946
947 __kmp_affinity_bind_thread(i);
948 threadInfo[nApics].osId = i;
949
950 //
951 // The apic id and max threads per pkg come from cpuid(1).
952 //
953        __kmp_x86_cpuid(1, 0, &buf);
954        if (((buf.edx >> 9) & 1) == 0) {   // the APIC feature flag (edx bit 9) must be set
955 __kmp_set_system_affinity(oldMask, TRUE);
956 __kmp_free(threadInfo);
957 KMP_CPU_FREE(oldMask);
958 *msg_id = kmp_i18n_str_ApicNotPresent;
959 return -1;
960 }
961 threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
962 threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
963 if (threadInfo[nApics].maxThreadsPerPkg == 0) {
964 threadInfo[nApics].maxThreadsPerPkg = 1;
965 }
966
967 //
968 // Max cores per pkg comes from cpuid(4).
969 // 1 must be added to the encoded value.
970 //
971 // First, we need to check if cpuid(4) is supported on this chip.
972 // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
973 // has the value n or greater.
974 //
975 __kmp_x86_cpuid(0, 0, &buf);
976 if (buf.eax >= 4) {
977 __kmp_x86_cpuid(4, 0, &buf);
978 threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
979 }
980 else {
981 threadInfo[nApics].maxCoresPerPkg = 1;
982 }
983
984 //
985 // Infer the pkgId / coreId / threadId using only the info
986 // obtained locally.
987 //
988 int widthCT = __kmp_cpuid_mask_width(
989 threadInfo[nApics].maxThreadsPerPkg);
990 threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
991
992 int widthC = __kmp_cpuid_mask_width(
993 threadInfo[nApics].maxCoresPerPkg);
994 int widthT = widthCT - widthC;
995 if (widthT < 0) {
996 //
997 // I've never seen this one happen, but I suppose it could, if
998 // the cpuid instruction on a chip was really screwed up.
999 // Make sure to restore the affinity mask before the tail call.
1000 //
1001 __kmp_set_system_affinity(oldMask, TRUE);
1002 __kmp_free(threadInfo);
1003 KMP_CPU_FREE(oldMask);
1004 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1005 return -1;
1006 }
1007
1008 int maskC = (1 << widthC) - 1;
1009 threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
1010 &maskC;
1011
1012 int maskT = (1 << widthT) - 1;
1013 threadInfo[nApics].threadId = threadInfo[nApics].apicId &maskT;
1014
1015 nApics++;
1016 }
1017
1018 //
1019 // We've collected all the info we need.
1020 // Restore the old affinity mask for this thread.
1021 //
1022 __kmp_set_system_affinity(oldMask, TRUE);
1023
1024 //
1025 // If there's only one thread context to bind to, form an Address object
1026 // with depth 1 and return immediately (or, if affinity is off, set
1027 // address2os to NULL and return).
1028 //
1029 // If it is configured to omit the package level when there is only a
1030 // single package, the logic at the end of this routine won't work if
1031 // there is only a single thread - it would try to form an Address
1032 // object with depth 0.
1033 //
1034 KMP_ASSERT(nApics > 0);
1035 if (nApics == 1) {
1036 __kmp_ncores = nPackages = 1;
1037 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
1038        if (__kmp_affinity_verbose) {
1039 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1040 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1041
1042 KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
1043 if (__kmp_affinity_respect_mask) {
1044 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1045 } else {
1046 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1047 }
1048 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1049 KMP_INFORM(Uniform, "KMP_AFFINITY");
1050 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1051 __kmp_nThreadsPerCore, __kmp_ncores);
1052 }
1053
1054 if (__kmp_affinity_type == affinity_none) {
1055 __kmp_free(threadInfo);
1056 KMP_CPU_FREE(oldMask);
1057 return 0;
1058 }
1059
1060 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
1061 Address addr(1);
1062 addr.labels[0] = threadInfo[0].pkgId;
1063 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
1064
1065 if (__kmp_affinity_gran_levels < 0) {
1066 __kmp_affinity_gran_levels = 0;
1067 }
1068
1069 if (__kmp_affinity_verbose) {
1070 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
1071 }
1072
1073 __kmp_free(threadInfo);
1074 KMP_CPU_FREE(oldMask);
1075 return 1;
1076 }
1077
1078 //
1079 // Sort the threadInfo table by physical Id.
1080 //
1081 qsort(threadInfo, nApics, sizeof(*threadInfo),
1082 __kmp_affinity_cmp_apicThreadInfo_phys_id);
1083
1084 //
1085 // The table is now sorted by pkgId / coreId / threadId, but we really
1086 // don't know the radix of any of the fields. pkgId's may be sparsely
1087 // assigned among the chips on a system. Although coreId's are usually
1088 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
1089 // [0..threadsPerCore-1], we don't want to make any such assumptions.
1090 //
1091 // For that matter, we don't know what coresPerPkg and threadsPerCore
1092 // (or the total # packages) are at this point - we want to determine
1093 // that now. We only have an upper bound on the first two figures.
1094 //
1095 // We also perform a consistency check at this point: the values returned
1096 // by the cpuid instruction for any thread bound to a given package had
1097 // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
1098 //
1099 nPackages = 1;
1100 nCoresPerPkg = 1;
1101 __kmp_nThreadsPerCore = 1;
1102 unsigned nCores = 1;
1103
1104 unsigned pkgCt = 1; // to determine radii
1105 unsigned lastPkgId = threadInfo[0].pkgId;
1106 unsigned coreCt = 1;
1107 unsigned lastCoreId = threadInfo[0].coreId;
1108 unsigned threadCt = 1;
1109 unsigned lastThreadId = threadInfo[0].threadId;
1110
1111    // intra-pkg consistency checks
1112 unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
1113 unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
1114
1115 for (i = 1; i < nApics; i++) {
1116 if (threadInfo[i].pkgId != lastPkgId) {
1117 nCores++;
1118 pkgCt++;
1119 lastPkgId = threadInfo[i].pkgId;
1120 if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
1121 coreCt = 1;
1122 lastCoreId = threadInfo[i].coreId;
1123 if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1124 threadCt = 1;
1125 lastThreadId = threadInfo[i].threadId;
1126
1127 //
1128 // This is a different package, so go on to the next iteration
1129 // without doing any consistency checks. Reset the consistency
1130 // check vars, though.
1131 //
1132 prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
1133 prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
1134 continue;
1135 }
1136
1137 if (threadInfo[i].coreId != lastCoreId) {
1138 nCores++;
1139 coreCt++;
1140 lastCoreId = threadInfo[i].coreId;
1141 if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1142 threadCt = 1;
1143 lastThreadId = threadInfo[i].threadId;
1144 }
1145 else if (threadInfo[i].threadId != lastThreadId) {
1146 threadCt++;
1147 lastThreadId = threadInfo[i].threadId;
1148 }
1149 else {
1150 __kmp_free(threadInfo);
1151 KMP_CPU_FREE(oldMask);
1152 *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
1153 return -1;
1154 }
1155
1156 //
1157 // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
1158        // fields agree between all the threads bound to a given package.
1159 //
1160 if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
1161 || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
1162 __kmp_free(threadInfo);
1163 KMP_CPU_FREE(oldMask);
1164 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1165 return -1;
1166 }
1167 }
1168 nPackages = pkgCt;
1169 if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
1170 if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1171
1172 //
1173 // When affinity is off, this routine will still be called to set
1174    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
1175    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
1176 // correctly, and return now if affinity is not enabled.
1177 //
1178    __kmp_ncores = nCores;
1179 if (__kmp_affinity_verbose) {
1180 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1181 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1182
1183 KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
1184 if (__kmp_affinity_respect_mask) {
1185 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1186 } else {
1187 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1188 }
1189 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1190 if (__kmp_affinity_uniform_topology()) {
1191 KMP_INFORM(Uniform, "KMP_AFFINITY");
1192 } else {
1193 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1194 }
1195 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1196 __kmp_nThreadsPerCore, __kmp_ncores);
1197
1198 }
1199
1200 if (__kmp_affinity_type == affinity_none) {
1201 __kmp_free(threadInfo);
1202 KMP_CPU_FREE(oldMask);
1203 return 0;
1204 }
1205
1206 //
1207 // Now that we've determined the number of packages, the number of cores
1208 // per package, and the number of threads per core, we can construct the
1209 // data structure that is to be returned.
1210 //
1211 int pkgLevel = 0;
1212 int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
1213 int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
1214 unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
1215
1216 KMP_ASSERT(depth > 0);
1217 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
1218
1219 for (i = 0; i < nApics; ++i) {
1220 Address addr(depth);
1221 unsigned os = threadInfo[i].osId;
1222 int d = 0;
1223
1224 if (pkgLevel >= 0) {
1225 addr.labels[d++] = threadInfo[i].pkgId;
1226 }
1227 if (coreLevel >= 0) {
1228 addr.labels[d++] = threadInfo[i].coreId;
1229 }
1230 if (threadLevel >= 0) {
1231 addr.labels[d++] = threadInfo[i].threadId;
1232 }
1233 (*address2os)[i] = AddrUnsPair(addr, os);
1234 }
1235
1236 if (__kmp_affinity_gran_levels < 0) {
1237 //
1238 // Set the granularity level based on what levels are modeled
1239 // in the machine topology map.
1240 //
1241 __kmp_affinity_gran_levels = 0;
1242 if ((threadLevel >= 0)
1243 && (__kmp_affinity_gran > affinity_gran_thread)) {
1244 __kmp_affinity_gran_levels++;
1245 }
1246 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1247 __kmp_affinity_gran_levels++;
1248 }
1249 if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
1250 __kmp_affinity_gran_levels++;
1251 }
1252 }
1253
1254 if (__kmp_affinity_verbose) {
1255 __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
1256 coreLevel, threadLevel);
1257 }
1258
1259 __kmp_free(threadInfo);
1260 KMP_CPU_FREE(oldMask);
1261 return depth;
1262}
1263
1264
1265//
1266// Intel(R) microarchitecture code name Nehalem, Dunnington and later
1267// architectures support a newer interface for specifying the x2APIC Ids,
1268// based on cpuid leaf 11.
1269//
1270static int
1271__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
1272 kmp_i18n_id_t *const msg_id)
1273{
1274 kmp_cpuid buf;
1275
1276 *address2os = NULL;
1277 *msg_id = kmp_i18n_null;
1278
1279 //
1280 // Check to see if cpuid leaf 11 is supported.
1281 //
1282 __kmp_x86_cpuid(0, 0, &buf);
1283 if (buf.eax < 11) {
1284 *msg_id = kmp_i18n_str_NoLeaf11Support;
1285 return -1;
1286 }
1287 __kmp_x86_cpuid(11, 0, &buf);
1288 if (buf.ebx == 0) {
1289 *msg_id = kmp_i18n_str_NoLeaf11Support;
1290 return -1;
1291 }
1292
1293 //
1294 // Find the number of levels in the machine topology. While we're at it,
1295 // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will
1296 // try to get more accurate values later by explicitly counting them,
1297 // but get reasonable defaults now, in case we return early.
1298 //
1299 int level;
1300 int threadLevel = -1;
1301 int coreLevel = -1;
1302 int pkgLevel = -1;
1303 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
1304
1305 for (level = 0;; level++) {
1306 if (level > 31) {
1307 //
1308 // FIXME: Hack for DPD200163180
1309 //
1310 // If level is big then something went wrong -> exiting
1311 //
1312 // There could actually be 32 valid levels in the machine topology,
1313 // but so far, the only machine we have seen which does not exit
1314 // this loop before iteration 32 has fubar x2APIC settings.
1315 //
1316 // For now, just reject this case based upon loop trip count.
1317 //
1318 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1319 return -1;
1320 }
1321 __kmp_x86_cpuid(11, level, &buf);
1322 if (buf.ebx == 0) {
1323 if (pkgLevel < 0) {
1324 //
1325 // Will infer nPackages from __kmp_xproc
1326 //
1327 pkgLevel = level;
1328 level++;
1329 }
1330 break;
1331 }
1332 int kind = (buf.ecx >> 8) & 0xff;
1333 if (kind == 1) {
1334 //
1335 // SMT level
1336 //
1337 threadLevel = level;
1338 coreLevel = -1;
1339 pkgLevel = -1;
1340 __kmp_nThreadsPerCore = buf.ebx & 0xff;
1341 if (__kmp_nThreadsPerCore == 0) {
1342 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1343 return -1;
1344 }
1345 }
1346 else if (kind == 2) {
1347 //
1348 // core level
1349 //
1350 coreLevel = level;
1351 pkgLevel = -1;
1352 nCoresPerPkg = buf.ebx & 0xff;
1353 if (nCoresPerPkg == 0) {
1354 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1355 return -1;
1356 }
1357 }
1358 else {
1359 if (level <= 0) {
1360 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1361 return -1;
1362 }
1363 if (pkgLevel >= 0) {
1364 continue;
1365 }
1366 pkgLevel = level;
1367 nPackages = buf.ebx & 0xff;
1368 if (nPackages == 0) {
1369 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1370 return -1;
1371 }
1372 }
1373 }
1374 int depth = level;
1375
1376 //
1377 // In the above loop, "level" was counted from the finest level (usually
1378 // thread) to the coarsest. The caller expects that we will place the
1379 // labels in (*address2os)[].first.labels[] in the inverse order, so
1380 // we need to invert the vars saying which level means what.
1381 //
1382 if (threadLevel >= 0) {
1383 threadLevel = depth - threadLevel - 1;
1384 }
1385 if (coreLevel >= 0) {
1386 coreLevel = depth - coreLevel - 1;
1387 }
1388 KMP_DEBUG_ASSERT(pkgLevel >= 0);
1389 pkgLevel = depth - pkgLevel - 1;
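    // For example, if leaf 11 enumerated the levels as thread = 0, core = 1,
    // package = 2 (depth = 3), then after the inversion above pkgLevel = 0,
    // coreLevel = 1, and threadLevel = 2, matching the order of the labels
    // stored below.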
1390
1391 //
1392 // The algorithm used starts by setting the affinity to each available
1393    // thread and retrieving info from the cpuid instruction, so if we are
1394    // not capable of calling __kmp_get_system_affinity() and
1395    // __kmp_set_system_affinity(), then we need to do something else - use
1396 // the defaults that we calculated from issuing cpuid without binding
1397 // to each proc.
1398    //
1399 if (! KMP_AFFINITY_CAPABLE())
1400 {
1401 //
1402 // Hack to try and infer the machine topology using only the data
1403 // available from cpuid on the current thread, and __kmp_xproc.
1404 //
1405 KMP_ASSERT(__kmp_affinity_type == affinity_none);
1406
1407 __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
1408 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
1409        if (__kmp_affinity_verbose) {
1410 KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
1411 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1412 if (__kmp_affinity_uniform_topology()) {
1413 KMP_INFORM(Uniform, "KMP_AFFINITY");
1414 } else {
1415 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1416 }
1417 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1418 __kmp_nThreadsPerCore, __kmp_ncores);
1419 }
1420 return 0;
1421 }
1422
1423 //
1424 //
1425 // From here on, we can assume that it is safe to call
1426 // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
1427 // even if __kmp_affinity_type = affinity_none.
1428 //
1429
1430 //
1431 // Save the affinity mask for the current thread.
1432 //
1433 kmp_affin_mask_t *oldMask;
1434 KMP_CPU_ALLOC(oldMask);
1435 __kmp_get_system_affinity(oldMask, TRUE);
1436
1437 //
1438 // Allocate the data structure to be returned.
1439 //
1440 AddrUnsPair *retval = (AddrUnsPair *)
1441 __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
1442
1443 //
1444 // Run through each of the available contexts, binding the current thread
1445 // to it, and obtaining the pertinent information using the cpuid instr.
1446 //
1447 unsigned int proc;
1448 int nApics = 0;
1449 for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
1450 //
1451 // Skip this proc if it is not included in the machine model.
1452 //
1453 if (! KMP_CPU_ISSET(proc, fullMask)) {
1454 continue;
1455 }
1456 KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
1457
1458 __kmp_affinity_bind_thread(proc);
1459
1460 //
1461        // Extract the labels for each level in the machine topology map
1462 // from the Apic ID.
1463 //
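        // Illustrative example (hypothetical shift widths): if level 0 (SMT)
        // reports a shift of 1 and level 1 (core) a shift of 5, then for an
        // x2APIC id of 0x53 the labels come out as thread = 0x53 & 0x1 = 1,
        // core = (0x53 & 0x1f) >> 1 = 9, and package = 0x53 >> 5 = 2.
        //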
1464 Address addr(depth);
1465 int prev_shift = 0;
1466
1467 for (level = 0; level < depth; level++) {
1468 __kmp_x86_cpuid(11, level, &buf);
1469 unsigned apicId = buf.edx;
1470 if (buf.ebx == 0) {
1471 if (level != depth - 1) {
1472 KMP_CPU_FREE(oldMask);
1473 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1474 return -1;
1475 }
1476 addr.labels[depth - level - 1] = apicId >> prev_shift;
1477 level++;
1478 break;
1479 }
1480 int shift = buf.eax & 0x1f;
1481 int mask = (1 << shift) - 1;
1482 addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
1483 prev_shift = shift;
1484 }
1485 if (level != depth) {
1486 KMP_CPU_FREE(oldMask);
1487 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1488 return -1;
1489 }
1490
1491 retval[nApics] = AddrUnsPair(addr, proc);
1492 nApics++;
1493 }
1494
1495 //
1496 // We've collected all the info we need.
1497 // Restore the old affinity mask for this thread.
1498 //
1499 __kmp_set_system_affinity(oldMask, TRUE);
1500
1501 //
1502 // If there's only one thread context to bind to, return now.
1503 //
1504 KMP_ASSERT(nApics > 0);
1505 if (nApics == 1) {
1506 __kmp_ncores = nPackages = 1;
1507 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
1508        if (__kmp_affinity_verbose) {
1509 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1510 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1511
1512 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1513 if (__kmp_affinity_respect_mask) {
1514 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1515 } else {
1516 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1517 }
1518 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1519 KMP_INFORM(Uniform, "KMP_AFFINITY");
1520 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1521 __kmp_nThreadsPerCore, __kmp_ncores);
1522 }
1523
1524 if (__kmp_affinity_type == affinity_none) {
1525 __kmp_free(retval);
1526 KMP_CPU_FREE(oldMask);
1527 return 0;
1528 }
1529
1530 //
1531 // Form an Address object which only includes the package level.
1532 //
1533 Address addr(1);
1534 addr.labels[0] = retval[0].first.labels[pkgLevel];
1535 retval[0].first = addr;
1536
1537 if (__kmp_affinity_gran_levels < 0) {
1538 __kmp_affinity_gran_levels = 0;
1539 }
1540
1541 if (__kmp_affinity_verbose) {
1542 __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
1543 }
1544
1545 *address2os = retval;
1546 KMP_CPU_FREE(oldMask);
1547 return 1;
1548 }
1549
1550 //
1551 // Sort the table by physical Id.
1552 //
1553 qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
1554
1555 //
1556 // Find the radix at each of the levels.
1557 //
1558 unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1559 unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1560 unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1561 unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1562 for (level = 0; level < depth; level++) {
1563 totals[level] = 1;
1564 maxCt[level] = 1;
1565 counts[level] = 1;
1566 last[level] = retval[0].first.labels[level];
1567 }
1568
1569 //
1570 // From here on, the iteration variable "level" runs from the finest
1571 // level to the coarsest, i.e. we iterate forward through
1572 // (*address2os)[].first.labels[] - in the previous loops, we iterated
1573 // backwards.
1574 //
1575 for (proc = 1; (int)proc < nApics; proc++) {
1576 int level;
1577 for (level = 0; level < depth; level++) {
1578 if (retval[proc].first.labels[level] != last[level]) {
1579 int j;
1580 for (j = level + 1; j < depth; j++) {
1581 totals[j]++;
1582 counts[j] = 1;
1583                // The line below would cause incorrect topology information to be
1584                // printed when the maximum value for some level (maxCt[level]) is
1585                // encountered earlier in the array than a smaller value.
1586                // For example, if pkg0 has 4 cores and pkg1 has 2 cores, then maxCt[1]
1587                // would end up as 2, whereas it must be 4.
1588 // TODO!!! Check if it can be commented safely
1589 //maxCt[j] = 1;
1590 last[j] = retval[proc].first.labels[j];
1591 }
1592 totals[level]++;
1593 counts[level]++;
1594 if (counts[level] > maxCt[level]) {
1595 maxCt[level] = counts[level];
1596 }
1597 last[level] = retval[proc].first.labels[level];
1598 break;
1599 }
1600 else if (level == depth - 1) {
1601 __kmp_free(last);
1602 __kmp_free(maxCt);
1603 __kmp_free(counts);
1604 __kmp_free(totals);
1605 __kmp_free(retval);
1606 KMP_CPU_FREE(oldMask);
1607 *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
1608 return -1;
1609 }
1610 }
1611 }
1612
1613 //
1614 // When affinity is off, this routine will still be called to set
1615    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
1616    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
1617 // correctly, and return if affinity is not enabled.
1618 //
1619 if (threadLevel >= 0) {
1620 __kmp_nThreadsPerCore = maxCt[threadLevel];
1621 }
1622 else {
1623 __kmp_nThreadsPerCore = 1;
1624 }
1625    nPackages = totals[pkgLevel];
1626
1627 if (coreLevel >= 0) {
1628 __kmp_ncores = totals[coreLevel];
1629 nCoresPerPkg = maxCt[coreLevel];
1630 }
1631 else {
1632 __kmp_ncores = nPackages;
1633 nCoresPerPkg = 1;
1634 }
1635
1636 //
1637 // Check to see if the machine topology is uniform
1638 //
1639 unsigned prod = maxCt[0];
1640 for (level = 1; level < depth; level++) {
1641 prod *= maxCt[level];
1642 }
1643 bool uniform = (prod == totals[level - 1]);
1644
1645 //
1646 // Print the machine topology summary.
1647 //
1648 if (__kmp_affinity_verbose) {
1649 char mask[KMP_AFFIN_MASK_PRINT_LEN];
1650 __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1651
1652 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1653 if (__kmp_affinity_respect_mask) {
1654 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
1655 } else {
1656 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
1657 }
1658 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1659 if (uniform) {
1660 KMP_INFORM(Uniform, "KMP_AFFINITY");
1661 } else {
1662 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1663 }
1664
1665 kmp_str_buf_t buf;
1666 __kmp_str_buf_init(&buf);
1667
1668 __kmp_str_buf_print(&buf, "%d", totals[0]);
1669 for (level = 1; level <= pkgLevel; level++) {
1670 __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
1671 }
1672 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
1673 __kmp_nThreadsPerCore, __kmp_ncores);
1674
1675 __kmp_str_buf_free(&buf);
1676 }
1677
1678 if (__kmp_affinity_type == affinity_none) {
1679 __kmp_free(last);
1680 __kmp_free(maxCt);
1681 __kmp_free(counts);
1682 __kmp_free(totals);
1683 __kmp_free(retval);
1684 KMP_CPU_FREE(oldMask);
1685 return 0;
1686 }
1687
1688 //
1689    // Find any levels with radix 1, and remove them from the map
1690 // (except for the package level).
1691 //
1692 int new_depth = 0;
1693 for (level = 0; level < depth; level++) {
1694 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1695 continue;
1696 }
1697 new_depth++;
1698 }
1699
1700 //
1701 // If we are removing any levels, allocate a new vector to return,
1702 // and copy the relevant information to it.
1703 //
1704 if (new_depth != depth) {
1705 AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
1706 sizeof(AddrUnsPair) * nApics);
1707 for (proc = 0; (int)proc < nApics; proc++) {
1708 Address addr(new_depth);
1709 new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
1710 }
1711 int new_level = 0;
1712 for (level = 0; level < depth; level++) {
1713 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1714 if (level == threadLevel) {
1715 threadLevel = -1;
1716 }
1717 else if ((threadLevel >= 0) && (level < threadLevel)) {
1718 threadLevel--;
1719 }
1720 if (level == coreLevel) {
1721 coreLevel = -1;
1722 }
1723 else if ((coreLevel >= 0) && (level < coreLevel)) {
1724 coreLevel--;
1725 }
1726 if (level < pkgLevel) {
1727 pkgLevel--;
1728 }
1729 continue;
1730 }
1731 for (proc = 0; (int)proc < nApics; proc++) {
1732 new_retval[proc].first.labels[new_level]
1733 = retval[proc].first.labels[level];
1734 }
1735 new_level++;
1736 }
1737
1738 __kmp_free(retval);
1739 retval = new_retval;
1740 depth = new_depth;
1741 }
1742
1743 if (__kmp_affinity_gran_levels < 0) {
1744 //
1745 // Set the granularity level based on what levels are modeled
1746 // in the machine topology map.
1747 //
1748 __kmp_affinity_gran_levels = 0;
1749 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
1750 __kmp_affinity_gran_levels++;
1751 }
1752 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1753 __kmp_affinity_gran_levels++;
1754 }
1755 if (__kmp_affinity_gran > affinity_gran_package) {
1756 __kmp_affinity_gran_levels++;
1757 }
1758 }
1759
1760 if (__kmp_affinity_verbose) {
1761 __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
1762 coreLevel, threadLevel);
1763 }
1764
1765 __kmp_free(last);
1766 __kmp_free(maxCt);
1767 __kmp_free(counts);
1768 __kmp_free(totals);
1769 KMP_CPU_FREE(oldMask);
1770 *address2os = retval;
1771 return depth;
1772}
1773
1774
1775# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1776
1777
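
//
// Field indices into each /proc/cpuinfo record (one array of unsigneds per
// OS proc): the OS proc id, hw thread id, core id, package id, and zero or
// more node_<n> ids starting at nodeIdIndex.  maxIndex tracks the highest
// index actually in use.
//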
1778#define osIdIndex 0
1779#define threadIdIndex 1
1780#define coreIdIndex 2
1781#define pkgIdIndex 3
1782#define nodeIdIndex 4
1783
1784typedef unsigned *ProcCpuInfo;
1785static unsigned maxIndex = pkgIdIndex;
1786
1787
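//
// Comparator (qsort-style): order cpuinfo records by OS proc id alone.
//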
1788static int
1789__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
1790{
1791 const unsigned *aa = (const unsigned *)a;
1792 const unsigned *bb = (const unsigned *)b;
1793 if (aa[osIdIndex] < bb[osIdIndex]) return -1;
1794 if (aa[osIdIndex] > bb[osIdIndex]) return 1;
1795 return 0;
1796};
1797
1798
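//
// Comparator (qsort-style): order cpuinfo records by their full physical
// location, comparing from the most significant field (highest node level,
// then package, core, thread) down to the OS proc id.
//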
1799static int
1800__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
1801{
1802 unsigned i;
1803 const unsigned *aa = *((const unsigned **)a);
1804 const unsigned *bb = *((const unsigned **)b);
1805 for (i = maxIndex; ; i--) {
1806 if (aa[i] < bb[i]) return -1;
1807 if (aa[i] > bb[i]) return 1;
1808 if (i == osIdIndex) break;
1809 }
1810 return 0;
1811}
1812
1813
1814//
1815// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
1816// affinity map.
1817//
1818static int
1819__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
1820 kmp_i18n_id_t *const msg_id, FILE *f)
1821{
1822 *address2os = NULL;
1823 *msg_id = kmp_i18n_null;
1824
1825 //
1826    // Scan the file once, counting the number of "processor" (osId) fields,
Alp Toker8f2d3f02014-02-24 10:40:15 +00001827    // and finding the highest value of <n> for a node_<n> field.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001828 //
1829 char buf[256];
1830 unsigned num_records = 0;
1831 while (! feof(f)) {
1832 buf[sizeof(buf) - 1] = 1;
1833 if (! fgets(buf, sizeof(buf), f)) {
1834 //
1835 // Read errors presumably because of EOF
1836 //
1837 break;
1838 }
1839
1840 char s1[] = "processor";
1841 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1842 num_records++;
1843 continue;
1844 }
1845
1846 //
1847 // FIXME - this will match "node_<n> <garbage>"
1848 //
1849 unsigned level;
1850 if (sscanf(buf, "node_%d id", &level) == 1) {
1851 if (nodeIdIndex + level >= maxIndex) {
1852 maxIndex = nodeIdIndex + level;
1853 }
1854 continue;
1855 }
1856 }
1857
1858 //
1859 // Check for empty file / no valid processor records, or too many.
1860 // The number of records can't exceed the number of valid bits in the
1861 // affinity mask.
1862 //
1863 if (num_records == 0) {
1864 *line = 0;
1865 *msg_id = kmp_i18n_str_NoProcRecords;
1866 return -1;
1867 }
1868 if (num_records > (unsigned)__kmp_xproc) {
1869 *line = 0;
1870 *msg_id = kmp_i18n_str_TooManyProcRecords;
1871 return -1;
1872 }
1873
1874 //
1875    // Set the file pointer back to the beginning, so that we can scan the
1876 // file again, this time performing a full parse of the data.
1877    // Allocate a vector of ProcCpuInfo objects, where we will place the data.
1878 // Adding an extra element at the end allows us to remove a lot of extra
1879 // checks for termination conditions.
1880 //
1881 if (fseek(f, 0, SEEK_SET) != 0) {
1882 *line = 0;
1883 *msg_id = kmp_i18n_str_CantRewindCpuinfo;
1884 return -1;
1885 }
1886
1887 //
1888 // Allocate the array of records to store the proc info in. The dummy
1889 // element at the end makes the logic in filling them out easier to code.
1890 //
1891 unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
1892 * sizeof(unsigned *));
1893 unsigned i;
1894 for (i = 0; i <= num_records; i++) {
1895 threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
1896 * sizeof(unsigned));
1897 }
1898
1899#define CLEANUP_THREAD_INFO \
1900 for (i = 0; i <= num_records; i++) { \
1901 __kmp_free(threadInfo[i]); \
1902 } \
1903 __kmp_free(threadInfo);
1904
1905 //
1906 // A value of UINT_MAX means that we didn't find the field
1907 //
1908 unsigned __index;
1909
1910#define INIT_PROC_INFO(p) \
1911 for (__index = 0; __index <= maxIndex; __index++) { \
1912 (p)[__index] = UINT_MAX; \
1913 }
1914
1915 for (i = 0; i <= num_records; i++) {
1916 INIT_PROC_INFO(threadInfo[i]);
1917 }
1918
1919 unsigned num_avail = 0;
1920 *line = 0;
1921 while (! feof(f)) {
1922 //
1923 // Create an inner scoping level, so that all the goto targets at the
1924 // end of the loop appear in an outer scoping level. This avoids
1925 // warnings about jumping past an initialization to a target in the
1926 // same block.
1927 //
1928 {
1929 buf[sizeof(buf) - 1] = 1;
1930 bool long_line = false;
1931 if (! fgets(buf, sizeof(buf), f)) {
1932 //
1933 // Read errors presumably because of EOF
1934 //
1935 // If there is valid data in threadInfo[num_avail], then fake
1936                // a blank line to ensure that the last address gets parsed.
1937 //
1938 bool valid = false;
1939 for (i = 0; i <= maxIndex; i++) {
1940 if (threadInfo[num_avail][i] != UINT_MAX) {
1941 valid = true;
1942 }
1943 }
1944 if (! valid) {
1945 break;
1946 }
1947 buf[0] = 0;
1948 } else if (!buf[sizeof(buf) - 1]) {
1949 //
1950 // The line is longer than the buffer. Set a flag and don't
1951 // emit an error if we were going to ignore the line, anyway.
1952 //
1953 long_line = true;
1954
1955#define CHECK_LINE \
1956 if (long_line) { \
1957 CLEANUP_THREAD_INFO; \
1958 *msg_id = kmp_i18n_str_LongLineCpuinfo; \
1959 return -1; \
1960 }
1961 }
1962 (*line)++;
1963
1964 char s1[] = "processor";
1965 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1966 CHECK_LINE;
1967 char *p = strchr(buf + sizeof(s1) - 1, ':');
1968 unsigned val;
1969 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
1970 if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
1971 threadInfo[num_avail][osIdIndex] = val;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001972#if KMP_OS_LINUX && USE_SYSFS_INFO
1973 char path[256];
1974 snprintf(path, sizeof(path),
1975 "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
1976 threadInfo[num_avail][osIdIndex]);
1977 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
1978
1979 snprintf(path, sizeof(path),
1980 "/sys/devices/system/cpu/cpu%u/topology/core_id",
1981 threadInfo[num_avail][osIdIndex]);
1982 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001983 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001984#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001985 }
1986 char s2[] = "physical id";
1987 if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
1988 CHECK_LINE;
1989 char *p = strchr(buf + sizeof(s2) - 1, ':');
1990 unsigned val;
1991 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
1992 if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
1993 threadInfo[num_avail][pkgIdIndex] = val;
1994 continue;
1995 }
1996 char s3[] = "core id";
1997 if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
1998 CHECK_LINE;
1999 char *p = strchr(buf + sizeof(s3) - 1, ':');
2000 unsigned val;
2001 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2002 if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
2003 threadInfo[num_avail][coreIdIndex] = val;
2004 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002005#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jim Cownie5e8470a2013-09-27 10:38:44 +00002006 }
2007 char s4[] = "thread id";
2008 if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
2009 CHECK_LINE;
2010 char *p = strchr(buf + sizeof(s4) - 1, ':');
2011 unsigned val;
2012 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2013 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
2014 threadInfo[num_avail][threadIdIndex] = val;
2015 continue;
2016 }
2017 unsigned level;
2018 if (sscanf(buf, "node_%d id", &level) == 1) {
2019 CHECK_LINE;
2020 char *p = strchr(buf + sizeof(s4) - 1, ':');
2021 unsigned val;
2022 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2023 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
2024 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
2025 threadInfo[num_avail][nodeIdIndex + level] = val;
2026 continue;
2027 }
2028
2029 //
2030 // We didn't recognize the leading token on the line.
2031 // There are lots of leading tokens that we don't recognize -
2032 // if the line isn't empty, go on to the next line.
2033 //
2034 if ((*buf != 0) && (*buf != '\n')) {
2035 //
2036 // If the line is longer than the buffer, read characters
2037 // until we find a newline.
2038 //
2039 if (long_line) {
2040 int ch;
2041 while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
2042 }
2043 continue;
2044 }
2045
2046 //
2047 // A newline has signalled the end of the processor record.
2048 // Check that there aren't too many procs specified.
2049 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002050 if ((int)num_avail == __kmp_xproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002051 CLEANUP_THREAD_INFO;
2052 *msg_id = kmp_i18n_str_TooManyEntries;
2053 return -1;
2054 }
2055
2056 //
2057 // Check for missing fields. The osId field must be there, and we
2058 // currently require that the physical id field is specified, also.
2059 //
2060 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
2061 CLEANUP_THREAD_INFO;
2062 *msg_id = kmp_i18n_str_MissingProcField;
2063 return -1;
2064 }
2065 if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
2066 CLEANUP_THREAD_INFO;
2067 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
2068 return -1;
2069 }
2070
2071 //
2072 // Skip this proc if it is not included in the machine model.
2073 //
2074 if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
2075 INIT_PROC_INFO(threadInfo[num_avail]);
2076 continue;
2077 }
2078
2079 //
2080 // We have a successful parse of this proc's info.
2081 // Increment the counter, and prepare for the next proc.
2082 //
2083 num_avail++;
2084 KMP_ASSERT(num_avail <= num_records);
2085 INIT_PROC_INFO(threadInfo[num_avail]);
2086 }
2087 continue;
2088
2089 no_val:
2090 CLEANUP_THREAD_INFO;
2091 *msg_id = kmp_i18n_str_MissingValCpuinfo;
2092 return -1;
2093
2094 dup_field:
2095 CLEANUP_THREAD_INFO;
2096 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
2097 return -1;
2098 }
2099 *line = 0;
2100
2101# if KMP_MIC && REDUCE_TEAM_SIZE
2102 unsigned teamSize = 0;
2103# endif // KMP_MIC && REDUCE_TEAM_SIZE
2104
2105 // check for num_records == __kmp_xproc ???
2106
2107 //
2108 // If there's only one thread context to bind to, form an Address object
2109 // with depth 1 and return immediately (or, if affinity is off, set
2110 // address2os to NULL and return).
2111 //
2112 // If it is configured to omit the package level when there is only a
2113 // single package, the logic at the end of this routine won't work if
2114 // there is only a single thread - it would try to form an Address
2115 // object with depth 0.
2116 //
2117 KMP_ASSERT(num_avail > 0);
2118 KMP_ASSERT(num_avail <= num_records);
2119 if (num_avail == 1) {
2120 __kmp_ncores = 1;
2121 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002122 if (__kmp_affinity_verbose) {
2123 if (! KMP_AFFINITY_CAPABLE()) {
2124 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2125 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2126 KMP_INFORM(Uniform, "KMP_AFFINITY");
2127 }
2128 else {
2129 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2130 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
2131 fullMask);
2132 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2133 if (__kmp_affinity_respect_mask) {
2134 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2135 } else {
2136 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2137 }
2138 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2139 KMP_INFORM(Uniform, "KMP_AFFINITY");
2140 }
2141 int index;
2142 kmp_str_buf_t buf;
2143 __kmp_str_buf_init(&buf);
2144 __kmp_str_buf_print(&buf, "1");
2145 for (index = maxIndex - 1; index > pkgIdIndex; index--) {
2146 __kmp_str_buf_print(&buf, " x 1");
2147 }
2148 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
2149 __kmp_str_buf_free(&buf);
2150 }
2151
2152 if (__kmp_affinity_type == affinity_none) {
2153 CLEANUP_THREAD_INFO;
2154 return 0;
2155 }
2156
2157 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
2158 Address addr(1);
2159 addr.labels[0] = threadInfo[0][pkgIdIndex];
2160 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
2161
2162 if (__kmp_affinity_gran_levels < 0) {
2163 __kmp_affinity_gran_levels = 0;
2164 }
2165
2166 if (__kmp_affinity_verbose) {
2167 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
2168 }
2169
2170 CLEANUP_THREAD_INFO;
2171 return 1;
2172 }
2173
2174 //
2175 // Sort the threadInfo table by physical Id.
2176 //
2177 qsort(threadInfo, num_avail, sizeof(*threadInfo),
2178 __kmp_affinity_cmp_ProcCpuInfo_phys_id);
2179
2180 //
2181 // The table is now sorted by pkgId / coreId / threadId, but we really
2182 // don't know the radix of any of the fields. pkgId's may be sparsely
2183 // assigned among the chips on a system. Although coreId's are usually
2184 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
2185 // [0..threadsPerCore-1], we don't want to make any such assumptions.
2186 //
2187 // For that matter, we don't know what coresPerPkg and threadsPerCore
2188 // (or the total # packages) are at this point - we want to determine
2189 // that now. We only have an upper bound on the first two figures.
2190 //
2191 unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
2192 * sizeof(unsigned));
2193 unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
2194 * sizeof(unsigned));
2195 unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
2196 * sizeof(unsigned));
2197 unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
2198 * sizeof(unsigned));
2199
2200 bool assign_thread_ids = false;
2201 unsigned threadIdCt;
2202 unsigned index;
2203
2204 restart_radix_check:
2205 threadIdCt = 0;
2206
2207 //
2208 // Initialize the counter arrays with data from threadInfo[0].
2209 //
2210 if (assign_thread_ids) {
2211 if (threadInfo[0][threadIdIndex] == UINT_MAX) {
2212 threadInfo[0][threadIdIndex] = threadIdCt++;
2213 }
2214 else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
2215 threadIdCt = threadInfo[0][threadIdIndex] + 1;
2216 }
2217 }
2218 for (index = 0; index <= maxIndex; index++) {
2219 counts[index] = 1;
2220 maxCt[index] = 1;
2221 totals[index] = 1;
2222        lastId[index] = threadInfo[0][index];
2223 }
2224
2225 //
2226 // Run through the rest of the OS procs.
2227 //
2228 for (i = 1; i < num_avail; i++) {
2229 //
2230 // Find the most significant index whose id differs
2231 // from the id for the previous OS proc.
2232 //
2233 for (index = maxIndex; index >= threadIdIndex; index--) {
2234 if (assign_thread_ids && (index == threadIdIndex)) {
2235 //
2236 // Auto-assign the thread id field if it wasn't specified.
2237 //
2238 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2239 threadInfo[i][threadIdIndex] = threadIdCt++;
2240 }
2241
2242 //
2243                // Apparently the thread id field was specified for some
2244 // entries and not others. Start the thread id counter
2245 // off at the next higher thread id.
2246 //
2247 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2248 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2249 }
2250 }
2251 if (threadInfo[i][index] != lastId[index]) {
2252 //
2253 // Run through all indices which are less significant,
2254 // and reset the counts to 1.
2255 //
2256 // At all levels up to and including index, we need to
2257 // increment the totals and record the last id.
2258 //
2259 unsigned index2;
2260 for (index2 = threadIdIndex; index2 < index; index2++) {
2261 totals[index2]++;
2262 if (counts[index2] > maxCt[index2]) {
2263 maxCt[index2] = counts[index2];
2264 }
2265 counts[index2] = 1;
2266 lastId[index2] = threadInfo[i][index2];
2267 }
2268 counts[index]++;
2269 totals[index]++;
2270 lastId[index] = threadInfo[i][index];
2271
2272 if (assign_thread_ids && (index > threadIdIndex)) {
2273
2274# if KMP_MIC && REDUCE_TEAM_SIZE
2275 //
2276 // The default team size is the total #threads in the machine
2277 // minus 1 thread for every core that has 3 or more threads.
2278 //
2279 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2280# endif // KMP_MIC && REDUCE_TEAM_SIZE
2281
2282 //
2283 // Restart the thread counter, as we are on a new core.
2284 //
2285 threadIdCt = 0;
2286
2287 //
2288 // Auto-assign the thread id field if it wasn't specified.
2289 //
2290 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2291 threadInfo[i][threadIdIndex] = threadIdCt++;
2292 }
2293
2294 //
2295                    // Apparently the thread id field was specified for some
2296 // entries and not others. Start the thread id counter
2297 // off at the next higher thread id.
2298 //
2299 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2300 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2301 }
2302 }
2303 break;
2304 }
2305 }
2306 if (index < threadIdIndex) {
2307 //
2308 // If thread ids were specified, it is an error if they are not
2309            // unique. Also, check that we haven't already restarted the
2310 // loop (to be safe - shouldn't need to).
2311 //
2312 if ((threadInfo[i][threadIdIndex] != UINT_MAX)
2313 || assign_thread_ids) {
2314 __kmp_free(lastId);
2315 __kmp_free(totals);
2316 __kmp_free(maxCt);
2317 __kmp_free(counts);
2318 CLEANUP_THREAD_INFO;
2319 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2320 return -1;
2321 }
2322
2323 //
2324            // If the thread ids were not specified and we see entries
2325            // that are duplicates, start the loop over and
2326 // assign the thread ids manually.
2327 //
2328 assign_thread_ids = true;
2329 goto restart_radix_check;
2330 }
2331 }
2332
2333# if KMP_MIC && REDUCE_TEAM_SIZE
2334 //
2335 // The default team size is the total #threads in the machine
2336 // minus 1 thread for every core that has 3 or more threads.
2337 //
2338 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2339# endif // KMP_MIC && REDUCE_TEAM_SIZE
2340
2341 for (index = threadIdIndex; index <= maxIndex; index++) {
2342 if (counts[index] > maxCt[index]) {
2343 maxCt[index] = counts[index];
2344 }
2345 }
2346
2347 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2348 nCoresPerPkg = maxCt[coreIdIndex];
2349 nPackages = totals[pkgIdIndex];
2350
2351 //
2352 // Check to see if the machine topology is uniform
2353 //
2354 unsigned prod = totals[maxIndex];
2355 for (index = threadIdIndex; index < maxIndex; index++) {
2356 prod *= maxCt[index];
2357 }
2358 bool uniform = (prod == totals[threadIdIndex]);
2359
2360 //
2361 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00002362 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002363 // nCoresPerPkg, & nPackages. Make sure all these vars are set
2364 // correctly, and return now if affinity is not enabled.
2365 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00002366 __kmp_ncores = totals[coreIdIndex];
2367
2368 if (__kmp_affinity_verbose) {
2369 if (! KMP_AFFINITY_CAPABLE()) {
2370 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2371 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2372 if (uniform) {
2373 KMP_INFORM(Uniform, "KMP_AFFINITY");
2374 } else {
2375 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2376 }
2377 }
2378 else {
2379 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2380 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
2381 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2382 if (__kmp_affinity_respect_mask) {
2383 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2384 } else {
2385 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2386 }
2387 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2388 if (uniform) {
2389 KMP_INFORM(Uniform, "KMP_AFFINITY");
2390 } else {
2391 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2392 }
2393 }
2394 kmp_str_buf_t buf;
2395 __kmp_str_buf_init(&buf);
2396
2397 __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
2398 for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2399 __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
2400 }
2401 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
2402 maxCt[threadIdIndex], __kmp_ncores);
2403
2404 __kmp_str_buf_free(&buf);
2405 }
2406
2407# if KMP_MIC && REDUCE_TEAM_SIZE
2408 //
2409 // Set the default team size.
2410 //
2411 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2412 __kmp_dflt_team_nth = teamSize;
2413 KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
2414 __kmp_dflt_team_nth));
2415 }
2416# endif // KMP_MIC && REDUCE_TEAM_SIZE
2417
2418 if (__kmp_affinity_type == affinity_none) {
2419 __kmp_free(lastId);
2420 __kmp_free(totals);
2421 __kmp_free(maxCt);
2422 __kmp_free(counts);
2423 CLEANUP_THREAD_INFO;
2424 return 0;
2425 }
2426
2427 //
2428 // Count the number of levels which have more nodes at that level than
2429 // at the parent's level (with there being an implicit root node of
2430    // at the parent's level (with an implicit root node above the top
2431    // level). This is equivalent to saying that there is at least
2432 // map, and the package level is always in the map.
2433 //
2434 bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
2435 int level = 0;
2436 for (index = threadIdIndex; index < maxIndex; index++) {
2437 KMP_ASSERT(totals[index] >= totals[index + 1]);
2438 inMap[index] = (totals[index] > totals[index + 1]);
2439 }
2440 inMap[maxIndex] = (totals[maxIndex] > 1);
2441 inMap[pkgIdIndex] = true;
2442
2443 int depth = 0;
2444 for (index = threadIdIndex; index <= maxIndex; index++) {
2445 if (inMap[index]) {
2446 depth++;
2447 }
2448 }
2449 KMP_ASSERT(depth > 0);
2450
2451 //
2452 // Construct the data structure that is to be returned.
2453 //
2454 *address2os = (AddrUnsPair*)
2455 __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
2456 int pkgLevel = -1;
2457 int coreLevel = -1;
2458 int threadLevel = -1;
2459
2460 for (i = 0; i < num_avail; ++i) {
2461 Address addr(depth);
2462 unsigned os = threadInfo[i][osIdIndex];
2463 int src_index;
2464 int dst_index = 0;
2465
2466 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
2467 if (! inMap[src_index]) {
2468 continue;
2469 }
2470 addr.labels[dst_index] = threadInfo[i][src_index];
2471 if (src_index == pkgIdIndex) {
2472 pkgLevel = dst_index;
2473 }
2474 else if (src_index == coreIdIndex) {
2475 coreLevel = dst_index;
2476 }
2477 else if (src_index == threadIdIndex) {
2478 threadLevel = dst_index;
2479 }
2480 dst_index++;
2481 }
2482 (*address2os)[i] = AddrUnsPair(addr, os);
2483 }
2484
2485 if (__kmp_affinity_gran_levels < 0) {
2486 //
2487 // Set the granularity level based on what levels are modeled
2488 // in the machine topology map.
2489 //
2490 unsigned src_index;
2491 __kmp_affinity_gran_levels = 0;
2492 for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2493 if (! inMap[src_index]) {
2494 continue;
2495 }
2496 switch (src_index) {
2497 case threadIdIndex:
2498 if (__kmp_affinity_gran > affinity_gran_thread) {
2499 __kmp_affinity_gran_levels++;
2500 }
2501
2502 break;
2503 case coreIdIndex:
2504 if (__kmp_affinity_gran > affinity_gran_core) {
2505 __kmp_affinity_gran_levels++;
2506 }
2507 break;
2508
2509 case pkgIdIndex:
2510 if (__kmp_affinity_gran > affinity_gran_package) {
2511 __kmp_affinity_gran_levels++;
2512 }
2513 break;
2514 }
2515 }
2516 }
2517
2518 if (__kmp_affinity_verbose) {
2519 __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2520 coreLevel, threadLevel);
2521 }
2522
2523 __kmp_free(inMap);
2524 __kmp_free(lastId);
2525 __kmp_free(totals);
2526 __kmp_free(maxCt);
2527 __kmp_free(counts);
2528 CLEANUP_THREAD_INFO;
2529 return depth;
2530}
2531
2532
2533//
2534// Create and return a table of affinity masks, indexed by OS thread ID.
2535// This routine handles OR'ing together all the affinity masks of threads
2536// that are sufficiently close, if granularity > fine.
2537//
2538static kmp_affin_mask_t *
2539__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
2540 AddrUnsPair *address2os, unsigned numAddrs)
2541{
2542 //
2543 // First form a table of affinity masks in order of OS thread id.
2544 //
2545 unsigned depth;
2546 unsigned maxOsId;
2547 unsigned i;
2548
2549 KMP_ASSERT(numAddrs > 0);
2550 depth = address2os[0].first.depth;
2551
2552 maxOsId = 0;
2553 for (i = 0; i < numAddrs; i++) {
2554 unsigned osId = address2os[i].second;
2555 if (osId > maxOsId) {
2556 maxOsId = osId;
2557 }
2558 }
2559 kmp_affin_mask_t *osId2Mask = (kmp_affin_mask_t *)__kmp_allocate(
2560 (maxOsId + 1) * __kmp_affin_mask_size);
2561
2562 //
2563 // Sort the address2os table according to physical order. Doing so
2564 // will put all threads on the same core/package/node in consecutive
2565 // locations.
2566 //
2567 qsort(address2os, numAddrs, sizeof(*address2os),
2568 __kmp_affinity_cmp_Address_labels);
2569
2570 KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2571 if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
2572 KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
2573 }
2574 if (__kmp_affinity_gran_levels >= (int)depth) {
2575 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2576 && (__kmp_affinity_type != affinity_none))) {
2577 KMP_WARNING(AffThreadsMayMigrate);
2578 }
2579 }
2580
2581 //
2582 // Run through the table, forming the masks for all threads on each
2583 // core. Threads on the same core will have identical "Address"
2584 // objects, not considering the last level, which must be the thread
2585 // id. All threads on a core will appear consecutively.
2586 //
2587 unsigned unique = 0;
2588 unsigned j = 0; // index of 1st thread on core
2589 unsigned leader = 0;
2590 Address *leaderAddr = &(address2os[0].first);
2591 kmp_affin_mask_t *sum
2592 = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
2593 KMP_CPU_ZERO(sum);
2594 KMP_CPU_SET(address2os[0].second, sum);
2595 for (i = 1; i < numAddrs; i++) {
2596 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002597 // If this thread is sufficiently close to the leader (within the
Jim Cownie5e8470a2013-09-27 10:38:44 +00002598 // granularity setting), then set the bit for this os thread in the
2599 // affinity mask for this group, and go on to the next thread.
2600 //
2601 if (leaderAddr->isClose(address2os[i].first,
2602 __kmp_affinity_gran_levels)) {
2603 KMP_CPU_SET(address2os[i].second, sum);
2604 continue;
2605 }
2606
2607 //
2608 // For every thread in this group, copy the mask to the thread's
2609 // entry in the osId2Mask table. Mark the first address as a
2610 // leader.
2611 //
2612 for (; j < i; j++) {
2613 unsigned osId = address2os[j].second;
2614 KMP_DEBUG_ASSERT(osId <= maxOsId);
2615 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2616 KMP_CPU_COPY(mask, sum);
2617 address2os[j].first.leader = (j == leader);
2618 }
2619 unique++;
2620
2621 //
2622 // Start a new mask.
2623 //
2624 leader = i;
2625 leaderAddr = &(address2os[i].first);
2626 KMP_CPU_ZERO(sum);
2627 KMP_CPU_SET(address2os[i].second, sum);
2628 }
2629
2630 //
2631 // For every thread in last group, copy the mask to the thread's
2632 // entry in the osId2Mask table.
2633 //
2634 for (; j < i; j++) {
2635 unsigned osId = address2os[j].second;
2636 KMP_DEBUG_ASSERT(osId <= maxOsId);
2637 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2638 KMP_CPU_COPY(mask, sum);
2639 address2os[j].first.leader = (j == leader);
2640 }
2641 unique++;
2642
2643 *maxIndex = maxOsId;
2644 *numUnique = unique;
2645 return osId2Mask;
2646}
2647
2648
2649//
2650// Stuff for the affinity proclist parsers. It's easier to declare these vars
2651// as file-static than to try and pass them through the calling sequence of
2652// the recursive-descent OMP_PLACES parser.
2653//
2654static kmp_affin_mask_t *newMasks;
2655static int numNewMasks;
2656static int nextNewMask;
2657
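//
// ADD_MASK appends a copy of _mask to the newMasks vector, doubling the
// vector's capacity whenever it fills up.  ADD_MASK_OSID first checks that
// the OS proc id is valid (within range and present in the osId2Mask
// table); invalid ids produce a warning and are otherwise ignored.
//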
2658#define ADD_MASK(_mask) \
2659 { \
2660 if (nextNewMask >= numNewMasks) { \
2661 numNewMasks *= 2; \
2662 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
2663 numNewMasks * __kmp_affin_mask_size); \
2664 } \
2665 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
2666 nextNewMask++; \
2667 }
2668
2669#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
2670 { \
2671 if (((_osId) > _maxOsId) || \
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002672 (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002673 if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
2674 && (__kmp_affinity_type != affinity_none))) { \
2675 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
2676 } \
2677 } \
2678 else { \
2679 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
2680 } \
2681 }
2682
2683
2684//
2685// Re-parse the proclist (for the explicit affinity type), and form the list
2686// of affinity newMasks indexed by gtid.
2687//
2688static void
2689__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
2690 unsigned int *out_numMasks, const char *proclist,
2691 kmp_affin_mask_t *osId2Mask, int maxOsId)
2692{
2693 const char *scan = proclist;
2694 const char *next = proclist;
2695
2696 //
2697 // We use malloc() for the temporary mask vector,
2698 // so that we can use realloc() to extend it.
2699 //
2700 numNewMasks = 2;
2701 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
2702 * __kmp_affin_mask_size);
2703 nextNewMask = 0;
2704 kmp_affin_mask_t *sumMask = (kmp_affin_mask_t *)__kmp_allocate(
2705 __kmp_affin_mask_size);
2706 int setSize = 0;
2707
2708 for (;;) {
2709 int start, end, stride;
2710
2711 SKIP_WS(scan);
2712 next = scan;
2713 if (*next == '\0') {
2714 break;
2715 }
2716
2717 if (*next == '{') {
2718 int num;
2719 setSize = 0;
2720 next++; // skip '{'
2721 SKIP_WS(next);
2722 scan = next;
2723
2724 //
2725 // Read the first integer in the set.
2726 //
2727 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2728 "bad proclist");
2729 SKIP_DIGITS(next);
2730 num = __kmp_str_to_int(scan, *next);
2731 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2732
2733 //
2734 // Copy the mask for that osId to the sum (union) mask.
2735 //
2736 if ((num > maxOsId) ||
2737 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2738 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2739 && (__kmp_affinity_type != affinity_none))) {
2740 KMP_WARNING(AffIgnoreInvalidProcID, num);
2741 }
2742 KMP_CPU_ZERO(sumMask);
2743 }
2744 else {
2745 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2746 setSize = 1;
2747 }
2748
2749 for (;;) {
2750 //
2751 // Check for end of set.
2752 //
2753 SKIP_WS(next);
2754 if (*next == '}') {
2755 next++; // skip '}'
2756 break;
2757 }
2758
2759 //
2760 // Skip optional comma.
2761 //
2762 if (*next == ',') {
2763 next++;
2764 }
2765 SKIP_WS(next);
2766
2767 //
2768 // Read the next integer in the set.
2769 //
2770 scan = next;
2771 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2772 "bad explicit proc list");
2773
2774 SKIP_DIGITS(next);
2775 num = __kmp_str_to_int(scan, *next);
2776 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2777
2778 //
2779 // Add the mask for that osId to the sum mask.
2780 //
2781 if ((num > maxOsId) ||
2782 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2783 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2784 && (__kmp_affinity_type != affinity_none))) {
2785 KMP_WARNING(AffIgnoreInvalidProcID, num);
2786 }
2787 }
2788 else {
2789 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2790 setSize++;
2791 }
2792 }
2793 if (setSize > 0) {
2794 ADD_MASK(sumMask);
2795 }
2796
2797 SKIP_WS(next);
2798 if (*next == ',') {
2799 next++;
2800 }
2801 scan = next;
2802 continue;
2803 }
2804
2805 //
2806 // Read the first integer.
2807 //
2808 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2809 SKIP_DIGITS(next);
2810 start = __kmp_str_to_int(scan, *next);
2811 KMP_ASSERT2(start >= 0, "bad explicit proc list");
2812 SKIP_WS(next);
2813
2814 //
2815 // If this isn't a range, then add a mask to the list and go on.
2816 //
2817 if (*next != '-') {
2818 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2819
2820 //
2821 // Skip optional comma.
2822 //
2823 if (*next == ',') {
2824 next++;
2825 }
2826 scan = next;
2827 continue;
2828 }
2829
2830 //
2831 // This is a range. Skip over the '-' and read in the 2nd int.
2832 //
2833 next++; // skip '-'
2834 SKIP_WS(next);
2835 scan = next;
2836 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2837 SKIP_DIGITS(next);
2838 end = __kmp_str_to_int(scan, *next);
2839 KMP_ASSERT2(end >= 0, "bad explicit proc list");
2840
2841 //
2842 // Check for a stride parameter
2843 //
2844 stride = 1;
2845 SKIP_WS(next);
2846 if (*next == ':') {
2847 //
2848            // A stride is specified.  Skip over the ':' and read the 3rd int.
2849 //
2850 int sign = +1;
2851 next++; // skip ':'
2852 SKIP_WS(next);
2853 scan = next;
2854 if (*next == '-') {
2855 sign = -1;
2856 next++;
2857 SKIP_WS(next);
2858 scan = next;
2859 }
2860 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2861 "bad explicit proc list");
2862 SKIP_DIGITS(next);
2863 stride = __kmp_str_to_int(scan, *next);
2864 KMP_ASSERT2(stride >= 0, "bad explicit proc list");
2865 stride *= sign;
2866 }
2867
2868 //
2869 // Do some range checks.
2870 //
2871 KMP_ASSERT2(stride != 0, "bad explicit proc list");
2872 if (stride > 0) {
2873 KMP_ASSERT2(start <= end, "bad explicit proc list");
2874 }
2875 else {
2876 KMP_ASSERT2(start >= end, "bad explicit proc list");
2877 }
2878 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
2879
2880 //
2881 // Add the mask for each OS proc # to the list.
2882 //
2883 if (stride > 0) {
2884 do {
2885 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2886 start += stride;
2887 } while (start <= end);
2888 }
2889 else {
2890 do {
2891 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2892 start += stride;
2893 } while (start >= end);
2894 }
2895
2896 //
2897 // Skip optional comma.
2898 //
2899 SKIP_WS(next);
2900 if (*next == ',') {
2901 next++;
2902 }
2903 scan = next;
2904 }
2905
2906 *out_numMasks = nextNewMask;
2907 if (nextNewMask == 0) {
2908 *out_masks = NULL;
2909 KMP_INTERNAL_FREE(newMasks);
2910 return;
2911 }
2912 *out_masks
2913 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
2914 memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
2915 __kmp_free(sumMask);
2916 KMP_INTERNAL_FREE(newMasks);
2917}
2918
2919
2920# if OMP_40_ENABLED
2921
2922/*-----------------------------------------------------------------------------
2923
2924Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
2925places. Again, here is the grammar:
2926
2927place_list := place
2928place_list := place , place_list
2929place := num
2930place := place : num
2931place := place : num : signed
2932place := { subplacelist }
2933place := ! place // (lowest priority)
2934subplace_list := subplace
2935subplace_list := subplace , subplace_list
2936subplace := num
2937subplace := num : num
2938subplace := num : num : signed
2939signed := num
2940signed := + signed
2941signed := - signed
2942
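For example, "{0,1,2,3}:4:4" denotes the place {0,1,2,3} replicated 4 times
with a stride of 4 OS procs: {0,1,2,3}, {4,5,6,7}, {8,9,10,11}, {12,13,14,15}.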
2943-----------------------------------------------------------------------------*/
2944
2945static void
2946__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
2947 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
2948{
2949 const char *next;
2950
2951 for (;;) {
2952 int start, count, stride, i;
2953
2954 //
2955 // Read in the starting proc id
2956 //
2957 SKIP_WS(*scan);
2958 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2959 "bad explicit places list");
2960 next = *scan;
2961 SKIP_DIGITS(next);
2962 start = __kmp_str_to_int(*scan, *next);
2963 KMP_ASSERT(start >= 0);
2964 *scan = next;
2965
2966 //
2967 // valid follow sets are ',' ':' and '}'
2968 //
2969 SKIP_WS(*scan);
2970 if (**scan == '}' || **scan == ',') {
2971 if ((start > maxOsId) ||
2972 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
2973 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2974 && (__kmp_affinity_type != affinity_none))) {
2975 KMP_WARNING(AffIgnoreInvalidProcID, start);
2976 }
2977 }
2978 else {
2979 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
2980 (*setSize)++;
2981 }
2982 if (**scan == '}') {
2983 break;
2984 }
2985 (*scan)++; // skip ','
2986 continue;
2987 }
2988 KMP_ASSERT2(**scan == ':', "bad explicit places list");
2989 (*scan)++; // skip ':'
2990
2991 //
2992 // Read count parameter
2993 //
2994 SKIP_WS(*scan);
2995 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2996 "bad explicit places list");
2997 next = *scan;
2998 SKIP_DIGITS(next);
2999 count = __kmp_str_to_int(*scan, *next);
3000 KMP_ASSERT(count >= 0);
3001 *scan = next;
3002
3003 //
3004 // valid follow sets are ',' ':' and '}'
3005 //
3006 SKIP_WS(*scan);
3007 if (**scan == '}' || **scan == ',') {
3008 for (i = 0; i < count; i++) {
3009 if ((start > maxOsId) ||
3010 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3011 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3012 && (__kmp_affinity_type != affinity_none))) {
3013 KMP_WARNING(AffIgnoreInvalidProcID, start);
3014 }
3015 break; // don't proliferate warnings for large count
3016 }
3017 else {
3018 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3019 start++;
3020 (*setSize)++;
3021 }
3022 }
3023 if (**scan == '}') {
3024 break;
3025 }
3026 (*scan)++; // skip ','
3027 continue;
3028 }
3029 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3030 (*scan)++; // skip ':'
3031
3032 //
3033 // Read stride parameter
3034 //
3035 int sign = +1;
3036 for (;;) {
3037 SKIP_WS(*scan);
3038 if (**scan == '+') {
3039 (*scan)++; // skip '+'
3040 continue;
3041 }
3042 if (**scan == '-') {
3043 sign *= -1;
3044 (*scan)++; // skip '-'
3045 continue;
3046 }
3047 break;
3048 }
3049 SKIP_WS(*scan);
3050 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3051 "bad explicit places list");
3052 next = *scan;
3053 SKIP_DIGITS(next);
3054 stride = __kmp_str_to_int(*scan, *next);
3055 KMP_ASSERT(stride >= 0);
3056 *scan = next;
3057 stride *= sign;
3058
3059 //
3060 // valid follow sets are ',' and '}'
3061 //
3062 SKIP_WS(*scan);
3063 if (**scan == '}' || **scan == ',') {
3064 for (i = 0; i < count; i++) {
3065 if ((start > maxOsId) ||
3066 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3067 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3068 && (__kmp_affinity_type != affinity_none))) {
3069 KMP_WARNING(AffIgnoreInvalidProcID, start);
3070 }
3071 break; // don't proliferate warnings for large count
3072 }
3073 else {
3074 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3075 start += stride;
3076 (*setSize)++;
3077 }
3078 }
3079 if (**scan == '}') {
3080 break;
3081 }
3082 (*scan)++; // skip ','
3083 continue;
3084 }
3085
3086 KMP_ASSERT2(0, "bad explicit places list");
3087 }
3088}
3089
3090
3091static void
3092__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
3093 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3094{
3095 const char *next;
3096
3097 //
3098 // valid follow sets are '{' '!' and num
3099 //
3100 SKIP_WS(*scan);
3101 if (**scan == '{') {
3102 (*scan)++; // skip '{'
3103 __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
3104 setSize);
3105 KMP_ASSERT2(**scan == '}', "bad explicit places list");
3106 (*scan)++; // skip '}'
3107 }
3108 else if (**scan == '!') {
3109        (*scan)++; // skip '!'
3110        __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
3111        KMP_CPU_COMPLEMENT(tempMask);
3112 }
3113 else if ((**scan >= '0') && (**scan <= '9')) {
3114 next = *scan;
3115 SKIP_DIGITS(next);
3116 int num = __kmp_str_to_int(*scan, *next);
3117 KMP_ASSERT(num >= 0);
3118 if ((num > maxOsId) ||
3119 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3120 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3121 && (__kmp_affinity_type != affinity_none))) {
3122 KMP_WARNING(AffIgnoreInvalidProcID, num);
3123 }
3124 }
3125 else {
3126 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
3127 (*setSize)++;
3128 }
3129 *scan = next; // skip num
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003130 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003131 else {
3132 KMP_ASSERT2(0, "bad explicit places list");
3133 }
3134}
3135
3136
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003137//static void
3138void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003139__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
3140 unsigned int *out_numMasks, const char *placelist,
3141 kmp_affin_mask_t *osId2Mask, int maxOsId)
3142{
3143 const char *scan = placelist;
3144 const char *next = placelist;
3145
3146 numNewMasks = 2;
3147 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
3148 * __kmp_affin_mask_size);
3149 nextNewMask = 0;
3150
3151 kmp_affin_mask_t *tempMask = (kmp_affin_mask_t *)__kmp_allocate(
3152 __kmp_affin_mask_size);
3153 KMP_CPU_ZERO(tempMask);
3154 int setSize = 0;
3155
3156 for (;;) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003157 __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
3158
3159 //
3160 // valid follow sets are ',' ':' and EOL
3161 //
3162 SKIP_WS(scan);
3163 if (*scan == '\0' || *scan == ',') {
3164 if (setSize > 0) {
3165 ADD_MASK(tempMask);
3166 }
3167 KMP_CPU_ZERO(tempMask);
3168 setSize = 0;
3169 if (*scan == '\0') {
3170 break;
3171 }
3172 scan++; // skip ','
3173 continue;
3174 }
3175
3176 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3177 scan++; // skip ':'
3178
3179 //
3180 // Read count parameter
3181 //
3182 SKIP_WS(scan);
3183 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3184 "bad explicit places list");
3185 next = scan;
3186 SKIP_DIGITS(next);
Jim Cownie181b4bb2013-12-23 17:28:57 +00003187 int count = __kmp_str_to_int(scan, *next);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003188 KMP_ASSERT(count >= 0);
3189 scan = next;
3190
3191 //
3192 // valid follow sets are ',' ':' and EOL
3193 //
3194 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003195 int stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003196 if (*scan == '\0' || *scan == ',') {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003197 stride = +1;
3198 }
3199 else {
3200 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3201 scan++; // skip ':'
Jim Cownie5e8470a2013-09-27 10:38:44 +00003202
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003203 //
3204 // Read stride parameter
3205 //
3206 int sign = +1;
3207 for (;;) {
3208 SKIP_WS(scan);
3209 if (*scan == '+') {
3210 scan++; // skip '+'
3211 continue;
3212 }
3213 if (*scan == '-') {
3214 sign *= -1;
3215 scan++; // skip '-'
3216 continue;
3217 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003218 break;
3219 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003220 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003221 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3222 "bad explicit places list");
3223 next = scan;
3224 SKIP_DIGITS(next);
3225 stride = __kmp_str_to_int(scan, *next);
3226 KMP_DEBUG_ASSERT(stride >= 0);
3227 scan = next;
3228 stride *= sign;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003229 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003230
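        //
        // Replicate the place 'count' times: each iteration records the
        // current mask, then shifts it by 'stride' OS proc ids (upward for
        // a positive stride, downward for a negative one), dropping any
        // bits that fall on invalid or unavailable OS procs.
        //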
3231 if (stride > 0) {
3232 int i;
3233 for (i = 0; i < count; i++) {
3234 int j;
3235 if (setSize == 0) {
3236 break;
3237 }
3238 ADD_MASK(tempMask);
3239 setSize = 0;
3240 for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003241 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3242 KMP_CPU_CLR(j, tempMask);
3243 }
3244 else if ((j > maxOsId) ||
3245 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
3246 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3247 && (__kmp_affinity_type != affinity_none))) {
3248 KMP_WARNING(AffIgnoreInvalidProcID, j);
3249 }
3250 KMP_CPU_CLR(j, tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003251 }
3252 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003253 KMP_CPU_SET(j, tempMask);
3254 setSize++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003255 }
3256 }
3257 for (; j >= 0; j--) {
3258 KMP_CPU_CLR(j, tempMask);
3259 }
3260 }
3261 }
3262 else {
3263 int i;
3264 for (i = 0; i < count; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003265 int j;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003266 if (setSize == 0) {
3267 break;
3268 }
3269 ADD_MASK(tempMask);
3270 setSize = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003271 for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003272 j++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003273 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3274 KMP_CPU_CLR(j, tempMask);
3275 }
3276 else if ((j > maxOsId) ||
3277 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
3278 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3279 && (__kmp_affinity_type != affinity_none))) {
3280 KMP_WARNING(AffIgnoreInvalidProcID, j);
3281 }
3282 KMP_CPU_CLR(j, tempMask);
3283 }
3284 else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003285 KMP_CPU_SET(j, tempMask);
3286 setSize++;
3287 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003288 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003289 for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003290 KMP_CPU_CLR(j, tempMask);
3291 }
3292 }
3293 }
3294 KMP_CPU_ZERO(tempMask);
3295 setSize = 0;
3296
3297 //
3298 // valid follow sets are ',' and EOL
3299 //
3300 SKIP_WS(scan);
3301 if (*scan == '\0') {
3302 break;
3303 }
3304 if (*scan == ',') {
3305 scan++; // skip ','
3306 continue;
3307 }
3308
3309 KMP_ASSERT2(0, "bad explicit places list");
3310 }
3311
3312 *out_numMasks = nextNewMask;
3313 if (nextNewMask == 0) {
3314 *out_masks = NULL;
3315 KMP_INTERNAL_FREE(newMasks);
3316 return;
3317 }
3318 *out_masks
3319 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
3320 memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
3321 __kmp_free(tempMask);
3322 KMP_INTERNAL_FREE(newMasks);
3323}
3324
3325# endif /* OMP_40_ENABLED */
3326
3327#undef ADD_MASK
3328#undef ADD_MASK_OSID
3329
Jim Cownie5e8470a2013-09-27 10:38:44 +00003330static void
3331__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
3332{
3333 if ( __kmp_place_num_cores == 0 ) {
3334 if ( __kmp_place_num_threads_per_core == 0 ) {
3335 return; // no cores limiting actions requested, exit
3336 }
3337 __kmp_place_num_cores = nCoresPerPkg; // use all available cores
3338 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003339 if ( !__kmp_affinity_uniform_topology() ) {
3340 KMP_WARNING( AffThrPlaceNonUniform );
3341 return; // don't support non-uniform topology
3342 }
3343 if ( depth != 3 ) {
3344 KMP_WARNING( AffThrPlaceNonThreeLevel );
3345 return; // don't support not-3-level topology
Jim Cownie5e8470a2013-09-27 10:38:44 +00003346 }
3347 if ( __kmp_place_num_threads_per_core == 0 ) {
3348 __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
3349 }
Andrey Churbanov5cd50e32015-01-29 17:14:58 +00003350 if ( __kmp_place_core_offset + __kmp_place_num_cores > (unsigned int)nCoresPerPkg ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003351 KMP_WARNING( AffThrPlaceManyCores );
3352 return;
3353 }
3354
3355 AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
3356 nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
3357 int i, j, k, n_old = 0, n_new = 0;
3358 for ( i = 0; i < nPackages; ++i ) {
3359 for ( j = 0; j < nCoresPerPkg; ++j ) {
Andrey Churbanov5cd50e32015-01-29 17:14:58 +00003360 if ( (unsigned int)j < __kmp_place_core_offset || (unsigned int)j >= __kmp_place_core_offset + __kmp_place_num_cores ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003361 n_old += __kmp_nThreadsPerCore; // skip not-requested core
3362 } else {
3363 for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) {
Andrey Churbanov5cd50e32015-01-29 17:14:58 +00003364 if ( (unsigned int)k < __kmp_place_num_threads_per_core ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003365 newAddr[n_new] = (*pAddr)[n_old]; // copy requested core' data to new location
3366 n_new++;
3367 }
3368 n_old++;
3369 }
3370 }
3371 }
3372 }
3373 nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
3374 __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
3375 __kmp_avail_proc = n_new; // correct avail_proc
3376 __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
3377
3378 __kmp_free( *pAddr );
3379 *pAddr = newAddr; // replace old topology with new one
3380}
3381
Jim Cownie5e8470a2013-09-27 10:38:44 +00003382
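//
// File-scope state shared by the affinity initialization code: the machine
// topology map (address2os) and its depth (__kmp_aff_depth), plus an
// auxiliary per-proc table (procarr) that some affinity types use.
//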
3383static AddrUnsPair *address2os = NULL;
3384static int * procarr = NULL;
3385static int __kmp_aff_depth = 0;
3386
3387static void
3388__kmp_aux_affinity_initialize(void)
3389{
3390 if (__kmp_affinity_masks != NULL) {
3391 KMP_ASSERT(fullMask != NULL);
3392 return;
3393 }
3394
3395 //
3396 // Create the "full" mask - this defines all of the processors that we
3397 // consider to be in the machine model. If respect is set, then it is
3398 // the initialization thread's affinity mask. Otherwise, it is all
3399 // processors that we know about on the machine.
3400 //
3401 if (fullMask == NULL) {
3402 fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
3403 }
3404 if (KMP_AFFINITY_CAPABLE()) {
3405 if (__kmp_affinity_respect_mask) {
3406 __kmp_get_system_affinity(fullMask, TRUE);
3407
3408 //
3409 // Count the number of available processors.
3410 //
3411 unsigned i;
3412 __kmp_avail_proc = 0;
3413 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
3414 if (! KMP_CPU_ISSET(i, fullMask)) {
3415 continue;
3416 }
3417 __kmp_avail_proc++;
3418 }
3419 if (__kmp_avail_proc > __kmp_xproc) {
3420 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3421 && (__kmp_affinity_type != affinity_none))) {
3422 KMP_WARNING(ErrorInitializeAffinity);
3423 }
3424 __kmp_affinity_type = affinity_none;
3425 __kmp_affin_mask_size = 0;
3426 return;
3427 }
3428 }
3429 else {
3430 __kmp_affinity_entire_machine_mask(fullMask);
3431 __kmp_avail_proc = __kmp_xproc;
3432 }
3433 }
3434
3435 int depth = -1;
3436 kmp_i18n_id_t msg_id = kmp_i18n_null;
3437
3438 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00003439 // For backward compatibility, setting KMP_CPUINFO_FILE =>
Jim Cownie5e8470a2013-09-27 10:38:44 +00003440 // KMP_TOPOLOGY_METHOD=cpuinfo
3441 //
3442 if ((__kmp_cpuinfo_file != NULL) &&
3443 (__kmp_affinity_top_method == affinity_top_method_all)) {
3444 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
3445 }
3446
3447 if (__kmp_affinity_top_method == affinity_top_method_all) {
3448 //
3449 // In the default code path, errors are not fatal - we just try using
3450 // another method. We only emit a warning message if affinity is on,
3451        // or the verbose flag is set, and the nowarnings flag was not set.
3452 //
3453 const char *file_name = NULL;
3454 int line = 0;
3455
3456# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3457
3458 if (__kmp_affinity_verbose) {
3459 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
3460 }
3461
3462 file_name = NULL;
3463 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3464 if (depth == 0) {
3465 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3466 KMP_ASSERT(address2os == NULL);
3467 return;
3468 }
3469
3470 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003471 if (__kmp_affinity_verbose) {
3472 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003473 KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
3474 KMP_I18N_STR(DecodingLegacyAPIC));
3475 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003476 else {
3477 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
3478 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003479 }
3480
3481 file_name = NULL;
3482 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3483 if (depth == 0) {
3484 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3485 KMP_ASSERT(address2os == NULL);
3486 return;
3487 }
3488 }
3489
3490# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3491
3492# if KMP_OS_LINUX
3493
3494 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003495 if (__kmp_affinity_verbose) {
3496 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003497 KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
3498 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003499 else {
3500 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
3501 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003502 }
3503
3504 FILE *f = fopen("/proc/cpuinfo", "r");
3505 if (f == NULL) {
3506 msg_id = kmp_i18n_str_CantOpenCpuinfo;
3507 }
3508 else {
3509 file_name = "/proc/cpuinfo";
3510 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3511 fclose(f);
3512 if (depth == 0) {
3513 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3514 KMP_ASSERT(address2os == NULL);
3515 return;
3516 }
3517 }
3518 }
3519
3520# endif /* KMP_OS_LINUX */
3521
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003522# if KMP_GROUP_AFFINITY
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003523
3524 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
3525 if (__kmp_affinity_verbose) {
3526 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3527 }
3528
3529 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3530 KMP_ASSERT(depth != 0);
3531 }
3532
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003533# endif /* KMP_GROUP_AFFINITY */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003534
Jim Cownie5e8470a2013-09-27 10:38:44 +00003535 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003536 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003537 if (file_name == NULL) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003538 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003539 }
3540 else if (line == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003541 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003542 }
3543 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003544 KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003545 }
3546 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003547 // FIXME - print msg if msg_id = kmp_i18n_null ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00003548
3549 file_name = "";
3550 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3551 if (depth == 0) {
3552 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3553 KMP_ASSERT(address2os == NULL);
3554 return;
3555 }
3556 KMP_ASSERT(depth > 0);
3557 KMP_ASSERT(address2os != NULL);
3558 }
3559 }
3560
3561 //
3562    // If the user has specified that a particular topology discovery method
3563 // is to be used, then we abort if that method fails. The exception is
3564 // group affinity, which might have been implicitly set.
3565 //
3566
3567# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3568
3569 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
3570 if (__kmp_affinity_verbose) {
3571 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3572 KMP_I18N_STR(Decodingx2APIC));
3573 }
3574
3575 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3576 if (depth == 0) {
3577 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3578 KMP_ASSERT(address2os == NULL);
3579 return;
3580 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003581 if (depth < 0) {
3582 KMP_ASSERT(msg_id != kmp_i18n_null);
3583 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3584 }
3585 }
3586 else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
3587 if (__kmp_affinity_verbose) {
3588 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3589 KMP_I18N_STR(DecodingLegacyAPIC));
3590 }
3591
3592 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3593 if (depth == 0) {
3594 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3595 KMP_ASSERT(address2os == NULL);
3596 return;
3597 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003598 if (depth < 0) {
3599 KMP_ASSERT(msg_id != kmp_i18n_null);
3600 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3601 }
3602 }
3603
3604# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3605
3606 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
3607 const char *filename;
3608 if (__kmp_cpuinfo_file != NULL) {
3609 filename = __kmp_cpuinfo_file;
3610 }
3611 else {
3612 filename = "/proc/cpuinfo";
3613 }
3614
3615 if (__kmp_affinity_verbose) {
3616 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
3617 }
3618
3619 FILE *f = fopen(filename, "r");
3620 if (f == NULL) {
3621 int code = errno;
3622 if (__kmp_cpuinfo_file != NULL) {
3623 __kmp_msg(
3624 kmp_ms_fatal,
3625 KMP_MSG(CantOpenFileForReading, filename),
3626 KMP_ERR(code),
3627 KMP_HNT(NameComesFrom_CPUINFO_FILE),
3628 __kmp_msg_null
3629 );
3630 }
3631 else {
3632 __kmp_msg(
3633 kmp_ms_fatal,
3634 KMP_MSG(CantOpenFileForReading, filename),
3635 KMP_ERR(code),
3636 __kmp_msg_null
3637 );
3638 }
3639 }
3640 int line = 0;
3641 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3642 fclose(f);
3643 if (depth < 0) {
3644 KMP_ASSERT(msg_id != kmp_i18n_null);
3645 if (line > 0) {
3646 KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
3647 }
3648 else {
3649 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
3650 }
3651 }
3652 if (__kmp_affinity_type == affinity_none) {
3653 KMP_ASSERT(depth == 0);
3654 KMP_ASSERT(address2os == NULL);
3655 return;
3656 }
3657 }
3658
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003659# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00003660
3661 else if (__kmp_affinity_top_method == affinity_top_method_group) {
3662 if (__kmp_affinity_verbose) {
3663 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3664 }
3665
3666 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3667 KMP_ASSERT(depth != 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003668 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003669 KMP_ASSERT(msg_id != kmp_i18n_null);
3670 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003671 }
3672 }
3673
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003674# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003675
3676 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
3677 if (__kmp_affinity_verbose) {
3678 KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
3679 }
3680
3681 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3682 if (depth == 0) {
3683 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3684 KMP_ASSERT(address2os == NULL);
3685 return;
3686 }
3687 // should not fail
3688 KMP_ASSERT(depth > 0);
3689 KMP_ASSERT(address2os != NULL);
3690 }
3691
3692 if (address2os == NULL) {
3693 if (KMP_AFFINITY_CAPABLE()
3694 && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3695 && (__kmp_affinity_type != affinity_none)))) {
3696 KMP_WARNING(ErrorInitializeAffinity);
3697 }
3698 __kmp_affinity_type = affinity_none;
3699 __kmp_affin_mask_size = 0;
3700 return;
3701 }
3702
Jim Cownie5e8470a2013-09-27 10:38:44 +00003703 __kmp_apply_thread_places(&address2os, depth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003704
3705 //
3706 // Create the table of masks, indexed by thread Id.
3707 //
3708 unsigned maxIndex;
3709 unsigned numUnique;
3710 kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
3711 address2os, __kmp_avail_proc);
3712 if (__kmp_affinity_gran_levels == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003713 KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003714 }
3715
3716 //
3717 // Set the childNums vector in all Address objects. This must be done
3718 // before we can sort using __kmp_affinity_cmp_Address_child_num(),
3719 // which takes into account the setting of __kmp_affinity_compact.
3720 //
3721 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
3722
3723 switch (__kmp_affinity_type) {
3724
3725 case affinity_explicit:
3726 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
3727# if OMP_40_ENABLED
3728 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3729# endif
3730 {
3731 __kmp_affinity_process_proclist(&__kmp_affinity_masks,
3732 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3733 maxIndex);
3734 }
3735# if OMP_40_ENABLED
3736 else {
3737 __kmp_affinity_process_placelist(&__kmp_affinity_masks,
3738 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3739 maxIndex);
3740 }
3741# endif
3742 if (__kmp_affinity_num_masks == 0) {
3743 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3744 && (__kmp_affinity_type != affinity_none))) {
3745 KMP_WARNING(AffNoValidProcID);
3746 }
3747 __kmp_affinity_type = affinity_none;
3748 return;
3749 }
3750 break;
3751
3752 //
3753 // The other affinity types rely on sorting the Addresses according
3754 // to some permutation of the machine topology tree. Set
3755 // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
3756 // then jump to a common code fragment to do the sort and create
3757 // the array of affinity masks.
3758 //
3759
3760 case affinity_logical:
3761 __kmp_affinity_compact = 0;
3762 if (__kmp_affinity_offset) {
3763 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3764 % __kmp_avail_proc;
3765 }
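        // Note (illustrative): the requested offset is scaled by the number of
        // hardware threads per core and wrapped modulo the number of available
        // procs; e.g. with 2 threads per core and 8 available procs, an offset of
        // 3 becomes 6. The same scaling is applied for affinity_physical below.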
3766 goto sortAddresses;
3767
3768 case affinity_physical:
3769 if (__kmp_nThreadsPerCore > 1) {
3770 __kmp_affinity_compact = 1;
3771 if (__kmp_affinity_compact >= depth) {
3772 __kmp_affinity_compact = 0;
3773 }
3774 } else {
3775 __kmp_affinity_compact = 0;
3776 }
3777 if (__kmp_affinity_offset) {
3778 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3779 % __kmp_avail_proc;
3780 }
3781 goto sortAddresses;
3782
3783 case affinity_scatter:
3784 if (__kmp_affinity_compact >= depth) {
3785 __kmp_affinity_compact = 0;
3786 }
3787 else {
3788 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
3789 }
3790 goto sortAddresses;
3791
3792 case affinity_compact:
3793 if (__kmp_affinity_compact >= depth) {
3794 __kmp_affinity_compact = depth - 1;
3795 }
3796 goto sortAddresses;
3797
Jim Cownie5e8470a2013-09-27 10:38:44 +00003798 case affinity_balanced:
Andrey Churbanovf28f6132015-01-13 14:54:00 +00003799    // Balanced affinity works only for the single-package case
Jim Cownie5e8470a2013-09-27 10:38:44 +00003800 if( nPackages > 1 ) {
3801 if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
3802 KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
3803 }
3804 __kmp_affinity_type = affinity_none;
3805 return;
3806 } else if( __kmp_affinity_uniform_topology() ) {
3807 break;
3808 } else { // Non-uniform topology
3809
3810 // Save the depth for further usage
3811 __kmp_aff_depth = depth;
3812
3813 // Number of hyper threads per core in HT machine
3814 int nth_per_core = __kmp_nThreadsPerCore;
3815
3816 int core_level;
3817 if( nth_per_core > 1 ) {
3818 core_level = depth - 2;
3819 } else {
3820 core_level = depth - 1;
3821 }
3822 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
3823 int nproc = nth_per_core * ncores;
3824
3825 procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
3826 for( int i = 0; i < nproc; i++ ) {
3827 procarr[ i ] = -1;
3828 }
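            // procarr is a flat, core-major table: slot [core * nth_per_core + thread]
            // holds the OS proc id of that hardware thread context, or -1 if no
            // available proc maps to the slot. The loop below fills it from address2os.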
3829
3830 for( int i = 0; i < __kmp_avail_proc; i++ ) {
3831 int proc = address2os[ i ].second;
3832 // If depth == 3 then level=0 - package, level=1 - core, level=2 - thread.
3833 // If there is only one thread per core then depth == 2: level 0 - package,
3834 // level 1 - core.
3835 int level = depth - 1;
3836
3837 // __kmp_nth_per_core == 1
3838 int thread = 0;
3839 int core = address2os[ i ].first.labels[ level ];
3840                // If the thread level exists, i.e. there is more than one thread context per core
3841 if( nth_per_core > 1 ) {
3842 thread = address2os[ i ].first.labels[ level ] % nth_per_core;
3843 core = address2os[ i ].first.labels[ level - 1 ];
3844 }
3845 procarr[ core * nth_per_core + thread ] = proc;
3846 }
3847
3848 break;
3849 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003850
3851 sortAddresses:
3852 //
3853 // Allocate the gtid->affinity mask table.
3854 //
3855 if (__kmp_affinity_dups) {
3856 __kmp_affinity_num_masks = __kmp_avail_proc;
3857 }
3858 else {
3859 __kmp_affinity_num_masks = numUnique;
3860 }
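        // At this point __kmp_affinity_num_masks is either one mask per available
        // proc (dups allowed) or one per distinct granularity-level mask. For
        // example, with granularity=core on a 2-way SMT machine both hardware
        // threads of a core share one mask, so numUnique equals the core count.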
3861
3862# if OMP_40_ENABLED
3863 if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
3864 && ( __kmp_affinity_num_places > 0 )
3865 && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
3866 __kmp_affinity_num_masks = __kmp_affinity_num_places;
3867 }
3868# endif
3869
3870 __kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
3871 __kmp_affinity_num_masks * __kmp_affin_mask_size);
3872
3873 //
3874 // Sort the address2os table according to the current setting of
3875 // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
3876 //
3877 qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
3878 __kmp_affinity_cmp_Address_child_num);
3879 {
3880 int i;
3881 unsigned j;
3882 for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
3883 if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
3884 continue;
3885 }
3886 unsigned osId = address2os[i].second;
3887 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
3888 kmp_affin_mask_t *dest
3889 = KMP_CPU_INDEX(__kmp_affinity_masks, j);
3890 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
3891 KMP_CPU_COPY(dest, src);
3892 if (++j >= __kmp_affinity_num_masks) {
3893 break;
3894 }
3895 }
3896 KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
3897 }
3898 break;
3899
3900 default:
3901 KMP_ASSERT2(0, "Unexpected affinity setting");
3902 }
3903
3904 __kmp_free(osId2Mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003905 machine_hierarchy.init(address2os, __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003906}
3907
3908
3909void
3910__kmp_affinity_initialize(void)
3911{
3912 //
3913    // Much of the code above was written assuming that if a machine was not
3914 // affinity capable, then __kmp_affinity_type == affinity_none. We now
3915 // explicitly represent this as __kmp_affinity_type == affinity_disabled.
3916 //
3917 // There are too many checks for __kmp_affinity_type == affinity_none
3918 // in this code. Instead of trying to change them all, check if
3919 // __kmp_affinity_type == affinity_disabled, and if so, slam it with
3920 // affinity_none, call the real initialization routine, then restore
3921 // __kmp_affinity_type to affinity_disabled.
3922 //
3923 int disabled = (__kmp_affinity_type == affinity_disabled);
3924 if (! KMP_AFFINITY_CAPABLE()) {
3925 KMP_ASSERT(disabled);
3926 }
3927 if (disabled) {
3928 __kmp_affinity_type = affinity_none;
3929 }
3930 __kmp_aux_affinity_initialize();
3931 if (disabled) {
3932 __kmp_affinity_type = affinity_disabled;
3933 }
3934}
3935
3936
3937void
3938__kmp_affinity_uninitialize(void)
3939{
3940 if (__kmp_affinity_masks != NULL) {
3941 __kmp_free(__kmp_affinity_masks);
3942 __kmp_affinity_masks = NULL;
3943 }
3944 if (fullMask != NULL) {
3945 KMP_CPU_FREE(fullMask);
3946 fullMask = NULL;
3947 }
3948 __kmp_affinity_num_masks = 0;
3949# if OMP_40_ENABLED
3950 __kmp_affinity_num_places = 0;
3951# endif
3952 if (__kmp_affinity_proclist != NULL) {
3953 __kmp_free(__kmp_affinity_proclist);
3954 __kmp_affinity_proclist = NULL;
3955 }
3956 if( address2os != NULL ) {
3957 __kmp_free( address2os );
3958 address2os = NULL;
3959 }
3960 if( procarr != NULL ) {
3961 __kmp_free( procarr );
3962 procarr = NULL;
3963 }
3964}
3965
3966
3967void
3968__kmp_affinity_set_init_mask(int gtid, int isa_root)
3969{
3970 if (! KMP_AFFINITY_CAPABLE()) {
3971 return;
3972 }
3973
3974 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
3975 if (th->th.th_affin_mask == NULL) {
3976 KMP_CPU_ALLOC(th->th.th_affin_mask);
3977 }
3978 else {
3979 KMP_CPU_ZERO(th->th.th_affin_mask);
3980 }
3981
3982 //
3983    // Copy the thread mask to the kmp_info_t structure.
3984 // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
3985 // that has all of the OS proc ids set, or if __kmp_affinity_respect_mask
3986 // is set, then the full mask is the same as the mask of the initialization
3987 // thread.
3988 //
3989 kmp_affin_mask_t *mask;
3990 int i;
3991
3992# if OMP_40_ENABLED
3993 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3994# endif
3995 {
Andrey Churbanovf28f6132015-01-13 14:54:00 +00003996 if ((__kmp_affinity_type == affinity_none) || (__kmp_affinity_type == affinity_balanced)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003997 ) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003998# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00003999 if (__kmp_num_proc_groups > 1) {
4000 return;
4001 }
4002# endif
4003 KMP_ASSERT(fullMask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004004 i = KMP_PLACE_ALL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004005 mask = fullMask;
4006 }
4007 else {
4008 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4009 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4010 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4011 }
4012 }
4013# if OMP_40_ENABLED
4014 else {
4015 if ((! isa_root)
4016 || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004017# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004018 if (__kmp_num_proc_groups > 1) {
4019 return;
4020 }
4021# endif
4022 KMP_ASSERT(fullMask != NULL);
4023 i = KMP_PLACE_ALL;
4024 mask = fullMask;
4025 }
4026 else {
4027 //
4028 // int i = some hash function or just a counter that doesn't
4029 // always start at 0. Use gtid for now.
4030 //
4031 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4032 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4033 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4034 }
4035 }
4036# endif
4037
4038# if OMP_40_ENABLED
4039 th->th.th_current_place = i;
4040 if (isa_root) {
4041 th->th.th_new_place = i;
4042 th->th.th_first_place = 0;
4043 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4044 }
4045
4046 if (i == KMP_PLACE_ALL) {
4047 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
4048 gtid));
4049 }
4050 else {
4051 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
4052 gtid, i));
4053 }
4054# else
4055 if (i == -1) {
4056 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
4057 gtid));
4058 }
4059 else {
4060 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
4061 gtid, i));
4062 }
4063# endif /* OMP_40_ENABLED */
4064
4065 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4066
4067 if (__kmp_affinity_verbose) {
4068 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4069 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4070 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004071 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
4072 buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004073 }
4074
4075# if KMP_OS_WINDOWS
4076 //
4077 // On Windows* OS, the process affinity mask might have changed.
4078 // If the user didn't request affinity and this call fails,
4079 // just continue silently. See CQ171393.
4080 //
4081 if ( __kmp_affinity_type == affinity_none ) {
4082 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
4083 }
4084 else
4085# endif
4086 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4087}
4088
4089
4090# if OMP_40_ENABLED
4091
4092void
4093__kmp_affinity_set_place(int gtid)
4094{
4095 int retval;
4096
4097 if (! KMP_AFFINITY_CAPABLE()) {
4098 return;
4099 }
4100
4101 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4102
4103 KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
4104 gtid, th->th.th_new_place, th->th.th_current_place));
4105
4106 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00004107 // Check that the new place is within this thread's partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004108 //
4109 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004110 KMP_ASSERT(th->th.th_new_place >= 0);
4111 KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004112 if (th->th.th_first_place <= th->th.th_last_place) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004113 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004114 && (th->th.th_new_place <= th->th.th_last_place));
4115 }
4116 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004117 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004118 || (th->th.th_new_place >= th->th.th_last_place));
4119 }
4120
4121 //
4122    // Copy the thread mask to the kmp_info_t structure,
4123 // and set this thread's affinity.
4124 //
4125 kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
4126 th->th.th_new_place);
4127 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4128 th->th.th_current_place = th->th.th_new_place;
4129
4130 if (__kmp_affinity_verbose) {
4131 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4132 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4133 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004134 KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
4135 gtid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004136 }
4137 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4138}
4139
4140# endif /* OMP_40_ENABLED */
4141
4142
4143int
4144__kmp_aux_set_affinity(void **mask)
4145{
4146 int gtid;
4147 kmp_info_t *th;
4148 int retval;
4149
4150 if (! KMP_AFFINITY_CAPABLE()) {
4151 return -1;
4152 }
4153
4154 gtid = __kmp_entry_gtid();
4155 KA_TRACE(1000, ;{
4156 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4157 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4158 (kmp_affin_mask_t *)(*mask));
4159 __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
4160 gtid, buf);
4161 });
4162
4163 if (__kmp_env_consistency_check) {
4164 if ((mask == NULL) || (*mask == NULL)) {
4165 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4166 }
4167 else {
4168 unsigned proc;
4169 int num_procs = 0;
4170
4171 for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
4172 if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
4173 continue;
4174 }
4175 num_procs++;
4176 if (! KMP_CPU_ISSET(proc, fullMask)) {
4177 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4178 break;
4179 }
4180 }
4181 if (num_procs == 0) {
4182 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4183 }
4184
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004185# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004186 if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
4187 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4188 }
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004189# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004190
4191 }
4192 }
4193
4194 th = __kmp_threads[gtid];
4195 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4196 retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4197 if (retval == 0) {
4198 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
4199 }
4200
4201# if OMP_40_ENABLED
4202 th->th.th_current_place = KMP_PLACE_UNDEFINED;
4203 th->th.th_new_place = KMP_PLACE_UNDEFINED;
4204 th->th.th_first_place = 0;
4205 th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004206
4207 //
4208    // Turn off 4.0 affinity for the current thread at this parallel level.
4209 //
4210 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004211# endif
4212
4213 return retval;
4214}
4215
4216
4217int
4218__kmp_aux_get_affinity(void **mask)
4219{
4220 int gtid;
4221 int retval;
4222 kmp_info_t *th;
4223
4224 if (! KMP_AFFINITY_CAPABLE()) {
4225 return -1;
4226 }
4227
4228 gtid = __kmp_entry_gtid();
4229 th = __kmp_threads[gtid];
4230 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4231
4232 KA_TRACE(1000, ;{
4233 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4234 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4235 th->th.th_affin_mask);
4236 __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
4237 });
4238
4239 if (__kmp_env_consistency_check) {
4240 if ((mask == NULL) || (*mask == NULL)) {
4241 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
4242 }
4243 }
4244
4245# if !KMP_OS_WINDOWS
4246
4247 retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4248 KA_TRACE(1000, ;{
4249 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4250 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4251 (kmp_affin_mask_t *)(*mask));
4252 __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
4253 });
4254 return retval;
4255
4256# else
4257
4258 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
4259 return 0;
4260
4261# endif /* KMP_OS_WINDOWS */
4262
4263}
4264
Jim Cownie5e8470a2013-09-27 10:38:44 +00004265int
4266__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
4267{
4268 int retval;
4269
4270 if (! KMP_AFFINITY_CAPABLE()) {
4271 return -1;
4272 }
4273
4274 KA_TRACE(1000, ;{
4275 int gtid = __kmp_entry_gtid();
4276 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4277 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4278 (kmp_affin_mask_t *)(*mask));
4279 __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
4280 proc, gtid, buf);
4281 });
4282
4283 if (__kmp_env_consistency_check) {
4284 if ((mask == NULL) || (*mask == NULL)) {
4285 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
4286 }
4287 }
4288
4289 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4290 return -1;
4291 }
4292 if (! KMP_CPU_ISSET(proc, fullMask)) {
4293 return -2;
4294 }
4295
4296 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
4297 return 0;
4298}
4299
4300
4301int
4302__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
4303{
4304 int retval;
4305
4306 if (! KMP_AFFINITY_CAPABLE()) {
4307 return -1;
4308 }
4309
4310 KA_TRACE(1000, ;{
4311 int gtid = __kmp_entry_gtid();
4312 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4313 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4314 (kmp_affin_mask_t *)(*mask));
4315 __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
4316 proc, gtid, buf);
4317 });
4318
4319 if (__kmp_env_consistency_check) {
4320 if ((mask == NULL) || (*mask == NULL)) {
4321 KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
4322 }
4323 }
4324
4325 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4326 return -1;
4327 }
4328 if (! KMP_CPU_ISSET(proc, fullMask)) {
4329 return -2;
4330 }
4331
4332 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
4333 return 0;
4334}
4335
4336
4337int
4338__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
4339{
4340 int retval;
4341
4342 if (! KMP_AFFINITY_CAPABLE()) {
4343 return -1;
4344 }
4345
4346 KA_TRACE(1000, ;{
4347 int gtid = __kmp_entry_gtid();
4348 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4349 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4350 (kmp_affin_mask_t *)(*mask));
4351 __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
4352 proc, gtid, buf);
4353 });
4354
4355 if (__kmp_env_consistency_check) {
4356 if ((mask == NULL) || (*mask == NULL)) {
Andrey Churbanov4b2f17a2015-01-29 15:49:22 +00004357 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
Jim Cownie5e8470a2013-09-27 10:38:44 +00004358 }
4359 }
4360
4361 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4362 return 0;
4363 }
4364 if (! KMP_CPU_ISSET(proc, fullMask)) {
4365 return 0;
4366 }
4367
4368 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
4369}
4370
Jim Cownie5e8470a2013-09-27 10:38:44 +00004371
4372// Dynamic affinity settings - Affinity balanced
4373void __kmp_balanced_affinity( int tid, int nthreads )
4374{
4375 if( __kmp_affinity_uniform_topology() ) {
4376 int coreID;
4377 int threadID;
4378 // Number of hyper threads per core in HT machine
4379 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
4380 // Number of cores
4381 int ncores = __kmp_ncores;
4382 // How many threads will be bound to each core
4383 int chunk = nthreads / ncores;
4384 // How many cores will have an additional thread bound to it - "big cores"
4385 int big_cores = nthreads % ncores;
4386 // Number of threads on the big cores
4387 int big_nth = ( chunk + 1 ) * big_cores;
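        // Worked example: nthreads = 10 on ncores = 4 gives chunk = 2, big_cores = 2,
        // big_nth = 6, so threads 0-5 land on cores 0-1 (3 threads each) and
        // threads 6-9 land on cores 2-3 (2 threads each).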
4388 if( tid < big_nth ) {
4389 coreID = tid / (chunk + 1 );
4390 threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
4391 } else { //tid >= big_nth
4392 coreID = ( tid - big_cores ) / chunk;
4393 threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
4394 }
4395
4396 KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
4397 "Illegal set affinity operation when not capable");
4398
4399 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
4400 KMP_CPU_ZERO(mask);
4401
4402 // Granularity == thread
4403 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4404 int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
4405 KMP_CPU_SET( osID, mask);
4406 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4407 for( int i = 0; i < __kmp_nth_per_core; i++ ) {
4408 int osID;
4409 osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
4410 KMP_CPU_SET( osID, mask);
4411 }
4412 }
4413 if (__kmp_affinity_verbose) {
4414 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4415 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004416 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4417 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004418 }
4419 __kmp_set_system_affinity( mask, TRUE );
4420 } else { // Non-uniform topology
4421
4422 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
4423 KMP_CPU_ZERO(mask);
4424
4425 // Number of hyper threads per core in HT machine
4426 int nth_per_core = __kmp_nThreadsPerCore;
4427 int core_level;
4428 if( nth_per_core > 1 ) {
4429 core_level = __kmp_aff_depth - 2;
4430 } else {
4431 core_level = __kmp_aff_depth - 1;
4432 }
4433
4434        // Number of cores - maximum value; it does not count trailing cores with 0 processors
4435 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
4436
4437        // For better performance, handle the special case nthreads == __kmp_avail_proc separately
4438 if( nthreads == __kmp_avail_proc ) {
4439 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4440 int osID = address2os[ tid ].second;
4441 KMP_CPU_SET( osID, mask);
4442 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4443 int coreID = address2os[ tid ].first.labels[ core_level ];
4444            // Count the osIDs found for the current core; there can be at most nth_per_core of them.
4445            // Since address2os is sorted, we can break once cnt == nth_per_core.
4446 int cnt = 0;
4447 for( int i = 0; i < __kmp_avail_proc; i++ ) {
4448 int osID = address2os[ i ].second;
4449 int core = address2os[ i ].first.labels[ core_level ];
4450 if( core == coreID ) {
4451 KMP_CPU_SET( osID, mask);
4452 cnt++;
4453 if( cnt == nth_per_core ) {
4454 break;
4455 }
4456 }
4457 }
4458 }
4459 } else if( nthreads <= __kmp_ncores ) {
4460
4461 int core = 0;
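            // Walk the cores in order, skipping cores with no usable context in
            // procarr; the tid-th usable core receives this thread. For
            // granularity=fine/thread only the first usable context is set in the
            // mask, otherwise every usable context on the core is set.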
4462 for( int i = 0; i < ncores; i++ ) {
4463 // Check if this core from procarr[] is in the mask
4464 int in_mask = 0;
4465 for( int j = 0; j < nth_per_core; j++ ) {
4466 if( procarr[ i * nth_per_core + j ] != - 1 ) {
4467 in_mask = 1;
4468 break;
4469 }
4470 }
4471 if( in_mask ) {
4472 if( tid == core ) {
4473 for( int j = 0; j < nth_per_core; j++ ) {
4474 int osID = procarr[ i * nth_per_core + j ];
4475 if( osID != -1 ) {
4476 KMP_CPU_SET( osID, mask );
4477 // For granularity=thread it is enough to set the first available osID for this core
4478 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4479 break;
4480 }
4481 }
4482 }
4483 break;
4484 } else {
4485 core++;
4486 }
4487 }
4488 }
4489
4490 } else { // nthreads > __kmp_ncores
4491
4492 // Array to save the number of processors at each core
4493 int nproc_at_core[ ncores ];
4494 // Array to save the number of cores with "x" available processors;
4495 int ncores_with_x_procs[ nth_per_core + 1 ];
4496 // Array to save the number of cores with # procs from x to nth_per_core
4497 int ncores_with_x_to_max_procs[ nth_per_core + 1 ];
4498
4499 for( int i = 0; i <= nth_per_core; i++ ) {
4500 ncores_with_x_procs[ i ] = 0;
4501 ncores_with_x_to_max_procs[ i ] = 0;
4502 }
4503
4504 for( int i = 0; i < ncores; i++ ) {
4505 int cnt = 0;
4506 for( int j = 0; j < nth_per_core; j++ ) {
4507 if( procarr[ i * nth_per_core + j ] != -1 ) {
4508 cnt++;
4509 }
4510 }
4511 nproc_at_core[ i ] = cnt;
4512 ncores_with_x_procs[ cnt ]++;
4513 }
4514
4515 for( int i = 0; i <= nth_per_core; i++ ) {
4516 for( int j = i; j <= nth_per_core; j++ ) {
4517 ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
4518 }
4519 }
4520
4521 // Max number of processors
4522 int nproc = nth_per_core * ncores;
4523            // An array keeping the number of threads assigned to each context
4524 int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
4525 for( int i = 0; i < nproc; i++ ) {
4526 newarr[ i ] = 0;
4527 }
4528
4529 int nth = nthreads;
4530 int flag = 0;
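            // Deal the threads out over the usable contexts: while flag == 0 each
            // free context receives at most one thread; after the first full pass
            // flag is set to 1 and later sweeps may stack additional threads onto
            // already-used contexts until all nthreads are placed.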
4531 while( nth > 0 ) {
4532 for( int j = 1; j <= nth_per_core; j++ ) {
4533 int cnt = ncores_with_x_to_max_procs[ j ];
4534 for( int i = 0; i < ncores; i++ ) {
4535 // Skip the core with 0 processors
4536 if( nproc_at_core[ i ] == 0 ) {
4537 continue;
4538 }
4539 for( int k = 0; k < nth_per_core; k++ ) {
4540 if( procarr[ i * nth_per_core + k ] != -1 ) {
4541 if( newarr[ i * nth_per_core + k ] == 0 ) {
4542 newarr[ i * nth_per_core + k ] = 1;
4543 cnt--;
4544 nth--;
4545 break;
4546 } else {
4547 if( flag != 0 ) {
4548 newarr[ i * nth_per_core + k ] ++;
4549 cnt--;
4550 nth--;
4551 break;
4552 }
4553 }
4554 }
4555 }
4556 if( cnt == 0 || nth == 0 ) {
4557 break;
4558 }
4559 }
4560 if( nth == 0 ) {
4561 break;
4562 }
4563 }
4564 flag = 1;
4565 }
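            // newarr[i] now holds how many threads were assigned to context i; the
            // running prefix sum below finds the context (and hence core) that owns
            // this tid.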
4566 int sum = 0;
4567 for( int i = 0; i < nproc; i++ ) {
4568 sum += newarr[ i ];
4569 if( sum > tid ) {
4570 // Granularity == thread
4571 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4572 int osID = procarr[ i ];
4573 KMP_CPU_SET( osID, mask);
4574 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4575 int coreID = i / nth_per_core;
4576 for( int ii = 0; ii < nth_per_core; ii++ ) {
4577 int osID = procarr[ coreID * nth_per_core + ii ];
4578 if( osID != -1 ) {
4579 KMP_CPU_SET( osID, mask);
4580 }
4581 }
4582 }
4583 break;
4584 }
4585 }
4586 __kmp_free( newarr );
4587 }
4588
4589 if (__kmp_affinity_verbose) {
4590 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4591 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004592 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4593 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004594 }
4595 __kmp_set_system_affinity( mask, TRUE );
4596 }
4597}
4598
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004599#else
4600 // affinity not supported
4601
4602kmp_uint32 mac_skipPerLevel[7];
4603kmp_uint32 mac_depth;
4604kmp_uint8 mac_leaf_kids;
4605void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
4606 static int first = 1;
4607 if (first) {
4608 const kmp_uint32 maxLevels = 7;
4609 kmp_uint32 numPerLevel[maxLevels];
4610
4611 for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
4612 numPerLevel[i] = 1;
4613 mac_skipPerLevel[i] = 1;
4614 }
4615
4616 mac_depth = 2;
4617 numPerLevel[0] = nproc;
4618
4619 kmp_uint32 branch = 4;
4620 if (numPerLevel[0] == 1) branch = nproc/4;
4621 if (branch<4) branch=4;
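        // Worked example: nproc = 8 leaves numPerLevel = {4, 2, 1, ...}, mac_depth = 3,
        // mac_skipPerLevel = {1, 4, 8, ...} and mac_leaf_kids = 3 after the loops
        // below, i.e. a three-level tree with 4 leaves under each bottom-level node.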
4622 for (kmp_uint32 d=0; d<mac_depth-1; ++d) { // optimize hierarchy width
4623 while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
4624 if (numPerLevel[d] & 1) numPerLevel[d]++;
4625 numPerLevel[d] = numPerLevel[d] >> 1;
4626 if (numPerLevel[d+1] == 1) mac_depth++;
4627 numPerLevel[d+1] = numPerLevel[d+1] << 1;
4628 }
4629 if(numPerLevel[0] == 1) {
4630 branch = branch >> 1;
4631 if (branch<4) branch = 4;
4632 }
4633 }
4634
4635 for (kmp_uint32 i=1; i<mac_depth; ++i)
4636 mac_skipPerLevel[i] = numPerLevel[i-1] * mac_skipPerLevel[i-1];
4637 mac_leaf_kids = (kmp_uint8)numPerLevel[0]-1;
4638 first=0;
4639 }
4640 thr_bar->depth = mac_depth;
4641 thr_bar->base_leaf_kids = mac_leaf_kids;
4642 thr_bar->skip_per_level = mac_skipPerLevel;
4643}
4644
Alp Toker763b9392014-02-28 09:42:41 +00004645#endif // KMP_AFFINITY_SUPPORTED