/*
 * kmp_affinity.cpp -- affinity management
 * $Revision: 43473 $
 * $Date: 2014-09-26 15:02:57 -0500 (Fri, 26 Sep 2014) $
 */
6
7
8//===----------------------------------------------------------------------===//
9//
10// The LLVM Compiler Infrastructure
11//
12// This file is dual licensed under the MIT and the University of Illinois Open
13// Source Licenses. See LICENSE.txt for details.
14//
15//===----------------------------------------------------------------------===//
16
17
18#include "kmp.h"
19#include "kmp_i18n.h"
20#include "kmp_io.h"
21#include "kmp_str.h"
#include "kmp_wrapper_getpid.h"

#if KMP_AFFINITY_SUPPORTED

26//
27// Print the affinity mask to the character array in a pretty format.
28//
29char *
30__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
31{
32 KMP_ASSERT(buf_len >= 40);
33 char *scan = buf;
34 char *end = buf + buf_len - 1;
35
36 //
37 // Find first element / check for empty set.
38 //
39 size_t i;
40 for (i = 0; i < KMP_CPU_SETSIZE; i++) {
41 if (KMP_CPU_ISSET(i, mask)) {
42 break;
43 }
44 }
45 if (i == KMP_CPU_SETSIZE) {
46 sprintf(scan, "{<empty>}");
47 while (*scan != '\0') scan++;
48 KMP_ASSERT(scan <= end);
49 return buf;
50 }
51
    sprintf(scan, "{%ld", (long)i);
    while (*scan != '\0') scan++;
54 i++;
55 for (; i < KMP_CPU_SETSIZE; i++) {
56 if (! KMP_CPU_ISSET(i, mask)) {
57 continue;
58 }
59
60 //
61 // Check for buffer overflow. A string of the form ",<n>" will have
62 // at most 10 characters, plus we want to leave room to print ",...}"
63 // if the set is too large to print for a total of 15 characters.
64 // We already left room for '\0' in setting end.
65 //
66 if (end - scan < 15) {
67 break;
68 }
        sprintf(scan, ",%-ld", (long)i);
        while (*scan != '\0') scan++;
71 }
72 if (i < KMP_CPU_SETSIZE) {
73 sprintf(scan, ",...");
74 while (*scan != '\0') scan++;
75 }
76 sprintf(scan, "}");
77 while (*scan != '\0') scan++;
78 KMP_ASSERT(scan <= end);
79 return buf;
80}
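// Illustrative example (not from the original source): for a mask containing
// OS procs 0-3 and 8, the routine above produces "{0,1,2,3,8}"; if the set is
// too large for the remaining buffer space, the output is truncated and ends
// with ",...}".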
81
82
83void
84__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
85{
86 KMP_CPU_ZERO(mask);
87
# if KMP_GROUP_AFFINITY

90 if (__kmp_num_proc_groups > 1) {
91 int group;
        KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
93 for (group = 0; group < __kmp_num_proc_groups; group++) {
94 int i;
95 int num = __kmp_GetActiveProcessorCount(group);
96 for (i = 0; i < num; i++) {
97 KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
98 }
99 }
100 }
101 else
102
# endif /* KMP_GROUP_AFFINITY */

105 {
106 int proc;
107 for (proc = 0; proc < __kmp_xproc; proc++) {
108 KMP_CPU_SET(proc, mask);
109 }
110 }
111}
112
113
114//
115// In Linux* OS debug & cover (-O0) builds, we need to avoid inline member
116// functions.
117//
118// The icc codegen emits sections with extremely long names, of the form
119// ".gnu.linkonce.<mangled_name>". There seems to have been a linker bug
120// introduced between GNU ld version 2.14.90.0.4 and 2.15.92.0.2 involving
121// some sort of memory corruption or table overflow that is triggered by
122// these long strings. I checked the latest version of the linker -
123// GNU ld (Linux* OS/GNU Binutils) 2.18.50.0.7.20080422 - and the bug is not
124// fixed.
125//
126// Unfortunately, my attempts to reproduce it in a smaller example have
127// failed - I'm not sure what the prospects are of getting it fixed
128// properly - but we need a reproducer smaller than all of libiomp.
129//
130// Work around the problem by avoiding inline constructors in such builds.
131// We do this for all platforms, not just Linux* OS - non-inline functions are
// more debuggable and provide better coverage than inline functions.
133// Use inline functions in shipping libs, for performance.
134//
135
136# if !defined(KMP_DEBUG) && !defined(COVER)
137
138class Address {
139public:
140 static const unsigned maxDepth = 32;
141 unsigned labels[maxDepth];
142 unsigned childNums[maxDepth];
143 unsigned depth;
144 unsigned leader;
145 Address(unsigned _depth)
146 : depth(_depth), leader(FALSE) {
147 }
148 Address &operator=(const Address &b) {
149 depth = b.depth;
150 for (unsigned i = 0; i < depth; i++) {
151 labels[i] = b.labels[i];
152 childNums[i] = b.childNums[i];
153 }
154 leader = FALSE;
155 return *this;
156 }
157 bool operator==(const Address &b) const {
158 if (depth != b.depth)
159 return false;
160 for (unsigned i = 0; i < depth; i++)
161 if(labels[i] != b.labels[i])
162 return false;
163 return true;
164 }
165 bool isClose(const Address &b, int level) const {
166 if (depth != b.depth)
167 return false;
168 if ((unsigned)level >= depth)
169 return true;
170 for (unsigned i = 0; i < (depth - level); i++)
171 if(labels[i] != b.labels[i])
172 return false;
173 return true;
174 }
175 bool operator!=(const Address &b) const {
176 return !operator==(b);
177 }
178};
179
180class AddrUnsPair {
181public:
182 Address first;
183 unsigned second;
184 AddrUnsPair(Address _first, unsigned _second)
185 : first(_first), second(_second) {
186 }
187 AddrUnsPair &operator=(const AddrUnsPair &b)
188 {
189 first = b.first;
190 second = b.second;
191 return *this;
192 }
193};
194
195# else
196
197class Address {
198public:
199 static const unsigned maxDepth = 32;
200 unsigned labels[maxDepth];
201 unsigned childNums[maxDepth];
202 unsigned depth;
203 unsigned leader;
204 Address(unsigned _depth);
205 Address &operator=(const Address &b);
206 bool operator==(const Address &b) const;
207 bool isClose(const Address &b, int level) const;
208 bool operator!=(const Address &b) const;
209};
210
211Address::Address(unsigned _depth)
212{
213 depth = _depth;
214 leader = FALSE;
215}
216
217Address &Address::operator=(const Address &b) {
218 depth = b.depth;
219 for (unsigned i = 0; i < depth; i++) {
220 labels[i] = b.labels[i];
221 childNums[i] = b.childNums[i];
222 }
223 leader = FALSE;
224 return *this;
225}
226
227bool Address::operator==(const Address &b) const {
228 if (depth != b.depth)
229 return false;
230 for (unsigned i = 0; i < depth; i++)
231 if(labels[i] != b.labels[i])
232 return false;
233 return true;
234}
235
236bool Address::isClose(const Address &b, int level) const {
237 if (depth != b.depth)
238 return false;
239 if ((unsigned)level >= depth)
240 return true;
241 for (unsigned i = 0; i < (depth - level); i++)
242 if(labels[i] != b.labels[i])
243 return false;
244 return true;
245}
246
247bool Address::operator!=(const Address &b) const {
248 return !operator==(b);
249}
250
251class AddrUnsPair {
252public:
253 Address first;
254 unsigned second;
255 AddrUnsPair(Address _first, unsigned _second);
256 AddrUnsPair &operator=(const AddrUnsPair &b);
257};
258
259AddrUnsPair::AddrUnsPair(Address _first, unsigned _second)
260 : first(_first), second(_second)
261{
262}
263
264AddrUnsPair &AddrUnsPair::operator=(const AddrUnsPair &b)
265{
266 first = b.first;
267 second = b.second;
268 return *this;
269}
270
271# endif /* !defined(KMP_DEBUG) && !defined(COVER) */
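// Illustrative example (not from the original source): on a machine modeled
// with depth 3 (package, core, thread), the OS proc on package 1, core 0,
// thread 1 would carry labels {1, 0, 1}; an AddrUnsPair pairs that Address
// with the corresponding OS proc id.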
272
273
274static int
275__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
276{
277 const Address *aa = (const Address *)&(((AddrUnsPair *)a)
278 ->first);
279 const Address *bb = (const Address *)&(((AddrUnsPair *)b)
280 ->first);
281 unsigned depth = aa->depth;
282 unsigned i;
283 KMP_DEBUG_ASSERT(depth == bb->depth);
284 for (i = 0; i < depth; i++) {
285 if (aa->labels[i] < bb->labels[i]) return -1;
286 if (aa->labels[i] > bb->labels[i]) return 1;
287 }
288 return 0;
289}
290
291
292static int
293__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
294{
295 const Address *aa = (const Address *)&(((AddrUnsPair *)a)
296 ->first);
297 const Address *bb = (const Address *)&(((AddrUnsPair *)b)
298 ->first);
299 unsigned depth = aa->depth;
300 unsigned i;
301 KMP_DEBUG_ASSERT(depth == bb->depth);
302 KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
303 KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
304 for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
305 int j = depth - i - 1;
306 if (aa->childNums[j] < bb->childNums[j]) return -1;
307 if (aa->childNums[j] > bb->childNums[j]) return 1;
308 }
309 for (; i < depth; i++) {
310 int j = i - __kmp_affinity_compact;
311 if (aa->childNums[j] < bb->childNums[j]) return -1;
312 if (aa->childNums[j] > bb->childNums[j]) return 1;
313 }
314 return 0;
315}
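// Illustrative note (not from the original source): with depth == 3 and
// __kmp_affinity_compact == 1, the comparator above compares childNums[2]
// (the innermost level) first, then childNums[0] and childNums[1], so entries
// sharing the same innermost child number end up adjacent in the sorted table.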
316
/** A structure for holding machine-specific hierarchy info to be computed once at init. */
318class hierarchy_info {
319public:
320 /** Typical levels are threads/core, cores/package or socket, packages/node, nodes/machine,
321 etc. We don't want to get specific with nomenclature */
322 static const kmp_uint32 maxLevels=7;
323
324 /** This is specifically the depth of the machine configuration hierarchy, in terms of the
325 number of levels along the longest path from root to any leaf. It corresponds to the
326 number of entries in numPerLevel if we exclude all but one trailing 1. */
327 kmp_uint32 depth;
328 kmp_uint32 base_depth;
329 kmp_uint32 base_num_threads;
330 bool uninitialized;
331
332 /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
333 node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
334 and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
335 kmp_uint32 numPerLevel[maxLevels];
336 kmp_uint32 skipPerLevel[maxLevels];
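    /** Worked example (illustrative, not from the original source): for the
        4-package, 4-core, 2-HT machine above, applying the formula in init()
        below to numPerLevel = {2, 4, 4, 1, 1} yields skipPerLevel[0..3] =
        {1, 2, 8, 32}; skipPerLevel[i] is the number of leaves (threads)
        spanned by one node at level i. */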
337
338 void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
339 int hier_depth = adr2os[0].first.depth;
340 int level = 0;
341 for (int i=hier_depth-1; i>=0; --i) {
342 int max = -1;
343 for (int j=0; j<num_addrs; ++j) {
344 int next = adr2os[j].first.childNums[i];
345 if (next > max) max = next;
346 }
347 numPerLevel[level] = max+1;
348 ++level;
349 }
350 }
351
352 hierarchy_info() : depth(1), uninitialized(true) {}
353 void init(AddrUnsPair *adr2os, int num_addrs)
354 {
355 uninitialized = false;
356 for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
357 numPerLevel[i] = 1;
358 skipPerLevel[i] = 1;
359 }
360
361 // Sort table by physical ID
362 if (adr2os) {
363 qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
364 deriveLevels(adr2os, num_addrs);
365 }
366 else {
367 numPerLevel[0] = 4;
368 numPerLevel[1] = num_addrs/4;
369 if (num_addrs%4) numPerLevel[1]++;
370 }
371
372 base_num_threads = num_addrs;
373 for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
374 if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
375 depth++;
376
377 kmp_uint32 branch = 4;
378 if (numPerLevel[0] == 1) branch = num_addrs/4;
379 if (branch<4) branch=4;
380 for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
381 while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
382 if (numPerLevel[d] & 1) numPerLevel[d]++;
383 numPerLevel[d] = numPerLevel[d] >> 1;
384 if (numPerLevel[d+1] == 1) depth++;
385 numPerLevel[d+1] = numPerLevel[d+1] << 1;
386 }
387 if(numPerLevel[0] == 1) {
388 branch = branch >> 1;
389 if (branch<4) branch = 4;
390 }
391 }
392
393 for (kmp_uint32 i=1; i<depth; ++i)
394 skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
395
396 base_depth = depth;
397 }
398};
399
400static hierarchy_info machine_hierarchy;
401
402void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
403 if (machine_hierarchy.uninitialized)
404 machine_hierarchy.init(NULL, nproc);
405
406 if (nproc <= machine_hierarchy.base_num_threads)
407 machine_hierarchy.depth = machine_hierarchy.base_depth;
408 KMP_DEBUG_ASSERT(machine_hierarchy.depth > 0);
409 while (nproc > machine_hierarchy.skipPerLevel[machine_hierarchy.depth-1]) {
410 machine_hierarchy.depth++;
411 machine_hierarchy.skipPerLevel[machine_hierarchy.depth-1] = 2*machine_hierarchy.skipPerLevel[machine_hierarchy.depth-2];
412 }
413 thr_bar->depth = machine_hierarchy.depth;
414 thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
415 thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
416}
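// Illustrative note (not from the original source): if nproc exceeds the span
// of the current root level, the loop above appends extra levels, each covering
// twice as many threads as the level below it, until the hierarchy spans nproc.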

//
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the child_num
// vector of the address object. This is done in case the labels used for
// the children at one node of the hierarchy differ from those used for
// another node at the same level. Example: suppose the machine has 2 nodes
424// with 2 packages each. The first node contains packages 601 and 602, and
425// second node contains packages 603 and 604. If we try to sort the table
426// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
427// because we are paying attention to the labels themselves, not the ordinal
428// child numbers. By using the child numbers in the sort, the result is
429// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
430//
431static void
432__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
433 int numAddrs)
434{
435 KMP_DEBUG_ASSERT(numAddrs > 0);
436 int depth = address2os->first.depth;
437 unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
438 unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
439 * sizeof(unsigned));
440 int labCt;
441 for (labCt = 0; labCt < depth; labCt++) {
442 address2os[0].first.childNums[labCt] = counts[labCt] = 0;
443 lastLabel[labCt] = address2os[0].first.labels[labCt];
444 }
445 int i;
446 for (i = 1; i < numAddrs; i++) {
447 for (labCt = 0; labCt < depth; labCt++) {
448 if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
449 int labCt2;
450 for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
451 counts[labCt2] = 0;
452 lastLabel[labCt2] = address2os[i].first.labels[labCt2];
453 }
454 counts[labCt]++;
455 lastLabel[labCt] = address2os[i].first.labels[labCt];
456 break;
457 }
458 }
459 for (labCt = 0; labCt < depth; labCt++) {
460 address2os[i].first.childNums[labCt] = counts[labCt];
461 }
462 for (; labCt < (int)Address::maxDepth; labCt++) {
463 address2os[i].first.childNums[labCt] = 0;
464 }
465 }
466}
467
468
469//
470// All of the __kmp_affinity_create_*_map() routines should set
471// __kmp_affinity_masks to a vector of affinity mask objects of length
472// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
473// return the number of levels in the machine topology tree (zero if
474// __kmp_affinity_type == affinity_none).
475//
476// All of the __kmp_affinity_create_*_map() routines should set *fullMask
477// to the affinity mask for the initialization thread. They need to save and
478// restore the mask, and it could be needed later, so saving it is just an
479// optimization to avoid calling kmp_get_system_affinity() again.
480//
481static kmp_affin_mask_t *fullMask = NULL;
482
483kmp_affin_mask_t *
484__kmp_affinity_get_fullMask() { return fullMask; }
485
486
487static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif

493//
494// __kmp_affinity_uniform_topology() doesn't work when called from
495// places which support arbitrarily many levels in the machine topology
496// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
// and __kmp_affinity_create_x2apicid_map().
498//
499inline static bool
500__kmp_affinity_uniform_topology()
501{
502 return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
503}
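// Illustrative example (not from the original source): 2 packages x 8 cores
// x 2 threads with all 32 procs available gives a uniform topology; if some
// procs are excluded from the machine model, __kmp_avail_proc falls below the
// product and the topology is reported as non-uniform.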
504
505
506//
507// Print out the detailed machine topology map, i.e. the physical locations
508// of each OS proc.
509//
510static void
511__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
512 int pkgLevel, int coreLevel, int threadLevel)
513{
514 int proc;
515
516 KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
517 for (proc = 0; proc < len; proc++) {
518 int level;
519 kmp_str_buf_t buf;
520 __kmp_str_buf_init(&buf);
521 for (level = 0; level < depth; level++) {
522 if (level == threadLevel) {
523 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
524 }
525 else if (level == coreLevel) {
526 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
527 }
528 else if (level == pkgLevel) {
529 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
530 }
531 else if (level > pkgLevel) {
532 __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
533 level - pkgLevel - 1);
534 }
535 else {
536 __kmp_str_buf_print(&buf, "L%d ", level);
537 }
538 __kmp_str_buf_print(&buf, "%d ",
539 address2os[proc].first.labels[level]);
540 }
541 KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
542 buf.str);
543 __kmp_str_buf_free(&buf);
544 }
545}
546
547
548//
549// If we don't know how to retrieve the machine's processor topology, or
550// encounter an error in doing so, this routine is called to form a "flat"
551// mapping of os thread id's <-> processor id's.
552//
553static int
554__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
555 kmp_i18n_id_t *const msg_id)
556{
557 *address2os = NULL;
558 *msg_id = kmp_i18n_null;
559
560 //
    // Even if __kmp_affinity_type == affinity_none, this routine might still
    // be called to set __kmp_ncores, as well as
    // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
564 //
565 if (! KMP_AFFINITY_CAPABLE()) {
566 KMP_ASSERT(__kmp_affinity_type == affinity_none);
567 __kmp_ncores = nPackages = __kmp_xproc;
568 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
570 KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
571 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
572 KMP_INFORM(Uniform, "KMP_AFFINITY");
573 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
574 __kmp_nThreadsPerCore, __kmp_ncores);
575 }
576 return 0;
577 }
578
579 //
580 // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages. Make sure all these vars are set
583 // correctly, and return now if affinity is not enabled.
584 //
585 __kmp_ncores = nPackages = __kmp_avail_proc;
586 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
588 char buf[KMP_AFFIN_MASK_PRINT_LEN];
589 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
590
591 KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
592 if (__kmp_affinity_respect_mask) {
593 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
594 } else {
595 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
596 }
597 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
598 KMP_INFORM(Uniform, "KMP_AFFINITY");
599 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
600 __kmp_nThreadsPerCore, __kmp_ncores);
601 }
602 if (__kmp_affinity_type == affinity_none) {
603 return 0;
604 }
605
606 //
    // Construct the data structure to be returned.
608 //
609 *address2os = (AddrUnsPair*)
610 __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
611 int avail_ct = 0;
612 unsigned int i;
613 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
614 //
615 // Skip this proc if it is not included in the machine model.
616 //
617 if (! KMP_CPU_ISSET(i, fullMask)) {
618 continue;
619 }
620
621 Address addr(1);
622 addr.labels[0] = i;
623 (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
624 }
625 if (__kmp_affinity_verbose) {
626 KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
627 }
628
629 if (__kmp_affinity_gran_levels < 0) {
630 //
631 // Only the package level is modeled in the machine topology map,
632 // so the #levels of granularity is either 0 or 1.
633 //
634 if (__kmp_affinity_gran > affinity_gran_package) {
635 __kmp_affinity_gran_levels = 1;
636 }
637 else {
638 __kmp_affinity_gran_levels = 0;
639 }
640 }
641 return 1;
642}
643
644
# if KMP_GROUP_AFFINITY

647//
648// If multiple Windows* OS processor groups exist, we can create a 2-level
649// topology map with the groups at level 0 and the individual procs at
650// level 1.
651//
652// This facilitates letting the threads float among all procs in a group,
653// if granularity=group (the default when there are multiple groups).
654//
655static int
656__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
657 kmp_i18n_id_t *const msg_id)
658{
659 *address2os = NULL;
660 *msg_id = kmp_i18n_null;
661
662 //
663 // If we don't have multiple processor groups, return now.
664 // The flat mapping will be used.
665 //
666 if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
667 // FIXME set *msg_id
668 return -1;
669 }
670
671 //
    // Construct the data structure to be returned.
673 //
674 *address2os = (AddrUnsPair*)
675 __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
676 int avail_ct = 0;
677 int i;
678 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
679 //
680 // Skip this proc if it is not included in the machine model.
681 //
682 if (! KMP_CPU_ISSET(i, fullMask)) {
683 continue;
684 }
685
686 Address addr(2);
687 addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
688 addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
689 (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
690
691 if (__kmp_affinity_verbose) {
692 KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
693 addr.labels[1]);
694 }
695 }
696
697 if (__kmp_affinity_gran_levels < 0) {
698 if (__kmp_affinity_gran == affinity_gran_group) {
699 __kmp_affinity_gran_levels = 1;
700 }
701 else if ((__kmp_affinity_gran == affinity_gran_fine)
702 || (__kmp_affinity_gran == affinity_gran_thread)) {
703 __kmp_affinity_gran_levels = 0;
704 }
705 else {
706 const char *gran_str = NULL;
707 if (__kmp_affinity_gran == affinity_gran_core) {
708 gran_str = "core";
709 }
710 else if (__kmp_affinity_gran == affinity_gran_package) {
711 gran_str = "package";
712 }
713 else if (__kmp_affinity_gran == affinity_gran_node) {
714 gran_str = "node";
715 }
716 else {
717 KMP_ASSERT(0);
718 }
719
720 // Warning: can't use affinity granularity \"gran\" with group topology method, using "thread"
721 __kmp_affinity_gran_levels = 0;
722 }
723 }
724 return 2;
725}
726
# endif /* KMP_GROUP_AFFINITY */

729
730# if KMP_ARCH_X86 || KMP_ARCH_X86_64
731
732static int
733__kmp_cpuid_mask_width(int count) {
734 int r = 0;
735
736 while((1<<r) < count)
737 ++r;
738 return r;
739}
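// Illustrative example (not from the original source): __kmp_cpuid_mask_width(6)
// returns 3, since 1<<3 == 8 is the smallest power of 2 >= 6; the result is the
// number of low-order Apic Id bits reserved for a field with 6 possible values.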
740
741
742class apicThreadInfo {
743public:
744 unsigned osId; // param to __kmp_affinity_bind_thread
745 unsigned apicId; // from cpuid after binding
746 unsigned maxCoresPerPkg; // ""
747 unsigned maxThreadsPerPkg; // ""
748 unsigned pkgId; // inferred from above values
749 unsigned coreId; // ""
750 unsigned threadId; // ""
751};
752
753
754static int
755__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
756{
757 const apicThreadInfo *aa = (const apicThreadInfo *)a;
758 const apicThreadInfo *bb = (const apicThreadInfo *)b;
759 if (aa->osId < bb->osId) return -1;
760 if (aa->osId > bb->osId) return 1;
761 return 0;
762}
763
764
765static int
766__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
767{
768 const apicThreadInfo *aa = (const apicThreadInfo *)a;
769 const apicThreadInfo *bb = (const apicThreadInfo *)b;
770 if (aa->pkgId < bb->pkgId) return -1;
771 if (aa->pkgId > bb->pkgId) return 1;
772 if (aa->coreId < bb->coreId) return -1;
773 if (aa->coreId > bb->coreId) return 1;
774 if (aa->threadId < bb->threadId) return -1;
775 if (aa->threadId > bb->threadId) return 1;
776 return 0;
777}
778
779
780//
781// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
782// an algorithm which cycles through the available os threads, setting
783// the current thread's affinity mask to that thread, and then retrieves
784// the Apic Id for each thread context using the cpuid instruction.
785//
786static int
787__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
788 kmp_i18n_id_t *const msg_id)
789{
790 int rc;
791 *address2os = NULL;
792 *msg_id = kmp_i18n_null;
793
794# if KMP_MIC
795 {
796 // The code below will use cpuid(4).
797 // Check if cpuid(4) is supported.
798 // FIXME? - this really doesn't need to be specific to MIC.
799 kmp_cpuid buf;
800 __kmp_x86_cpuid(0, 0, &buf);
801 if (buf.eax < 4) {
802 *msg_id = kmp_i18n_str_NoLeaf4Support;
803 return -1;
804 }
805 }
806# endif // KMP_MIC
807
808 //
809 // Even if __kmp_affinity_type == affinity_none, this routine is still
810 // called to set __kmp_ht_enabled, & __kmp_ncores, as well as
811 // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
812 //
813 // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are not
815 // capable of calling __kmp_affinity_get_map()/__kmp_affinity_get_map(),
816 // then we need to do something else.
817 //
818 if (! KMP_AFFINITY_CAPABLE()) {
819 //
820 // Hack to try and infer the machine topology using only the data
821 // available from cpuid on the current thread, and __kmp_xproc.
822 //
823 KMP_ASSERT(__kmp_affinity_type == affinity_none);
824
825 //
826 // Get an upper bound on the number of threads per package using
827 // cpuid(1).
828 //
        // On some OS/chip combinations where HT is supported by the chip
830 // but is disabled, this value will be 2 on a single core chip.
831 // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
832 //
833 kmp_cpuid buf;
834 __kmp_x86_cpuid(1, 0, &buf);
835 int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
836 if (maxThreadsPerPkg == 0) {
837 maxThreadsPerPkg = 1;
838 }
839
840 //
841 // The num cores per pkg comes from cpuid(4).
842 // 1 must be added to the encoded value.
843 //
        // The author of cpu_count.cpp treated this as only an upper bound
845 // on the number of cores, but I haven't seen any cases where it
846 // was greater than the actual number of cores, so we will treat
847 // it as exact in this block of code.
848 //
849 // First, we need to check if cpuid(4) is supported on this chip.
850 // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
851 // has the value n or greater.
852 //
853 __kmp_x86_cpuid(0, 0, &buf);
854 if (buf.eax >= 4) {
855 __kmp_x86_cpuid(4, 0, &buf);
856 nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
857 }
858 else {
859 nCoresPerPkg = 1;
860 }
861
862 //
863 // There is no way to reliably tell if HT is enabled without issuing
        // the cpuid instruction from every thread, and correlating the cpuid
865 // info, so if the machine is not affinity capable, we assume that HT
866 // is off. We have seen quite a few machines where maxThreadsPerPkg
867 // is 2, yet the machine does not support HT.
868 //
869 // - Older OSes are usually found on machines with older chips, which
870 // do not support HT.
871 //
872 // - The performance penalty for mistakenly identifying a machine as
        // HT when it isn't (which results in blocktime being incorrectly set
        // to 0) is greater than the penalty for mistakenly identifying
875 // a machine as being 1 thread/core when it is really HT enabled
876 // (which results in blocktime being incorrectly set to a positive
877 // value).
878 //
879 __kmp_ncores = __kmp_xproc;
880 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
881 __kmp_nThreadsPerCore = 1;
        if (__kmp_affinity_verbose) {
883 KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
884 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
885 if (__kmp_affinity_uniform_topology()) {
886 KMP_INFORM(Uniform, "KMP_AFFINITY");
887 } else {
888 KMP_INFORM(NonUniform, "KMP_AFFINITY");
889 }
890 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
891 __kmp_nThreadsPerCore, __kmp_ncores);
892 }
893 return 0;
894 }
895
896 //
897 //
898 // From here on, we can assume that it is safe to call
899 // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
900 // even if __kmp_affinity_type = affinity_none.
901 //
902
903 //
904 // Save the affinity mask for the current thread.
905 //
906 kmp_affin_mask_t *oldMask;
907 KMP_CPU_ALLOC(oldMask);
908 KMP_ASSERT(oldMask != NULL);
909 __kmp_get_system_affinity(oldMask, TRUE);
910
911 //
912 // Run through each of the available contexts, binding the current thread
913 // to it, and obtaining the pertinent information using the cpuid instr.
914 //
915 // The relevant information is:
916 //
917 // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
    // has a unique Apic Id, which is of the form pkg# : core# : thread#.
919 //
920 // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The
921 // value of this field determines the width of the core# + thread#
922 // fields in the Apic Id. It is also an upper bound on the number
923 // of threads per package, but it has been verified that situations
    // happen where it is not exact. In particular, on certain OS/chip
925 // combinations where Intel(R) Hyper-Threading Technology is supported
926 // by the chip but has
927 // been disabled, the value of this field will be 2 (for a single core
928 // chip). On other OS/chip combinations supporting
929 // Intel(R) Hyper-Threading Technology, the value of
930 // this field will be 1 when Intel(R) Hyper-Threading Technology is
931 // disabled and 2 when it is enabled.
932 //
933 // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The
934 // value of this field (+1) determines the width of the core# field in
935 // the Apic Id. The comments in "cpucount.cpp" say that this value is
936 // an upper bound, but the IA-32 architecture manual says that it is
937 // exactly the number of cores per package, and I haven't seen any
938 // case where it wasn't.
939 //
940 // From this information, deduce the package Id, core Id, and thread Id,
941 // and set the corresponding fields in the apicThreadInfo struct.
942 //
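    // Worked example (illustrative, not from the original source): if
    // maxThreadsPerPkg == 16 and maxCoresPerPkg == 8, then widthCT == 4,
    // widthC == 3 and widthT == 1, so an Apic Id of 0x1b decodes to
    // pkgId == 0x1b >> 4 == 1, coreId == (0x1b >> 1) & 0x7 == 5, and
    // threadId == 0x1b & 0x1 == 1.
    //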
943 unsigned i;
944 apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
945 __kmp_avail_proc * sizeof(apicThreadInfo));
946 unsigned nApics = 0;
947 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
948 //
949 // Skip this proc if it is not included in the machine model.
950 //
951 if (! KMP_CPU_ISSET(i, fullMask)) {
952 continue;
953 }
954 KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
955
956 __kmp_affinity_bind_thread(i);
957 threadInfo[nApics].osId = i;
958
959 //
960 // The apic id and max threads per pkg come from cpuid(1).
961 //
962 kmp_cpuid buf;
963 __kmp_x86_cpuid(1, 0, &buf);
        if (! ((buf.edx >> 9) & 1)) {
965 __kmp_set_system_affinity(oldMask, TRUE);
966 __kmp_free(threadInfo);
967 KMP_CPU_FREE(oldMask);
968 *msg_id = kmp_i18n_str_ApicNotPresent;
969 return -1;
970 }
971 threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
972 threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
973 if (threadInfo[nApics].maxThreadsPerPkg == 0) {
974 threadInfo[nApics].maxThreadsPerPkg = 1;
975 }
976
977 //
978 // Max cores per pkg comes from cpuid(4).
979 // 1 must be added to the encoded value.
980 //
981 // First, we need to check if cpuid(4) is supported on this chip.
982 // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
983 // has the value n or greater.
984 //
985 __kmp_x86_cpuid(0, 0, &buf);
986 if (buf.eax >= 4) {
987 __kmp_x86_cpuid(4, 0, &buf);
988 threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
989 }
990 else {
991 threadInfo[nApics].maxCoresPerPkg = 1;
992 }
993
994 //
995 // Infer the pkgId / coreId / threadId using only the info
996 // obtained locally.
997 //
998 int widthCT = __kmp_cpuid_mask_width(
999 threadInfo[nApics].maxThreadsPerPkg);
1000 threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
1001
1002 int widthC = __kmp_cpuid_mask_width(
1003 threadInfo[nApics].maxCoresPerPkg);
1004 int widthT = widthCT - widthC;
1005 if (widthT < 0) {
1006 //
1007 // I've never seen this one happen, but I suppose it could, if
1008 // the cpuid instruction on a chip was really screwed up.
1009 // Make sure to restore the affinity mask before the tail call.
1010 //
1011 __kmp_set_system_affinity(oldMask, TRUE);
1012 __kmp_free(threadInfo);
1013 KMP_CPU_FREE(oldMask);
1014 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1015 return -1;
1016 }
1017
1018 int maskC = (1 << widthC) - 1;
1019 threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
1020 &maskC;
1021
1022 int maskT = (1 << widthT) - 1;
1023 threadInfo[nApics].threadId = threadInfo[nApics].apicId &maskT;
1024
1025 nApics++;
1026 }
1027
1028 //
1029 // We've collected all the info we need.
1030 // Restore the old affinity mask for this thread.
1031 //
1032 __kmp_set_system_affinity(oldMask, TRUE);
1033
1034 //
1035 // If there's only one thread context to bind to, form an Address object
1036 // with depth 1 and return immediately (or, if affinity is off, set
1037 // address2os to NULL and return).
1038 //
1039 // If it is configured to omit the package level when there is only a
1040 // single package, the logic at the end of this routine won't work if
1041 // there is only a single thread - it would try to form an Address
1042 // object with depth 0.
1043 //
1044 KMP_ASSERT(nApics > 0);
1045 if (nApics == 1) {
1046 __kmp_ncores = nPackages = 1;
1047 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
1049 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1050 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1051
1052 KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
1053 if (__kmp_affinity_respect_mask) {
1054 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1055 } else {
1056 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1057 }
1058 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1059 KMP_INFORM(Uniform, "KMP_AFFINITY");
1060 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1061 __kmp_nThreadsPerCore, __kmp_ncores);
1062 }
1063
1064 if (__kmp_affinity_type == affinity_none) {
1065 __kmp_free(threadInfo);
1066 KMP_CPU_FREE(oldMask);
1067 return 0;
1068 }
1069
1070 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
1071 Address addr(1);
1072 addr.labels[0] = threadInfo[0].pkgId;
1073 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
1074
1075 if (__kmp_affinity_gran_levels < 0) {
1076 __kmp_affinity_gran_levels = 0;
1077 }
1078
1079 if (__kmp_affinity_verbose) {
1080 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
1081 }
1082
1083 __kmp_free(threadInfo);
1084 KMP_CPU_FREE(oldMask);
1085 return 1;
1086 }
1087
1088 //
1089 // Sort the threadInfo table by physical Id.
1090 //
1091 qsort(threadInfo, nApics, sizeof(*threadInfo),
1092 __kmp_affinity_cmp_apicThreadInfo_phys_id);
1093
1094 //
1095 // The table is now sorted by pkgId / coreId / threadId, but we really
1096 // don't know the radix of any of the fields. pkgId's may be sparsely
1097 // assigned among the chips on a system. Although coreId's are usually
1098 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
1099 // [0..threadsPerCore-1], we don't want to make any such assumptions.
1100 //
1101 // For that matter, we don't know what coresPerPkg and threadsPerCore
1102 // (or the total # packages) are at this point - we want to determine
1103 // that now. We only have an upper bound on the first two figures.
1104 //
1105 // We also perform a consistency check at this point: the values returned
1106 // by the cpuid instruction for any thread bound to a given package had
1107 // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
1108 //
1109 nPackages = 1;
1110 nCoresPerPkg = 1;
1111 __kmp_nThreadsPerCore = 1;
1112 unsigned nCores = 1;
1113
1114 unsigned pkgCt = 1; // to determine radii
1115 unsigned lastPkgId = threadInfo[0].pkgId;
1116 unsigned coreCt = 1;
1117 unsigned lastCoreId = threadInfo[0].coreId;
1118 unsigned threadCt = 1;
1119 unsigned lastThreadId = threadInfo[0].threadId;
1120
    // intra-pkg consistency checks
1122 unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
1123 unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
1124
1125 for (i = 1; i < nApics; i++) {
1126 if (threadInfo[i].pkgId != lastPkgId) {
1127 nCores++;
1128 pkgCt++;
1129 lastPkgId = threadInfo[i].pkgId;
1130 if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
1131 coreCt = 1;
1132 lastCoreId = threadInfo[i].coreId;
1133 if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1134 threadCt = 1;
1135 lastThreadId = threadInfo[i].threadId;
1136
1137 //
1138 // This is a different package, so go on to the next iteration
1139 // without doing any consistency checks. Reset the consistency
1140 // check vars, though.
1141 //
1142 prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
1143 prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
1144 continue;
1145 }
1146
1147 if (threadInfo[i].coreId != lastCoreId) {
1148 nCores++;
1149 coreCt++;
1150 lastCoreId = threadInfo[i].coreId;
1151 if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1152 threadCt = 1;
1153 lastThreadId = threadInfo[i].threadId;
1154 }
1155 else if (threadInfo[i].threadId != lastThreadId) {
1156 threadCt++;
1157 lastThreadId = threadInfo[i].threadId;
1158 }
1159 else {
1160 __kmp_free(threadInfo);
1161 KMP_CPU_FREE(oldMask);
1162 *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
1163 return -1;
1164 }
1165
1166 //
1167 // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
        // fields agree between all the threads bound to a given package.
1169 //
1170 if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
1171 || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
1172 __kmp_free(threadInfo);
1173 KMP_CPU_FREE(oldMask);
1174 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1175 return -1;
1176 }
1177 }
1178 nPackages = pkgCt;
1179 if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
1180 if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1181
1182 //
1183 // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages. Make sure all these vars are set
1186 // correctly, and return now if affinity is not enabled.
1187 //
    __kmp_ncores = nCores;
1189 if (__kmp_affinity_verbose) {
1190 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1191 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1192
1193 KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
1194 if (__kmp_affinity_respect_mask) {
1195 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1196 } else {
1197 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1198 }
1199 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1200 if (__kmp_affinity_uniform_topology()) {
1201 KMP_INFORM(Uniform, "KMP_AFFINITY");
1202 } else {
1203 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1204 }
1205 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1206 __kmp_nThreadsPerCore, __kmp_ncores);
1207
1208 }
1209
1210 if (__kmp_affinity_type == affinity_none) {
1211 __kmp_free(threadInfo);
1212 KMP_CPU_FREE(oldMask);
1213 return 0;
1214 }
1215
1216 //
1217 // Now that we've determined the number of packages, the number of cores
1218 // per package, and the number of threads per core, we can construct the
1219 // data structure that is to be returned.
1220 //
1221 int pkgLevel = 0;
1222 int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
1223 int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
1224 unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
1225
1226 KMP_ASSERT(depth > 0);
1227 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
1228
1229 for (i = 0; i < nApics; ++i) {
1230 Address addr(depth);
1231 unsigned os = threadInfo[i].osId;
1232 int d = 0;
1233
1234 if (pkgLevel >= 0) {
1235 addr.labels[d++] = threadInfo[i].pkgId;
1236 }
1237 if (coreLevel >= 0) {
1238 addr.labels[d++] = threadInfo[i].coreId;
1239 }
1240 if (threadLevel >= 0) {
1241 addr.labels[d++] = threadInfo[i].threadId;
1242 }
1243 (*address2os)[i] = AddrUnsPair(addr, os);
1244 }
1245
1246 if (__kmp_affinity_gran_levels < 0) {
1247 //
1248 // Set the granularity level based on what levels are modeled
1249 // in the machine topology map.
1250 //
1251 __kmp_affinity_gran_levels = 0;
1252 if ((threadLevel >= 0)
1253 && (__kmp_affinity_gran > affinity_gran_thread)) {
1254 __kmp_affinity_gran_levels++;
1255 }
1256 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1257 __kmp_affinity_gran_levels++;
1258 }
1259 if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
1260 __kmp_affinity_gran_levels++;
1261 }
1262 }
1263
1264 if (__kmp_affinity_verbose) {
1265 __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
1266 coreLevel, threadLevel);
1267 }
1268
1269 __kmp_free(threadInfo);
1270 KMP_CPU_FREE(oldMask);
1271 return depth;
1272}
1273
1274
1275//
1276// Intel(R) microarchitecture code name Nehalem, Dunnington and later
1277// architectures support a newer interface for specifying the x2APIC Ids,
1278// based on cpuid leaf 11.
1279//
1280static int
1281__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
1282 kmp_i18n_id_t *const msg_id)
1283{
1284 kmp_cpuid buf;
1285
1286 *address2os = NULL;
1287 *msg_id = kmp_i18n_null;
1288
1289 //
1290 // Check to see if cpuid leaf 11 is supported.
1291 //
1292 __kmp_x86_cpuid(0, 0, &buf);
1293 if (buf.eax < 11) {
1294 *msg_id = kmp_i18n_str_NoLeaf11Support;
1295 return -1;
1296 }
1297 __kmp_x86_cpuid(11, 0, &buf);
1298 if (buf.ebx == 0) {
1299 *msg_id = kmp_i18n_str_NoLeaf11Support;
1300 return -1;
1301 }
1302
1303 //
1304 // Find the number of levels in the machine topology. While we're at it,
1305 // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will
1306 // try to get more accurate values later by explicitly counting them,
1307 // but get reasonable defaults now, in case we return early.
1308 //
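    // Illustrative walk-through (not from the original source): if sub-leaf 0
    // of cpuid(11) reports kind 1 (SMT), sub-leaf 1 reports kind 2 (core), and
    // sub-leaf 2 returns ebx == 0, the loop below exits with depth == 3 and
    // (before the inversion further down) threadLevel == 0, coreLevel == 1,
    // pkgLevel == 2.
    //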
1309 int level;
1310 int threadLevel = -1;
1311 int coreLevel = -1;
1312 int pkgLevel = -1;
1313 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
1314
1315 for (level = 0;; level++) {
1316 if (level > 31) {
1317 //
1318 // FIXME: Hack for DPD200163180
1319 //
1320 // If level is big then something went wrong -> exiting
1321 //
1322 // There could actually be 32 valid levels in the machine topology,
1323 // but so far, the only machine we have seen which does not exit
1324 // this loop before iteration 32 has fubar x2APIC settings.
1325 //
1326 // For now, just reject this case based upon loop trip count.
1327 //
1328 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1329 return -1;
1330 }
1331 __kmp_x86_cpuid(11, level, &buf);
1332 if (buf.ebx == 0) {
1333 if (pkgLevel < 0) {
1334 //
1335 // Will infer nPackages from __kmp_xproc
1336 //
1337 pkgLevel = level;
1338 level++;
1339 }
1340 break;
1341 }
1342 int kind = (buf.ecx >> 8) & 0xff;
1343 if (kind == 1) {
1344 //
1345 // SMT level
1346 //
1347 threadLevel = level;
1348 coreLevel = -1;
1349 pkgLevel = -1;
1350 __kmp_nThreadsPerCore = buf.ebx & 0xff;
1351 if (__kmp_nThreadsPerCore == 0) {
1352 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1353 return -1;
1354 }
1355 }
1356 else if (kind == 2) {
1357 //
1358 // core level
1359 //
1360 coreLevel = level;
1361 pkgLevel = -1;
1362 nCoresPerPkg = buf.ebx & 0xff;
1363 if (nCoresPerPkg == 0) {
1364 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1365 return -1;
1366 }
1367 }
1368 else {
1369 if (level <= 0) {
1370 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1371 return -1;
1372 }
1373 if (pkgLevel >= 0) {
1374 continue;
1375 }
1376 pkgLevel = level;
1377 nPackages = buf.ebx & 0xff;
1378 if (nPackages == 0) {
1379 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1380 return -1;
1381 }
1382 }
1383 }
1384 int depth = level;
1385
1386 //
1387 // In the above loop, "level" was counted from the finest level (usually
1388 // thread) to the coarsest. The caller expects that we will place the
1389 // labels in (*address2os)[].first.labels[] in the inverse order, so
1390 // we need to invert the vars saying which level means what.
1391 //
1392 if (threadLevel >= 0) {
1393 threadLevel = depth - threadLevel - 1;
1394 }
1395 if (coreLevel >= 0) {
1396 coreLevel = depth - coreLevel - 1;
1397 }
1398 KMP_DEBUG_ASSERT(pkgLevel >= 0);
1399 pkgLevel = depth - pkgLevel - 1;
1400
1401 //
1402 // The algorithm used starts by setting the affinity to each available
1403 // thread and retrieving info from the cpuid instruction, so if we are not
1404 // capable of calling __kmp_affinity_get_map()/__kmp_affinity_get_map(),
1405 // then we need to do something else - use the defaults that we calculated
1406 // from issuing cpuid without binding to each proc.
1407 //
1408 if (! KMP_AFFINITY_CAPABLE())
1409 {
1410 //
1411 // Hack to try and infer the machine topology using only the data
1412 // available from cpuid on the current thread, and __kmp_xproc.
1413 //
1414 KMP_ASSERT(__kmp_affinity_type == affinity_none);
1415
1416 __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
1417 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        if (__kmp_affinity_verbose) {
1419 KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
1420 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1421 if (__kmp_affinity_uniform_topology()) {
1422 KMP_INFORM(Uniform, "KMP_AFFINITY");
1423 } else {
1424 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1425 }
1426 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1427 __kmp_nThreadsPerCore, __kmp_ncores);
1428 }
1429 return 0;
1430 }
1431
1432 //
1433 //
1434 // From here on, we can assume that it is safe to call
1435 // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
1436 // even if __kmp_affinity_type = affinity_none.
1437 //
1438
1439 //
1440 // Save the affinity mask for the current thread.
1441 //
1442 kmp_affin_mask_t *oldMask;
1443 KMP_CPU_ALLOC(oldMask);
1444 __kmp_get_system_affinity(oldMask, TRUE);
1445
1446 //
1447 // Allocate the data structure to be returned.
1448 //
1449 AddrUnsPair *retval = (AddrUnsPair *)
1450 __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
1451
1452 //
1453 // Run through each of the available contexts, binding the current thread
1454 // to it, and obtaining the pertinent information using the cpuid instr.
1455 //
1456 unsigned int proc;
1457 int nApics = 0;
1458 for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
1459 //
1460 // Skip this proc if it is not included in the machine model.
1461 //
1462 if (! KMP_CPU_ISSET(proc, fullMask)) {
1463 continue;
1464 }
1465 KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
1466
1467 __kmp_affinity_bind_thread(proc);
1468
1469 //
        // Extract the labels for each level in the machine topology map
1471 // from the Apic ID.
1472 //
1473 Address addr(depth);
1474 int prev_shift = 0;
1475
1476 for (level = 0; level < depth; level++) {
1477 __kmp_x86_cpuid(11, level, &buf);
1478 unsigned apicId = buf.edx;
1479 if (buf.ebx == 0) {
1480 if (level != depth - 1) {
1481 KMP_CPU_FREE(oldMask);
1482 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1483 return -1;
1484 }
1485 addr.labels[depth - level - 1] = apicId >> prev_shift;
1486 level++;
1487 break;
1488 }
1489 int shift = buf.eax & 0x1f;
1490 int mask = (1 << shift) - 1;
1491 addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
1492 prev_shift = shift;
1493 }
1494 if (level != depth) {
1495 KMP_CPU_FREE(oldMask);
1496 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1497 return -1;
1498 }
1499
1500 retval[nApics] = AddrUnsPair(addr, proc);
1501 nApics++;
1502 }
1503
1504 //
1505 // We've collected all the info we need.
1506 // Restore the old affinity mask for this thread.
1507 //
1508 __kmp_set_system_affinity(oldMask, TRUE);
1509
1510 //
1511 // If there's only one thread context to bind to, return now.
1512 //
1513 KMP_ASSERT(nApics > 0);
1514 if (nApics == 1) {
1515 __kmp_ncores = nPackages = 1;
1516 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
1518 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1519 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1520
1521 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1522 if (__kmp_affinity_respect_mask) {
1523 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1524 } else {
1525 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1526 }
1527 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1528 KMP_INFORM(Uniform, "KMP_AFFINITY");
1529 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1530 __kmp_nThreadsPerCore, __kmp_ncores);
1531 }
1532
1533 if (__kmp_affinity_type == affinity_none) {
1534 __kmp_free(retval);
1535 KMP_CPU_FREE(oldMask);
1536 return 0;
1537 }
1538
1539 //
1540 // Form an Address object which only includes the package level.
1541 //
1542 Address addr(1);
1543 addr.labels[0] = retval[0].first.labels[pkgLevel];
1544 retval[0].first = addr;
1545
1546 if (__kmp_affinity_gran_levels < 0) {
1547 __kmp_affinity_gran_levels = 0;
1548 }
1549
1550 if (__kmp_affinity_verbose) {
1551 __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
1552 }
1553
1554 *address2os = retval;
1555 KMP_CPU_FREE(oldMask);
1556 return 1;
1557 }
1558
1559 //
1560 // Sort the table by physical Id.
1561 //
1562 qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
1563
1564 //
1565 // Find the radix at each of the levels.
1566 //
1567 unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1568 unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1569 unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1570 unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1571 for (level = 0; level < depth; level++) {
1572 totals[level] = 1;
1573 maxCt[level] = 1;
1574 counts[level] = 1;
1575 last[level] = retval[0].first.labels[level];
1576 }
1577
1578 //
1579 // From here on, the iteration variable "level" runs from the finest
1580 // level to the coarsest, i.e. we iterate forward through
1581 // (*address2os)[].first.labels[] - in the previous loops, we iterated
1582 // backwards.
1583 //
1584 for (proc = 1; (int)proc < nApics; proc++) {
1585 int level;
1586 for (level = 0; level < depth; level++) {
1587 if (retval[proc].first.labels[level] != last[level]) {
1588 int j;
1589 for (j = level + 1; j < depth; j++) {
1590 totals[j]++;
1591 counts[j] = 1;
1592 // The line below causes printing incorrect topology information
1593 // in case the max value for some level (maxCt[level]) is encountered earlier than
                    // a smaller value while going through the array.
                    // For example, if pkg0 has 4 cores and pkg1 has 2 cores, then maxCt[1] == 2
1596 // whereas it must be 4.
1597 // TODO!!! Check if it can be commented safely
1598 //maxCt[j] = 1;
1599 last[j] = retval[proc].first.labels[j];
1600 }
1601 totals[level]++;
1602 counts[level]++;
1603 if (counts[level] > maxCt[level]) {
1604 maxCt[level] = counts[level];
1605 }
1606 last[level] = retval[proc].first.labels[level];
1607 break;
1608 }
1609 else if (level == depth - 1) {
1610 __kmp_free(last);
1611 __kmp_free(maxCt);
1612 __kmp_free(counts);
1613 __kmp_free(totals);
1614 __kmp_free(retval);
1615 KMP_CPU_FREE(oldMask);
1616 *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
1617 return -1;
1618 }
1619 }
1620 }
1621
1622 //
1623 // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages. Make sure all these vars are set
1626 // correctly, and return if affinity is not enabled.
1627 //
1628 if (threadLevel >= 0) {
1629 __kmp_nThreadsPerCore = maxCt[threadLevel];
1630 }
1631 else {
1632 __kmp_nThreadsPerCore = 1;
1633 }
    nPackages = totals[pkgLevel];
1635
1636 if (coreLevel >= 0) {
1637 __kmp_ncores = totals[coreLevel];
1638 nCoresPerPkg = maxCt[coreLevel];
1639 }
1640 else {
1641 __kmp_ncores = nPackages;
1642 nCoresPerPkg = 1;
1643 }
1644
1645 //
1646 // Check to see if the machine topology is uniform
1647 //
1648 unsigned prod = maxCt[0];
1649 for (level = 1; level < depth; level++) {
1650 prod *= maxCt[level];
1651 }
1652 bool uniform = (prod == totals[level - 1]);
1653
1654 //
1655 // Print the machine topology summary.
1656 //
1657 if (__kmp_affinity_verbose) {
1658 char mask[KMP_AFFIN_MASK_PRINT_LEN];
1659 __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1660
1661 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1662 if (__kmp_affinity_respect_mask) {
1663 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
1664 } else {
1665 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
1666 }
1667 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1668 if (uniform) {
1669 KMP_INFORM(Uniform, "KMP_AFFINITY");
1670 } else {
1671 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1672 }
1673
1674 kmp_str_buf_t buf;
1675 __kmp_str_buf_init(&buf);
1676
1677 __kmp_str_buf_print(&buf, "%d", totals[0]);
1678 for (level = 1; level <= pkgLevel; level++) {
1679 __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
1680 }
1681 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
1682 __kmp_nThreadsPerCore, __kmp_ncores);
1683
1684 __kmp_str_buf_free(&buf);
1685 }
1686
1687 if (__kmp_affinity_type == affinity_none) {
1688 __kmp_free(last);
1689 __kmp_free(maxCt);
1690 __kmp_free(counts);
1691 __kmp_free(totals);
1692 __kmp_free(retval);
1693 KMP_CPU_FREE(oldMask);
1694 return 0;
1695 }
1696
1697 //
    // Find any levels with radix 1, and remove them from the map
1699 // (except for the package level).
1700 //
1701 int new_depth = 0;
1702 for (level = 0; level < depth; level++) {
1703 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1704 continue;
1705 }
1706 new_depth++;
1707 }
1708
1709 //
1710 // If we are removing any levels, allocate a new vector to return,
1711 // and copy the relevant information to it.
1712 //
1713 if (new_depth != depth) {
1714 AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
1715 sizeof(AddrUnsPair) * nApics);
1716 for (proc = 0; (int)proc < nApics; proc++) {
1717 Address addr(new_depth);
1718 new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
1719 }
1720 int new_level = 0;
1721 for (level = 0; level < depth; level++) {
1722 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1723 if (level == threadLevel) {
1724 threadLevel = -1;
1725 }
1726 else if ((threadLevel >= 0) && (level < threadLevel)) {
1727 threadLevel--;
1728 }
1729 if (level == coreLevel) {
1730 coreLevel = -1;
1731 }
1732 else if ((coreLevel >= 0) && (level < coreLevel)) {
1733 coreLevel--;
1734 }
1735 if (level < pkgLevel) {
1736 pkgLevel--;
1737 }
1738 continue;
1739 }
1740 for (proc = 0; (int)proc < nApics; proc++) {
1741 new_retval[proc].first.labels[new_level]
1742 = retval[proc].first.labels[level];
1743 }
1744 new_level++;
1745 }
1746
1747 __kmp_free(retval);
1748 retval = new_retval;
1749 depth = new_depth;
1750 }
1751
1752 if (__kmp_affinity_gran_levels < 0) {
1753 //
1754 // Set the granularity level based on what levels are modeled
1755 // in the machine topology map.
1756 //
1757 __kmp_affinity_gran_levels = 0;
1758 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
1759 __kmp_affinity_gran_levels++;
1760 }
1761 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1762 __kmp_affinity_gran_levels++;
1763 }
1764 if (__kmp_affinity_gran > affinity_gran_package) {
1765 __kmp_affinity_gran_levels++;
1766 }
1767 }
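    //
    // For example (illustrative): with KMP_AFFINITY=granularity=core on a
    // topology that models thread, core and package levels, only the thread
    // level lies below the requested granularity, so __kmp_affinity_gran_levels
    // ends up as 1 and each thread's mask will later cover every hardware
    // thread of its core.
    //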
1768
1769 if (__kmp_affinity_verbose) {
1770 __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
1771 coreLevel, threadLevel);
1772 }
1773
1774 __kmp_free(last);
1775 __kmp_free(maxCt);
1776 __kmp_free(counts);
1777 __kmp_free(totals);
1778 KMP_CPU_FREE(oldMask);
1779 *address2os = retval;
1780 return depth;
1781}
1782
1783
1784# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1785
1786
1787#define osIdIndex 0
1788#define threadIdIndex 1
1789#define coreIdIndex 2
1790#define pkgIdIndex 3
1791#define nodeIdIndex 4
1792
1793typedef unsigned *ProcCpuInfo;
1794static unsigned maxIndex = pkgIdIndex;
1795
1796
1797static int
1798__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
1799{
1800 const unsigned *aa = (const unsigned *)a;
1801 const unsigned *bb = (const unsigned *)b;
1802 if (aa[osIdIndex] < bb[osIdIndex]) return -1;
1803 if (aa[osIdIndex] > bb[osIdIndex]) return 1;
1804 return 0;
1805}
1806
1807
1808static int
1809__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
1810{
1811 unsigned i;
1812 const unsigned *aa = *((const unsigned **)a);
1813 const unsigned *bb = *((const unsigned **)b);
1814 for (i = maxIndex; ; i--) {
1815 if (aa[i] < bb[i]) return -1;
1816 if (aa[i] > bb[i]) return 1;
1817 if (i == osIdIndex) break;
1818 }
1819 return 0;
1820}
1821
1822
1823//
1824// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
1825// affinity map.
1826//
1827static int
1828__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
1829 kmp_i18n_id_t *const msg_id, FILE *f)
1830{
1831 *address2os = NULL;
1832 *msg_id = kmp_i18n_null;
1833
1834 //
1835    // Scan the file, and count the number of "processor" (osId) fields,
Alp Toker8f2d3f02014-02-24 10:40:15 +00001836 // and find the highest value of <n> for a node_<n> field.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001837 //
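    // A typical record in /proc/cpuinfo contains, among many other fields,
    // lines such as the following (illustrative values); only the fields
    // recognized below ("processor", "physical id", "core id", "thread id",
    // and "node_<n> id" in the alternate file format) are used:
    //
    //     processor       : 0
    //     physical id     : 0
    //     core id         : 0
    //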
1838 char buf[256];
1839 unsigned num_records = 0;
1840 while (! feof(f)) {
1841 buf[sizeof(buf) - 1] = 1;
1842 if (! fgets(buf, sizeof(buf), f)) {
1843 //
1844            // Read errors are presumably due to EOF
1845 //
1846 break;
1847 }
1848
1849 char s1[] = "processor";
1850 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1851 num_records++;
1852 continue;
1853 }
1854
1855 //
1856 // FIXME - this will match "node_<n> <garbage>"
1857 //
1858 unsigned level;
1859 if (sscanf(buf, "node_%d id", &level) == 1) {
1860 if (nodeIdIndex + level >= maxIndex) {
1861 maxIndex = nodeIdIndex + level;
1862 }
1863 continue;
1864 }
1865 }
1866
1867 //
1868 // Check for empty file / no valid processor records, or too many.
1869 // The number of records can't exceed the number of valid bits in the
1870 // affinity mask.
1871 //
1872 if (num_records == 0) {
1873 *line = 0;
1874 *msg_id = kmp_i18n_str_NoProcRecords;
1875 return -1;
1876 }
1877 if (num_records > (unsigned)__kmp_xproc) {
1878 *line = 0;
1879 *msg_id = kmp_i18n_str_TooManyProcRecords;
1880 return -1;
1881 }
1882
1883 //
1884    // Set the file pointer back to the beginning, so that we can scan the
1885 // file again, this time performing a full parse of the data.
1886    // Allocate a vector of ProcCpuInfo objects, where we will place the data.
1887 // Adding an extra element at the end allows us to remove a lot of extra
1888 // checks for termination conditions.
1889 //
1890 if (fseek(f, 0, SEEK_SET) != 0) {
1891 *line = 0;
1892 *msg_id = kmp_i18n_str_CantRewindCpuinfo;
1893 return -1;
1894 }
1895
1896 //
1897 // Allocate the array of records to store the proc info in. The dummy
1898 // element at the end makes the logic in filling them out easier to code.
1899 //
1900 unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
1901 * sizeof(unsigned *));
1902 unsigned i;
1903 for (i = 0; i <= num_records; i++) {
1904 threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
1905 * sizeof(unsigned));
1906 }
1907
1908#define CLEANUP_THREAD_INFO \
1909 for (i = 0; i <= num_records; i++) { \
1910 __kmp_free(threadInfo[i]); \
1911 } \
1912 __kmp_free(threadInfo);
1913
1914 //
1915 // A value of UINT_MAX means that we didn't find the field
1916 //
1917 unsigned __index;
1918
1919#define INIT_PROC_INFO(p) \
1920 for (__index = 0; __index <= maxIndex; __index++) { \
1921 (p)[__index] = UINT_MAX; \
1922 }
1923
1924 for (i = 0; i <= num_records; i++) {
1925 INIT_PROC_INFO(threadInfo[i]);
1926 }
1927
1928 unsigned num_avail = 0;
1929 *line = 0;
1930 while (! feof(f)) {
1931 //
1932 // Create an inner scoping level, so that all the goto targets at the
1933 // end of the loop appear in an outer scoping level. This avoids
1934 // warnings about jumping past an initialization to a target in the
1935 // same block.
1936 //
1937 {
1938 buf[sizeof(buf) - 1] = 1;
1939 bool long_line = false;
1940 if (! fgets(buf, sizeof(buf), f)) {
1941 //
1942                // Read errors are presumably due to EOF
1943 //
1944 // If there is valid data in threadInfo[num_avail], then fake
1945                // a blank line to ensure that the last address gets parsed.
1946 //
1947 bool valid = false;
1948 for (i = 0; i <= maxIndex; i++) {
1949 if (threadInfo[num_avail][i] != UINT_MAX) {
1950 valid = true;
1951 }
1952 }
1953 if (! valid) {
1954 break;
1955 }
1956 buf[0] = 0;
1957 } else if (!buf[sizeof(buf) - 1]) {
1958 //
1959 // The line is longer than the buffer. Set a flag and don't
1960                // emit an error if we were going to ignore the line anyway.
1961 //
1962 long_line = true;
1963
1964#define CHECK_LINE \
1965 if (long_line) { \
1966 CLEANUP_THREAD_INFO; \
1967 *msg_id = kmp_i18n_str_LongLineCpuinfo; \
1968 return -1; \
1969 }
1970 }
1971 (*line)++;
1972
1973 char s1[] = "processor";
1974 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1975 CHECK_LINE;
1976 char *p = strchr(buf + sizeof(s1) - 1, ':');
1977 unsigned val;
1978 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
1979 if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
1980 threadInfo[num_avail][osIdIndex] = val;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001981#if KMP_OS_LINUX && USE_SYSFS_INFO
1982 char path[256];
1983 snprintf(path, sizeof(path),
1984 "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
1985 threadInfo[num_avail][osIdIndex]);
1986 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
1987
1988 snprintf(path, sizeof(path),
1989 "/sys/devices/system/cpu/cpu%u/topology/core_id",
1990 threadInfo[num_avail][osIdIndex]);
1991 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001992 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001993#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001994 }
1995 char s2[] = "physical id";
1996 if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
1997 CHECK_LINE;
1998 char *p = strchr(buf + sizeof(s2) - 1, ':');
1999 unsigned val;
2000 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2001 if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
2002 threadInfo[num_avail][pkgIdIndex] = val;
2003 continue;
2004 }
2005 char s3[] = "core id";
2006 if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
2007 CHECK_LINE;
2008 char *p = strchr(buf + sizeof(s3) - 1, ':');
2009 unsigned val;
2010 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2011 if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
2012 threadInfo[num_avail][coreIdIndex] = val;
2013 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002014#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jim Cownie5e8470a2013-09-27 10:38:44 +00002015 }
2016 char s4[] = "thread id";
2017 if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
2018 CHECK_LINE;
2019 char *p = strchr(buf + sizeof(s4) - 1, ':');
2020 unsigned val;
2021 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2022 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
2023 threadInfo[num_avail][threadIdIndex] = val;
2024 continue;
2025 }
2026 unsigned level;
2027 if (sscanf(buf, "node_%d id", &level) == 1) {
2028 CHECK_LINE;
2029 char *p = strchr(buf + sizeof(s4) - 1, ':');
2030 unsigned val;
2031 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2032 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
2033 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
2034 threadInfo[num_avail][nodeIdIndex + level] = val;
2035 continue;
2036 }
2037
2038 //
2039 // We didn't recognize the leading token on the line.
2040 // There are lots of leading tokens that we don't recognize -
2041 // if the line isn't empty, go on to the next line.
2042 //
2043 if ((*buf != 0) && (*buf != '\n')) {
2044 //
2045 // If the line is longer than the buffer, read characters
2046 // until we find a newline.
2047 //
2048 if (long_line) {
2049 int ch;
2050 while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
2051 }
2052 continue;
2053 }
2054
2055 //
2056 // A newline has signalled the end of the processor record.
2057 // Check that there aren't too many procs specified.
2058 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002059 if ((int)num_avail == __kmp_xproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002060 CLEANUP_THREAD_INFO;
2061 *msg_id = kmp_i18n_str_TooManyEntries;
2062 return -1;
2063 }
2064
2065 //
2066 // Check for missing fields. The osId field must be there, and we
2067 // currently require that the physical id field is specified, also.
2068 //
2069 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
2070 CLEANUP_THREAD_INFO;
2071 *msg_id = kmp_i18n_str_MissingProcField;
2072 return -1;
2073 }
2074 if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
2075 CLEANUP_THREAD_INFO;
2076 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
2077 return -1;
2078 }
2079
2080 //
2081 // Skip this proc if it is not included in the machine model.
2082 //
2083 if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
2084 INIT_PROC_INFO(threadInfo[num_avail]);
2085 continue;
2086 }
2087
2088 //
2089 // We have a successful parse of this proc's info.
2090 // Increment the counter, and prepare for the next proc.
2091 //
2092 num_avail++;
2093 KMP_ASSERT(num_avail <= num_records);
2094 INIT_PROC_INFO(threadInfo[num_avail]);
2095 }
2096 continue;
2097
2098 no_val:
2099 CLEANUP_THREAD_INFO;
2100 *msg_id = kmp_i18n_str_MissingValCpuinfo;
2101 return -1;
2102
2103 dup_field:
2104 CLEANUP_THREAD_INFO;
2105 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
2106 return -1;
2107 }
2108 *line = 0;
2109
2110# if KMP_MIC && REDUCE_TEAM_SIZE
2111 unsigned teamSize = 0;
2112# endif // KMP_MIC && REDUCE_TEAM_SIZE
2113
2114 // check for num_records == __kmp_xproc ???
2115
2116 //
2117 // If there's only one thread context to bind to, form an Address object
2118 // with depth 1 and return immediately (or, if affinity is off, set
2119 // address2os to NULL and return).
2120 //
2121 // If it is configured to omit the package level when there is only a
2122 // single package, the logic at the end of this routine won't work if
2123 // there is only a single thread - it would try to form an Address
2124 // object with depth 0.
2125 //
2126 KMP_ASSERT(num_avail > 0);
2127 KMP_ASSERT(num_avail <= num_records);
2128 if (num_avail == 1) {
2129 __kmp_ncores = 1;
2130 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002131 if (__kmp_affinity_verbose) {
2132 if (! KMP_AFFINITY_CAPABLE()) {
2133 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2134 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2135 KMP_INFORM(Uniform, "KMP_AFFINITY");
2136 }
2137 else {
2138 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2139 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
2140 fullMask);
2141 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2142 if (__kmp_affinity_respect_mask) {
2143 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2144 } else {
2145 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2146 }
2147 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2148 KMP_INFORM(Uniform, "KMP_AFFINITY");
2149 }
2150 int index;
2151 kmp_str_buf_t buf;
2152 __kmp_str_buf_init(&buf);
2153 __kmp_str_buf_print(&buf, "1");
2154 for (index = maxIndex - 1; index > pkgIdIndex; index--) {
2155 __kmp_str_buf_print(&buf, " x 1");
2156 }
2157 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
2158 __kmp_str_buf_free(&buf);
2159 }
2160
2161 if (__kmp_affinity_type == affinity_none) {
2162 CLEANUP_THREAD_INFO;
2163 return 0;
2164 }
2165
2166 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
2167 Address addr(1);
2168 addr.labels[0] = threadInfo[0][pkgIdIndex];
2169 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
2170
2171 if (__kmp_affinity_gran_levels < 0) {
2172 __kmp_affinity_gran_levels = 0;
2173 }
2174
2175 if (__kmp_affinity_verbose) {
2176 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
2177 }
2178
2179 CLEANUP_THREAD_INFO;
2180 return 1;
2181 }
2182
2183 //
2184 // Sort the threadInfo table by physical Id.
2185 //
2186 qsort(threadInfo, num_avail, sizeof(*threadInfo),
2187 __kmp_affinity_cmp_ProcCpuInfo_phys_id);
2188
2189 //
2190 // The table is now sorted by pkgId / coreId / threadId, but we really
2191 // don't know the radix of any of the fields. pkgId's may be sparsely
2192 // assigned among the chips on a system. Although coreId's are usually
2193 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
2194 // [0..threadsPerCore-1], we don't want to make any such assumptions.
2195 //
2196 // For that matter, we don't know what coresPerPkg and threadsPerCore
2197 // (or the total # packages) are at this point - we want to determine
2198 // that now. We only have an upper bound on the first two figures.
2199 //
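    // For instance (illustrative): a two-socket system may report physical
    // ids 0 and 3 rather than 0 and 1, which is why the code below counts
    // distinct values as they change instead of assuming the ids are dense.
    //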
2200 unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
2201 * sizeof(unsigned));
2202 unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
2203 * sizeof(unsigned));
2204 unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
2205 * sizeof(unsigned));
2206 unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
2207 * sizeof(unsigned));
2208
2209 bool assign_thread_ids = false;
2210 unsigned threadIdCt;
2211 unsigned index;
2212
2213 restart_radix_check:
2214 threadIdCt = 0;
2215
2216 //
2217 // Initialize the counter arrays with data from threadInfo[0].
2218 //
2219 if (assign_thread_ids) {
2220 if (threadInfo[0][threadIdIndex] == UINT_MAX) {
2221 threadInfo[0][threadIdIndex] = threadIdCt++;
2222 }
2223 else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
2224 threadIdCt = threadInfo[0][threadIdIndex] + 1;
2225 }
2226 }
2227 for (index = 0; index <= maxIndex; index++) {
2228 counts[index] = 1;
2229 maxCt[index] = 1;
2230 totals[index] = 1;
2231        lastId[index] = threadInfo[0][index];
2232 }
2233
2234 //
2235 // Run through the rest of the OS procs.
2236 //
2237 for (i = 1; i < num_avail; i++) {
2238 //
2239 // Find the most significant index whose id differs
2240 // from the id for the previous OS proc.
2241 //
2242 for (index = maxIndex; index >= threadIdIndex; index--) {
2243 if (assign_thread_ids && (index == threadIdIndex)) {
2244 //
2245 // Auto-assign the thread id field if it wasn't specified.
2246 //
2247 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2248 threadInfo[i][threadIdIndex] = threadIdCt++;
2249 }
2250
2251 //
2252                // Apparently the thread id field was specified for some
2253 // entries and not others. Start the thread id counter
2254 // off at the next higher thread id.
2255 //
2256 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2257 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2258 }
2259 }
2260 if (threadInfo[i][index] != lastId[index]) {
2261 //
2262 // Run through all indices which are less significant,
2263 // and reset the counts to 1.
2264 //
2265 // At all levels up to and including index, we need to
2266 // increment the totals and record the last id.
2267 //
2268 unsigned index2;
2269 for (index2 = threadIdIndex; index2 < index; index2++) {
2270 totals[index2]++;
2271 if (counts[index2] > maxCt[index2]) {
2272 maxCt[index2] = counts[index2];
2273 }
2274 counts[index2] = 1;
2275 lastId[index2] = threadInfo[i][index2];
2276 }
2277 counts[index]++;
2278 totals[index]++;
2279 lastId[index] = threadInfo[i][index];
2280
2281 if (assign_thread_ids && (index > threadIdIndex)) {
2282
2283# if KMP_MIC && REDUCE_TEAM_SIZE
2284 //
2285 // The default team size is the total #threads in the machine
2286 // minus 1 thread for every core that has 3 or more threads.
2287 //
2288 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2289# endif // KMP_MIC && REDUCE_TEAM_SIZE
2290
2291 //
2292 // Restart the thread counter, as we are on a new core.
2293 //
2294 threadIdCt = 0;
2295
2296 //
2297 // Auto-assign the thread id field if it wasn't specified.
2298 //
2299 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2300 threadInfo[i][threadIdIndex] = threadIdCt++;
2301 }
2302
2303 //
2304                    // Apparently the thread id field was specified for some
2305 // entries and not others. Start the thread id counter
2306 // off at the next higher thread id.
2307 //
2308 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2309 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2310 }
2311 }
2312 break;
2313 }
2314 }
2315 if (index < threadIdIndex) {
2316 //
2317 // If thread ids were specified, it is an error if they are not
2318            // unique. Also, check that we haven't already restarted the
2319 // loop (to be safe - shouldn't need to).
2320 //
2321 if ((threadInfo[i][threadIdIndex] != UINT_MAX)
2322 || assign_thread_ids) {
2323 __kmp_free(lastId);
2324 __kmp_free(totals);
2325 __kmp_free(maxCt);
2326 __kmp_free(counts);
2327 CLEANUP_THREAD_INFO;
2328 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2329 return -1;
2330 }
2331
2332 //
2333            // If the thread ids were not specified and we see entries
2334            // that are duplicates, start the loop over and
2335 // assign the thread ids manually.
2336 //
2337 assign_thread_ids = true;
2338 goto restart_radix_check;
2339 }
2340 }
2341
2342# if KMP_MIC && REDUCE_TEAM_SIZE
2343 //
2344 // The default team size is the total #threads in the machine
2345 // minus 1 thread for every core that has 3 or more threads.
2346 //
2347 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2348# endif // KMP_MIC && REDUCE_TEAM_SIZE
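    //
    // Worked example of the rule above (illustrative only): on a coprocessor
    // with 60 cores and 4 hardware threads per core, each core contributes
    // threadIdCt - 1 = 3 threads to teamSize, giving a default team size of
    // 60 * 3 = 180 instead of the full 240 hardware threads.
    //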
2349
2350 for (index = threadIdIndex; index <= maxIndex; index++) {
2351 if (counts[index] > maxCt[index]) {
2352 maxCt[index] = counts[index];
2353 }
2354 }
2355
2356 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2357 nCoresPerPkg = maxCt[coreIdIndex];
2358 nPackages = totals[pkgIdIndex];
2359
2360 //
2361 // Check to see if the machine topology is uniform
2362 //
2363 unsigned prod = totals[maxIndex];
2364 for (index = threadIdIndex; index < maxIndex; index++) {
2365 prod *= maxCt[index];
2366 }
2367 bool uniform = (prod == totals[threadIdIndex]);
2368
2369 //
2370 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00002371 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002372 // nCoresPerPkg, & nPackages. Make sure all these vars are set
2373 // correctly, and return now if affinity is not enabled.
2374 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00002375 __kmp_ncores = totals[coreIdIndex];
2376
2377 if (__kmp_affinity_verbose) {
2378 if (! KMP_AFFINITY_CAPABLE()) {
2379 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2380 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2381 if (uniform) {
2382 KMP_INFORM(Uniform, "KMP_AFFINITY");
2383 } else {
2384 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2385 }
2386 }
2387 else {
2388 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2389 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
2390 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2391 if (__kmp_affinity_respect_mask) {
2392 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2393 } else {
2394 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2395 }
2396 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2397 if (uniform) {
2398 KMP_INFORM(Uniform, "KMP_AFFINITY");
2399 } else {
2400 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2401 }
2402 }
2403 kmp_str_buf_t buf;
2404 __kmp_str_buf_init(&buf);
2405
2406 __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
2407 for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2408 __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
2409 }
2410 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
2411 maxCt[threadIdIndex], __kmp_ncores);
2412
2413 __kmp_str_buf_free(&buf);
2414 }
2415
2416# if KMP_MIC && REDUCE_TEAM_SIZE
2417 //
2418 // Set the default team size.
2419 //
2420 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2421 __kmp_dflt_team_nth = teamSize;
2422 KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
2423 __kmp_dflt_team_nth));
2424 }
2425# endif // KMP_MIC && REDUCE_TEAM_SIZE
2426
2427 if (__kmp_affinity_type == affinity_none) {
2428 __kmp_free(lastId);
2429 __kmp_free(totals);
2430 __kmp_free(maxCt);
2431 __kmp_free(counts);
2432 CLEANUP_THREAD_INFO;
2433 return 0;
2434 }
2435
2436 //
2437 // Count the number of levels which have more nodes at that level than
2438 // at the parent's level (with there being an implicit root node of
2439 // the top level). This is equivalent to saying that there is at least
2440 // one node at this level which has a sibling. These levels are in the
2441 // map, and the package level is always in the map.
2442 //
2443 bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
2444 int level = 0;
2445 for (index = threadIdIndex; index < maxIndex; index++) {
2446 KMP_ASSERT(totals[index] >= totals[index + 1]);
2447 inMap[index] = (totals[index] > totals[index + 1]);
2448 }
2449 inMap[maxIndex] = (totals[maxIndex] > 1);
2450 inMap[pkgIdIndex] = true;
2451
2452 int depth = 0;
2453 for (index = threadIdIndex; index <= maxIndex; index++) {
2454 if (inMap[index]) {
2455 depth++;
2456 }
2457 }
2458 KMP_ASSERT(depth > 0);
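    //
    // Example (illustrative): for 2 packages x 8 cores x 1 thread per core,
    // the thread level has no siblings (totals are equal at the thread and
    // core levels), so it drops out of the map and depth becomes 2
    // (package and core).
    //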
2459
2460 //
2461 // Construct the data structure that is to be returned.
2462 //
2463 *address2os = (AddrUnsPair*)
2464 __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
2465 int pkgLevel = -1;
2466 int coreLevel = -1;
2467 int threadLevel = -1;
2468
2469 for (i = 0; i < num_avail; ++i) {
2470 Address addr(depth);
2471 unsigned os = threadInfo[i][osIdIndex];
2472 int src_index;
2473 int dst_index = 0;
2474
2475 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
2476 if (! inMap[src_index]) {
2477 continue;
2478 }
2479 addr.labels[dst_index] = threadInfo[i][src_index];
2480 if (src_index == pkgIdIndex) {
2481 pkgLevel = dst_index;
2482 }
2483 else if (src_index == coreIdIndex) {
2484 coreLevel = dst_index;
2485 }
2486 else if (src_index == threadIdIndex) {
2487 threadLevel = dst_index;
2488 }
2489 dst_index++;
2490 }
2491 (*address2os)[i] = AddrUnsPair(addr, os);
2492 }
2493
2494 if (__kmp_affinity_gran_levels < 0) {
2495 //
2496 // Set the granularity level based on what levels are modeled
2497 // in the machine topology map.
2498 //
2499 unsigned src_index;
2500 __kmp_affinity_gran_levels = 0;
2501 for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2502 if (! inMap[src_index]) {
2503 continue;
2504 }
2505 switch (src_index) {
2506 case threadIdIndex:
2507 if (__kmp_affinity_gran > affinity_gran_thread) {
2508 __kmp_affinity_gran_levels++;
2509 }
2510
2511 break;
2512 case coreIdIndex:
2513 if (__kmp_affinity_gran > affinity_gran_core) {
2514 __kmp_affinity_gran_levels++;
2515 }
2516 break;
2517
2518 case pkgIdIndex:
2519 if (__kmp_affinity_gran > affinity_gran_package) {
2520 __kmp_affinity_gran_levels++;
2521 }
2522 break;
2523 }
2524 }
2525 }
2526
2527 if (__kmp_affinity_verbose) {
2528 __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2529 coreLevel, threadLevel);
2530 }
2531
2532 __kmp_free(inMap);
2533 __kmp_free(lastId);
2534 __kmp_free(totals);
2535 __kmp_free(maxCt);
2536 __kmp_free(counts);
2537 CLEANUP_THREAD_INFO;
2538 return depth;
2539}
2540
2541
2542//
2543// Create and return a table of affinity masks, indexed by OS thread ID.
2544// This routine handles OR'ing together all the affinity masks of threads
2545// that are sufficiently close, if granularity > fine.
2546//
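// For example (illustrative): with granularity=core on a machine that has
// 2 hardware threads per core, the two OS procs sharing a core both receive
// the same mask, with both of their bits set, in the returned table.
//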
2547static kmp_affin_mask_t *
2548__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
2549 AddrUnsPair *address2os, unsigned numAddrs)
2550{
2551 //
2552 // First form a table of affinity masks in order of OS thread id.
2553 //
2554 unsigned depth;
2555 unsigned maxOsId;
2556 unsigned i;
2557
2558 KMP_ASSERT(numAddrs > 0);
2559 depth = address2os[0].first.depth;
2560
2561 maxOsId = 0;
2562 for (i = 0; i < numAddrs; i++) {
2563 unsigned osId = address2os[i].second;
2564 if (osId > maxOsId) {
2565 maxOsId = osId;
2566 }
2567 }
2568 kmp_affin_mask_t *osId2Mask = (kmp_affin_mask_t *)__kmp_allocate(
2569 (maxOsId + 1) * __kmp_affin_mask_size);
2570
2571 //
2572 // Sort the address2os table according to physical order. Doing so
2573 // will put all threads on the same core/package/node in consecutive
2574 // locations.
2575 //
2576 qsort(address2os, numAddrs, sizeof(*address2os),
2577 __kmp_affinity_cmp_Address_labels);
2578
2579 KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2580 if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
2581 KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
2582 }
2583 if (__kmp_affinity_gran_levels >= (int)depth) {
2584 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2585 && (__kmp_affinity_type != affinity_none))) {
2586 KMP_WARNING(AffThreadsMayMigrate);
2587 }
2588 }
2589
2590 //
2591 // Run through the table, forming the masks for all threads on each
2592 // core. Threads on the same core will have identical "Address"
2593 // objects, not considering the last level, which must be the thread
2594 // id. All threads on a core will appear consecutively.
2595 //
2596 unsigned unique = 0;
2597 unsigned j = 0; // index of 1st thread on core
2598 unsigned leader = 0;
2599 Address *leaderAddr = &(address2os[0].first);
2600 kmp_affin_mask_t *sum
2601 = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
2602 KMP_CPU_ZERO(sum);
2603 KMP_CPU_SET(address2os[0].second, sum);
2604 for (i = 1; i < numAddrs; i++) {
2605 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002606 // If this thread is sufficiently close to the leader (within the
Jim Cownie5e8470a2013-09-27 10:38:44 +00002607 // granularity setting), then set the bit for this os thread in the
2608 // affinity mask for this group, and go on to the next thread.
2609 //
2610 if (leaderAddr->isClose(address2os[i].first,
2611 __kmp_affinity_gran_levels)) {
2612 KMP_CPU_SET(address2os[i].second, sum);
2613 continue;
2614 }
2615
2616 //
2617 // For every thread in this group, copy the mask to the thread's
2618 // entry in the osId2Mask table. Mark the first address as a
2619 // leader.
2620 //
2621 for (; j < i; j++) {
2622 unsigned osId = address2os[j].second;
2623 KMP_DEBUG_ASSERT(osId <= maxOsId);
2624 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2625 KMP_CPU_COPY(mask, sum);
2626 address2os[j].first.leader = (j == leader);
2627 }
2628 unique++;
2629
2630 //
2631 // Start a new mask.
2632 //
2633 leader = i;
2634 leaderAddr = &(address2os[i].first);
2635 KMP_CPU_ZERO(sum);
2636 KMP_CPU_SET(address2os[i].second, sum);
2637 }
2638
2639 //
2640 // For every thread in last group, copy the mask to the thread's
2641 // entry in the osId2Mask table.
2642 //
2643 for (; j < i; j++) {
2644 unsigned osId = address2os[j].second;
2645 KMP_DEBUG_ASSERT(osId <= maxOsId);
2646 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2647 KMP_CPU_COPY(mask, sum);
2648 address2os[j].first.leader = (j == leader);
2649 }
2650 unique++;
2651
2652 *maxIndex = maxOsId;
2653 *numUnique = unique;
2654 return osId2Mask;
2655}
2656
2657
2658//
2659// Stuff for the affinity proclist parsers. It's easier to declare these vars
2660// as file-static than to try and pass them through the calling sequence of
2661// the recursive-descent OMP_PLACES parser.
2662//
2663static kmp_affin_mask_t *newMasks;
2664static int numNewMasks;
2665static int nextNewMask;
2666
2667#define ADD_MASK(_mask) \
2668 { \
2669 if (nextNewMask >= numNewMasks) { \
2670 numNewMasks *= 2; \
2671 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
2672 numNewMasks * __kmp_affin_mask_size); \
2673 } \
2674 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
2675 nextNewMask++; \
2676 }
2677
2678#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
2679 { \
2680 if (((_osId) > _maxOsId) || \
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002681 (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002682 if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
2683 && (__kmp_affinity_type != affinity_none))) { \
2684 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
2685 } \
2686 } \
2687 else { \
2688 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
2689 } \
2690 }
2691
2692
2693//
2694// Re-parse the proclist (for the explicit affinity type), and form the list
2695// of affinity newMasks indexed by gtid.
2696//
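// The parser accepts single OS proc ids, ranges with an optional stride, and
// braced sets that are OR'ed into a single mask.  An illustrative setting
// (example values, not from the original source) that it would accept:
//
//     KMP_AFFINITY="explicit,proclist=[0,2-6,{8,10},12-20:4]"
//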
2697static void
2698__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
2699 unsigned int *out_numMasks, const char *proclist,
2700 kmp_affin_mask_t *osId2Mask, int maxOsId)
2701{
2702 const char *scan = proclist;
2703 const char *next = proclist;
2704
2705 //
2706 // We use malloc() for the temporary mask vector,
2707 // so that we can use realloc() to extend it.
2708 //
2709 numNewMasks = 2;
2710 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
2711 * __kmp_affin_mask_size);
2712 nextNewMask = 0;
2713 kmp_affin_mask_t *sumMask = (kmp_affin_mask_t *)__kmp_allocate(
2714 __kmp_affin_mask_size);
2715 int setSize = 0;
2716
2717 for (;;) {
2718 int start, end, stride;
2719
2720 SKIP_WS(scan);
2721 next = scan;
2722 if (*next == '\0') {
2723 break;
2724 }
2725
2726 if (*next == '{') {
2727 int num;
2728 setSize = 0;
2729 next++; // skip '{'
2730 SKIP_WS(next);
2731 scan = next;
2732
2733 //
2734 // Read the first integer in the set.
2735 //
2736 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2737 "bad proclist");
2738 SKIP_DIGITS(next);
2739 num = __kmp_str_to_int(scan, *next);
2740 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2741
2742 //
2743 // Copy the mask for that osId to the sum (union) mask.
2744 //
2745 if ((num > maxOsId) ||
2746 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2747 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2748 && (__kmp_affinity_type != affinity_none))) {
2749 KMP_WARNING(AffIgnoreInvalidProcID, num);
2750 }
2751 KMP_CPU_ZERO(sumMask);
2752 }
2753 else {
2754 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2755 setSize = 1;
2756 }
2757
2758 for (;;) {
2759 //
2760 // Check for end of set.
2761 //
2762 SKIP_WS(next);
2763 if (*next == '}') {
2764 next++; // skip '}'
2765 break;
2766 }
2767
2768 //
2769 // Skip optional comma.
2770 //
2771 if (*next == ',') {
2772 next++;
2773 }
2774 SKIP_WS(next);
2775
2776 //
2777 // Read the next integer in the set.
2778 //
2779 scan = next;
2780 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2781 "bad explicit proc list");
2782
2783 SKIP_DIGITS(next);
2784 num = __kmp_str_to_int(scan, *next);
2785 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2786
2787 //
2788 // Add the mask for that osId to the sum mask.
2789 //
2790 if ((num > maxOsId) ||
2791 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2792 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2793 && (__kmp_affinity_type != affinity_none))) {
2794 KMP_WARNING(AffIgnoreInvalidProcID, num);
2795 }
2796 }
2797 else {
2798 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2799 setSize++;
2800 }
2801 }
2802 if (setSize > 0) {
2803 ADD_MASK(sumMask);
2804 }
2805
2806 SKIP_WS(next);
2807 if (*next == ',') {
2808 next++;
2809 }
2810 scan = next;
2811 continue;
2812 }
2813
2814 //
2815 // Read the first integer.
2816 //
2817 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2818 SKIP_DIGITS(next);
2819 start = __kmp_str_to_int(scan, *next);
2820 KMP_ASSERT2(start >= 0, "bad explicit proc list");
2821 SKIP_WS(next);
2822
2823 //
2824 // If this isn't a range, then add a mask to the list and go on.
2825 //
2826 if (*next != '-') {
2827 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2828
2829 //
2830 // Skip optional comma.
2831 //
2832 if (*next == ',') {
2833 next++;
2834 }
2835 scan = next;
2836 continue;
2837 }
2838
2839 //
2840 // This is a range. Skip over the '-' and read in the 2nd int.
2841 //
2842 next++; // skip '-'
2843 SKIP_WS(next);
2844 scan = next;
2845 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2846 SKIP_DIGITS(next);
2847 end = __kmp_str_to_int(scan, *next);
2848 KMP_ASSERT2(end >= 0, "bad explicit proc list");
2849
2850 //
2851 // Check for a stride parameter
2852 //
2853 stride = 1;
2854 SKIP_WS(next);
2855 if (*next == ':') {
2856 //
2857            // A stride is specified. Skip over the ':' and read the 3rd int.
2858 //
2859 int sign = +1;
2860 next++; // skip ':'
2861 SKIP_WS(next);
2862 scan = next;
2863 if (*next == '-') {
2864 sign = -1;
2865 next++;
2866 SKIP_WS(next);
2867 scan = next;
2868 }
2869 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2870 "bad explicit proc list");
2871 SKIP_DIGITS(next);
2872 stride = __kmp_str_to_int(scan, *next);
2873 KMP_ASSERT2(stride >= 0, "bad explicit proc list");
2874 stride *= sign;
2875 }
2876
2877 //
2878 // Do some range checks.
2879 //
2880 KMP_ASSERT2(stride != 0, "bad explicit proc list");
2881 if (stride > 0) {
2882 KMP_ASSERT2(start <= end, "bad explicit proc list");
2883 }
2884 else {
2885 KMP_ASSERT2(start >= end, "bad explicit proc list");
2886 }
2887 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
2888
2889 //
2890 // Add the mask for each OS proc # to the list.
2891 //
2892 if (stride > 0) {
2893 do {
2894 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2895 start += stride;
2896 } while (start <= end);
2897 }
2898 else {
2899 do {
2900 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2901 start += stride;
2902 } while (start >= end);
2903 }
2904
2905 //
2906 // Skip optional comma.
2907 //
2908 SKIP_WS(next);
2909 if (*next == ',') {
2910 next++;
2911 }
2912 scan = next;
2913 }
2914
2915 *out_numMasks = nextNewMask;
2916 if (nextNewMask == 0) {
2917 *out_masks = NULL;
2918 KMP_INTERNAL_FREE(newMasks);
2919 return;
2920 }
2921 *out_masks
2922 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
2923 memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
2924 __kmp_free(sumMask);
2925 KMP_INTERNAL_FREE(newMasks);
2926}
2927
2928
2929# if OMP_40_ENABLED
2930
2931/*-----------------------------------------------------------------------------
2932
2933Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
2934places. Again, here is the grammar:
2935
2936place_list := place
2937place_list := place , place_list
2938place := num
2939place := place : num
2940place := place : num : signed
2941place := { subplace_list }
2942place := ! place // (lowest priority)
2943subplace_list := subplace
2944subplace_list := subplace , subplace_list
2945subplace := num
2946subplace := num : num
2947subplace := num : num : signed
2948signed := num
2949signed := + signed
2950signed := - signed
2951
2952-----------------------------------------------------------------------------*/
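//
// Illustrative examples (values assumed, not from the original source) of
// place lists accepted by the grammar above:
//
//     OMP_PLACES="{0,1,2,3},{4,5,6,7}"   // two places listed explicitly
//     OMP_PLACES="{0:4},{4:4},{8:4}"     // three places using <num>:<len> subplaces
//     OMP_PLACES="{0:4}:4:4"             // one place replicated 4 times with stride 4
//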
2953
2954static void
2955__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
2956 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
2957{
2958 const char *next;
2959
2960 for (;;) {
2961 int start, count, stride, i;
2962
2963 //
2964 // Read in the starting proc id
2965 //
2966 SKIP_WS(*scan);
2967 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2968 "bad explicit places list");
2969 next = *scan;
2970 SKIP_DIGITS(next);
2971 start = __kmp_str_to_int(*scan, *next);
2972 KMP_ASSERT(start >= 0);
2973 *scan = next;
2974
2975 //
2976 // valid follow sets are ',' ':' and '}'
2977 //
2978 SKIP_WS(*scan);
2979 if (**scan == '}' || **scan == ',') {
2980 if ((start > maxOsId) ||
2981 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
2982 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2983 && (__kmp_affinity_type != affinity_none))) {
2984 KMP_WARNING(AffIgnoreInvalidProcID, start);
2985 }
2986 }
2987 else {
2988 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
2989 (*setSize)++;
2990 }
2991 if (**scan == '}') {
2992 break;
2993 }
2994 (*scan)++; // skip ','
2995 continue;
2996 }
2997 KMP_ASSERT2(**scan == ':', "bad explicit places list");
2998 (*scan)++; // skip ':'
2999
3000 //
3001 // Read count parameter
3002 //
3003 SKIP_WS(*scan);
3004 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3005 "bad explicit places list");
3006 next = *scan;
3007 SKIP_DIGITS(next);
3008 count = __kmp_str_to_int(*scan, *next);
3009 KMP_ASSERT(count >= 0);
3010 *scan = next;
3011
3012 //
3013 // valid follow sets are ',' ':' and '}'
3014 //
3015 SKIP_WS(*scan);
3016 if (**scan == '}' || **scan == ',') {
3017 for (i = 0; i < count; i++) {
3018 if ((start > maxOsId) ||
3019 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3020 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3021 && (__kmp_affinity_type != affinity_none))) {
3022 KMP_WARNING(AffIgnoreInvalidProcID, start);
3023 }
3024 break; // don't proliferate warnings for large count
3025 }
3026 else {
3027 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3028 start++;
3029 (*setSize)++;
3030 }
3031 }
3032 if (**scan == '}') {
3033 break;
3034 }
3035 (*scan)++; // skip ','
3036 continue;
3037 }
3038 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3039 (*scan)++; // skip ':'
3040
3041 //
3042 // Read stride parameter
3043 //
3044 int sign = +1;
3045 for (;;) {
3046 SKIP_WS(*scan);
3047 if (**scan == '+') {
3048 (*scan)++; // skip '+'
3049 continue;
3050 }
3051 if (**scan == '-') {
3052 sign *= -1;
3053 (*scan)++; // skip '-'
3054 continue;
3055 }
3056 break;
3057 }
3058 SKIP_WS(*scan);
3059 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3060 "bad explicit places list");
3061 next = *scan;
3062 SKIP_DIGITS(next);
3063 stride = __kmp_str_to_int(*scan, *next);
3064 KMP_ASSERT(stride >= 0);
3065 *scan = next;
3066 stride *= sign;
3067
3068 //
3069 // valid follow sets are ',' and '}'
3070 //
3071 SKIP_WS(*scan);
3072 if (**scan == '}' || **scan == ',') {
3073 for (i = 0; i < count; i++) {
3074 if ((start > maxOsId) ||
3075 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3076 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3077 && (__kmp_affinity_type != affinity_none))) {
3078 KMP_WARNING(AffIgnoreInvalidProcID, start);
3079 }
3080 break; // don't proliferate warnings for large count
3081 }
3082 else {
3083 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3084 start += stride;
3085 (*setSize)++;
3086 }
3087 }
3088 if (**scan == '}') {
3089 break;
3090 }
3091 (*scan)++; // skip ','
3092 continue;
3093 }
3094
3095 KMP_ASSERT2(0, "bad explicit places list");
3096 }
3097}
3098
3099
3100static void
3101__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
3102 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3103{
3104 const char *next;
3105
3106 //
3107 // valid follow sets are '{' '!' and num
3108 //
3109 SKIP_WS(*scan);
3110 if (**scan == '{') {
3111 (*scan)++; // skip '{'
3112 __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
3113 setSize);
3114 KMP_ASSERT2(**scan == '}', "bad explicit places list");
3115 (*scan)++; // skip '}'
3116 }
3117    else if (**scan == '!') {
3118        (*scan)++; // skip '!'
3119        __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
3120        KMP_CPU_COMPLEMENT(tempMask);
3121    }
3122 else if ((**scan >= '0') && (**scan <= '9')) {
3123 next = *scan;
3124 SKIP_DIGITS(next);
3125 int num = __kmp_str_to_int(*scan, *next);
3126 KMP_ASSERT(num >= 0);
3127 if ((num > maxOsId) ||
3128 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3129 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3130 && (__kmp_affinity_type != affinity_none))) {
3131 KMP_WARNING(AffIgnoreInvalidProcID, num);
3132 }
3133 }
3134 else {
3135 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
3136 (*setSize)++;
3137 }
3138 *scan = next; // skip num
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003139 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003140 else {
3141 KMP_ASSERT2(0, "bad explicit places list");
3142 }
3143}
3144
3145
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003146//static void
3147void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003148__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
3149 unsigned int *out_numMasks, const char *placelist,
3150 kmp_affin_mask_t *osId2Mask, int maxOsId)
3151{
3152 const char *scan = placelist;
3153 const char *next = placelist;
3154
3155 numNewMasks = 2;
3156 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
3157 * __kmp_affin_mask_size);
3158 nextNewMask = 0;
3159
3160 kmp_affin_mask_t *tempMask = (kmp_affin_mask_t *)__kmp_allocate(
3161 __kmp_affin_mask_size);
3162 KMP_CPU_ZERO(tempMask);
3163 int setSize = 0;
3164
3165 for (;;) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003166 __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
3167
3168 //
3169 // valid follow sets are ',' ':' and EOL
3170 //
3171 SKIP_WS(scan);
3172 if (*scan == '\0' || *scan == ',') {
3173 if (setSize > 0) {
3174 ADD_MASK(tempMask);
3175 }
3176 KMP_CPU_ZERO(tempMask);
3177 setSize = 0;
3178 if (*scan == '\0') {
3179 break;
3180 }
3181 scan++; // skip ','
3182 continue;
3183 }
3184
3185 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3186 scan++; // skip ':'
3187
3188 //
3189 // Read count parameter
3190 //
3191 SKIP_WS(scan);
3192 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3193 "bad explicit places list");
3194 next = scan;
3195 SKIP_DIGITS(next);
Jim Cownie181b4bb2013-12-23 17:28:57 +00003196 int count = __kmp_str_to_int(scan, *next);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003197 KMP_ASSERT(count >= 0);
3198 scan = next;
3199
3200 //
3201 // valid follow sets are ',' ':' and EOL
3202 //
3203 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003204 int stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003205 if (*scan == '\0' || *scan == ',') {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003206 stride = +1;
3207 }
3208 else {
3209 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3210 scan++; // skip ':'
Jim Cownie5e8470a2013-09-27 10:38:44 +00003211
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003212 //
3213 // Read stride parameter
3214 //
3215 int sign = +1;
3216 for (;;) {
3217 SKIP_WS(scan);
3218 if (*scan == '+') {
3219 scan++; // skip '+'
3220 continue;
3221 }
3222 if (*scan == '-') {
3223 sign *= -1;
3224 scan++; // skip '-'
3225 continue;
3226 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003227 break;
3228 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003229 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003230 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3231 "bad explicit places list");
3232 next = scan;
3233 SKIP_DIGITS(next);
3234 stride = __kmp_str_to_int(scan, *next);
3235 KMP_DEBUG_ASSERT(stride >= 0);
3236 scan = next;
3237 stride *= sign;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003238 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003239
3240 if (stride > 0) {
3241 int i;
3242 for (i = 0; i < count; i++) {
3243 int j;
3244 if (setSize == 0) {
3245 break;
3246 }
3247 ADD_MASK(tempMask);
3248 setSize = 0;
3249 for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003250 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3251 KMP_CPU_CLR(j, tempMask);
3252 }
3253 else if ((j > maxOsId) ||
3254 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
3255 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3256 && (__kmp_affinity_type != affinity_none))) {
3257 KMP_WARNING(AffIgnoreInvalidProcID, j);
3258 }
3259 KMP_CPU_CLR(j, tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003260 }
3261 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003262 KMP_CPU_SET(j, tempMask);
3263 setSize++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003264 }
3265 }
3266 for (; j >= 0; j--) {
3267 KMP_CPU_CLR(j, tempMask);
3268 }
3269 }
3270 }
3271 else {
3272 int i;
3273 for (i = 0; i < count; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003274 int j;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003275 if (setSize == 0) {
3276 break;
3277 }
3278 ADD_MASK(tempMask);
3279 setSize = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003280 for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003281 j++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003282 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3283 KMP_CPU_CLR(j, tempMask);
3284 }
3285 else if ((j > maxOsId) ||
3286 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
3287 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3288 && (__kmp_affinity_type != affinity_none))) {
3289 KMP_WARNING(AffIgnoreInvalidProcID, j);
3290 }
3291 KMP_CPU_CLR(j, tempMask);
3292 }
3293 else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003294 KMP_CPU_SET(j, tempMask);
3295 setSize++;
3296 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003297 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003298 for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003299 KMP_CPU_CLR(j, tempMask);
3300 }
3301 }
3302 }
3303 KMP_CPU_ZERO(tempMask);
3304 setSize = 0;
3305
3306 //
3307 // valid follow sets are ',' and EOL
3308 //
3309 SKIP_WS(scan);
3310 if (*scan == '\0') {
3311 break;
3312 }
3313 if (*scan == ',') {
3314 scan++; // skip ','
3315 continue;
3316 }
3317
3318 KMP_ASSERT2(0, "bad explicit places list");
3319 }
3320
3321 *out_numMasks = nextNewMask;
3322 if (nextNewMask == 0) {
3323 *out_masks = NULL;
3324 KMP_INTERNAL_FREE(newMasks);
3325 return;
3326 }
3327 *out_masks
3328 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
3329 memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
3330 __kmp_free(tempMask);
3331 KMP_INTERNAL_FREE(newMasks);
3332}
3333
3334# endif /* OMP_40_ENABLED */
3335
3336#undef ADD_MASK
3337#undef ADD_MASK_OSID
3338
3339
3340# if KMP_MIC
3341
3342static void
3343__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
3344{
3345 if ( __kmp_place_num_cores == 0 ) {
3346 if ( __kmp_place_num_threads_per_core == 0 ) {
3347 return; // no cores limiting actions requested, exit
3348 }
3349 __kmp_place_num_cores = nCoresPerPkg; // use all available cores
3350 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003351 if ( !__kmp_affinity_uniform_topology() ) {
3352 KMP_WARNING( AffThrPlaceNonUniform );
3353 return; // don't support non-uniform topology
3354 }
3355 if ( depth != 3 ) {
3356 KMP_WARNING( AffThrPlaceNonThreeLevel );
3357        return; // don't support non-3-level topology
Jim Cownie5e8470a2013-09-27 10:38:44 +00003358 }
3359 if ( __kmp_place_num_threads_per_core == 0 ) {
3360 __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
3361 }
3362 if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
3363 KMP_WARNING( AffThrPlaceManyCores );
3364 return;
3365 }
3366
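    //
    // Illustrative example (values assumed): with nPackages = 1,
    // nCoresPerPkg = 61, __kmp_nThreadsPerCore = 4, __kmp_place_core_offset = 1,
    // __kmp_place_num_cores = 60 and __kmp_place_num_threads_per_core = 2,
    // the copy loop below keeps the first 2 hardware threads of cores 1..60,
    // leaving 60 * 2 = 120 entries in the filtered topology.
    //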
3367 AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
3368 nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
3369 int i, j, k, n_old = 0, n_new = 0;
3370 for ( i = 0; i < nPackages; ++i ) {
3371 for ( j = 0; j < nCoresPerPkg; ++j ) {
3372 if ( j < __kmp_place_core_offset || j >= __kmp_place_core_offset + __kmp_place_num_cores ) {
3373 n_old += __kmp_nThreadsPerCore; // skip not-requested core
3374 } else {
3375 for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) {
3376 if ( k < __kmp_place_num_threads_per_core ) {
3377                        newAddr[n_new] = (*pAddr)[n_old]; // copy requested core's data to new location
3378 n_new++;
3379 }
3380 n_old++;
3381 }
3382 }
3383 }
3384 }
3385 nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
3386 __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
3387 __kmp_avail_proc = n_new; // correct avail_proc
3388 __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
3389
3390 __kmp_free( *pAddr );
3391 *pAddr = newAddr; // replace old topology with new one
3392}
3393
3394# endif /* KMP_MIC */
3395
3396
3397static AddrUnsPair *address2os = NULL;
3398static int * procarr = NULL;
3399static int __kmp_aff_depth = 0;
3400
3401static void
3402__kmp_aux_affinity_initialize(void)
3403{
3404 if (__kmp_affinity_masks != NULL) {
3405 KMP_ASSERT(fullMask != NULL);
3406 return;
3407 }
3408
3409 //
3410 // Create the "full" mask - this defines all of the processors that we
3411 // consider to be in the machine model. If respect is set, then it is
3412 // the initialization thread's affinity mask. Otherwise, it is all
3413 // processors that we know about on the machine.
3414 //
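    // For example (illustrative): if the process was launched under
    // "taskset -c 0-7" and the respect setting is in effect, only OS procs
    // 0-7 enter the machine model; with "norespect", every processor the
    // runtime detects on the machine is included.
    //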
3415 if (fullMask == NULL) {
3416 fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
3417 }
3418 if (KMP_AFFINITY_CAPABLE()) {
3419 if (__kmp_affinity_respect_mask) {
3420 __kmp_get_system_affinity(fullMask, TRUE);
3421
3422 //
3423 // Count the number of available processors.
3424 //
3425 unsigned i;
3426 __kmp_avail_proc = 0;
3427 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
3428 if (! KMP_CPU_ISSET(i, fullMask)) {
3429 continue;
3430 }
3431 __kmp_avail_proc++;
3432 }
3433 if (__kmp_avail_proc > __kmp_xproc) {
3434 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3435 && (__kmp_affinity_type != affinity_none))) {
3436 KMP_WARNING(ErrorInitializeAffinity);
3437 }
3438 __kmp_affinity_type = affinity_none;
3439 __kmp_affin_mask_size = 0;
3440 return;
3441 }
3442 }
3443 else {
3444 __kmp_affinity_entire_machine_mask(fullMask);
3445 __kmp_avail_proc = __kmp_xproc;
3446 }
3447 }
3448
3449 int depth = -1;
3450 kmp_i18n_id_t msg_id = kmp_i18n_null;
3451
3452 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00003453 // For backward compatibility, setting KMP_CPUINFO_FILE =>
Jim Cownie5e8470a2013-09-27 10:38:44 +00003454 // KMP_TOPOLOGY_METHOD=cpuinfo
3455 //
3456 if ((__kmp_cpuinfo_file != NULL) &&
3457 (__kmp_affinity_top_method == affinity_top_method_all)) {
3458 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
3459 }
3460
3461 if (__kmp_affinity_top_method == affinity_top_method_all) {
3462 //
3463 // In the default code path, errors are not fatal - we just try using
3464 // another method. We only emit a warning message if affinity is on,
3465        // or the verbose flag is set, and the nowarnings flag was not set.
3466 //
3467 const char *file_name = NULL;
3468 int line = 0;
3469
3470# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3471
3472 if (__kmp_affinity_verbose) {
3473 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
3474 }
3475
3476 file_name = NULL;
3477 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3478 if (depth == 0) {
3479 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3480 KMP_ASSERT(address2os == NULL);
3481 return;
3482 }
3483
3484 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003485 if (__kmp_affinity_verbose) {
3486 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003487 KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
3488 KMP_I18N_STR(DecodingLegacyAPIC));
3489 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003490 else {
3491 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
3492 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003493 }
3494
3495 file_name = NULL;
3496 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3497 if (depth == 0) {
3498 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3499 KMP_ASSERT(address2os == NULL);
3500 return;
3501 }
3502 }
3503
3504# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3505
3506# if KMP_OS_LINUX
3507
3508 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003509 if (__kmp_affinity_verbose) {
3510 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003511 KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
3512 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003513 else {
3514 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
3515 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003516 }
3517
3518 FILE *f = fopen("/proc/cpuinfo", "r");
3519 if (f == NULL) {
3520 msg_id = kmp_i18n_str_CantOpenCpuinfo;
3521 }
3522 else {
3523 file_name = "/proc/cpuinfo";
3524 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3525 fclose(f);
3526 if (depth == 0) {
3527 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3528 KMP_ASSERT(address2os == NULL);
3529 return;
3530 }
3531 }
3532 }
3533
3534# endif /* KMP_OS_LINUX */
3535
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003536# if KMP_GROUP_AFFINITY
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003537
3538 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
3539 if (__kmp_affinity_verbose) {
3540 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3541 }
3542
3543 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3544 KMP_ASSERT(depth != 0);
3545 }
3546
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003547# endif /* KMP_GROUP_AFFINITY */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003548
Jim Cownie5e8470a2013-09-27 10:38:44 +00003549 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003550 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003551 if (file_name == NULL) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003552 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003553 }
3554 else if (line == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003555 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003556 }
3557 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003558 KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003559 }
3560 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003561 // FIXME - print msg if msg_id = kmp_i18n_null ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00003562
3563 file_name = "";
3564 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3565 if (depth == 0) {
3566 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3567 KMP_ASSERT(address2os == NULL);
3568 return;
3569 }
3570 KMP_ASSERT(depth > 0);
3571 KMP_ASSERT(address2os != NULL);
3572 }
3573 }
3574
3575 //
3576    // If the user has specified that a particular topology discovery method
3577 // is to be used, then we abort if that method fails. The exception is
3578 // group affinity, which might have been implicitly set.
3579 //
3580
3581# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3582
3583 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
3584 if (__kmp_affinity_verbose) {
3585 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3586 KMP_I18N_STR(Decodingx2APIC));
3587 }
3588
3589 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3590 if (depth == 0) {
3591 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3592 KMP_ASSERT(address2os == NULL);
3593 return;
3594 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003595 if (depth < 0) {
3596 KMP_ASSERT(msg_id != kmp_i18n_null);
3597 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3598 }
3599 }
3600 else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
3601 if (__kmp_affinity_verbose) {
3602 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3603 KMP_I18N_STR(DecodingLegacyAPIC));
3604 }
3605
3606 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3607 if (depth == 0) {
3608 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3609 KMP_ASSERT(address2os == NULL);
3610 return;
3611 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003612 if (depth < 0) {
3613 KMP_ASSERT(msg_id != kmp_i18n_null);
3614 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3615 }
3616 }
3617
3618# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3619
3620 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
3621 const char *filename;
3622 if (__kmp_cpuinfo_file != NULL) {
3623 filename = __kmp_cpuinfo_file;
3624 }
3625 else {
3626 filename = "/proc/cpuinfo";
3627 }
3628
3629 if (__kmp_affinity_verbose) {
3630 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
3631 }
3632
3633 FILE *f = fopen(filename, "r");
3634 if (f == NULL) {
3635 int code = errno;
3636 if (__kmp_cpuinfo_file != NULL) {
3637 __kmp_msg(
3638 kmp_ms_fatal,
3639 KMP_MSG(CantOpenFileForReading, filename),
3640 KMP_ERR(code),
3641 KMP_HNT(NameComesFrom_CPUINFO_FILE),
3642 __kmp_msg_null
3643 );
3644 }
3645 else {
3646 __kmp_msg(
3647 kmp_ms_fatal,
3648 KMP_MSG(CantOpenFileForReading, filename),
3649 KMP_ERR(code),
3650 __kmp_msg_null
3651 );
3652 }
3653 }
3654 int line = 0;
3655 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3656 fclose(f);
3657 if (depth < 0) {
3658 KMP_ASSERT(msg_id != kmp_i18n_null);
3659 if (line > 0) {
3660 KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
3661 }
3662 else {
3663 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
3664 }
3665 }
3666 if (__kmp_affinity_type == affinity_none) {
3667 KMP_ASSERT(depth == 0);
3668 KMP_ASSERT(address2os == NULL);
3669 return;
3670 }
3671 }
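    //
    // Illustrative note (not part of the original logic): the cpuinfo parser above
    // is typically exercised by pointing the runtime at a saved topology dump, e.g.
    //
    //   $ cat /proc/cpuinfo > /tmp/cpuinfo.dump        # captured on the target machine
    //   $ KMP_CPUINFO_FILE=/tmp/cpuinfo.dump KMP_AFFINITY=verbose ./a.out
    //
    // assuming __kmp_cpuinfo_file is populated from the KMP_CPUINFO_FILE environment
    // variable (as the NameComesFrom_CPUINFO_FILE hint suggests); the path and
    // program name are examples only.
    //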
3672
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003673# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00003674
3675 else if (__kmp_affinity_top_method == affinity_top_method_group) {
3676 if (__kmp_affinity_verbose) {
3677 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3678 }
3679
3680 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3681 KMP_ASSERT(depth != 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003682 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003683 KMP_ASSERT(msg_id != kmp_i18n_null);
3684 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003685 }
3686 }
3687
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003688# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003689
3690 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
3691 if (__kmp_affinity_verbose) {
3692 KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
3693 }
3694
3695 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3696 if (depth == 0) {
3697 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3698 KMP_ASSERT(address2os == NULL);
3699 return;
3700 }
3701 // should not fail
3702 KMP_ASSERT(depth > 0);
3703 KMP_ASSERT(address2os != NULL);
3704 }
3705
3706 if (address2os == NULL) {
3707 if (KMP_AFFINITY_CAPABLE()
3708 && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3709 && (__kmp_affinity_type != affinity_none)))) {
3710 KMP_WARNING(ErrorInitializeAffinity);
3711 }
3712 __kmp_affinity_type = affinity_none;
3713 __kmp_affin_mask_size = 0;
3714 return;
3715 }
3716
3717# if KMP_MIC
3718 __kmp_apply_thread_places(&address2os, depth);
3719# endif
3720
3721 //
3722 // Create the table of masks, indexed by thread Id.
3723 //
3724 unsigned maxIndex;
3725 unsigned numUnique;
3726 kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
3727 address2os, __kmp_avail_proc);
3728 if (__kmp_affinity_gran_levels == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003729 KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003730 }
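    //
    // Sketch of what __kmp_create_masks produces (illustrative, assuming
    // granularity=core on a 2-way SMT machine): OS procs 0 and 1 share a core,
    // so KMP_CPU_INDEX(osId2Mask, 0) and KMP_CPU_INDEX(osId2Mask, 1) both hold
    // the mask {0,1}; maxIndex is the largest OS proc id seen, and numUnique
    // counts the distinct masks (one per core in this example).
    //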
3731
3732 //
3733 // Set the childNums vector in all Address objects. This must be done
3734 // before we can sort using __kmp_affinity_cmp_Address_child_num(),
3735 // which takes into account the setting of __kmp_affinity_compact.
3736 //
3737 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
3738
3739 switch (__kmp_affinity_type) {
3740
3741 case affinity_explicit:
3742 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
3743# if OMP_40_ENABLED
3744 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3745# endif
3746 {
3747 __kmp_affinity_process_proclist(&__kmp_affinity_masks,
3748 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3749 maxIndex);
3750 }
3751# if OMP_40_ENABLED
3752 else {
3753 __kmp_affinity_process_placelist(&__kmp_affinity_masks,
3754 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3755 maxIndex);
3756 }
3757# endif
3758 if (__kmp_affinity_num_masks == 0) {
3759 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3760 && (__kmp_affinity_type != affinity_none))) {
3761 KMP_WARNING(AffNoValidProcID);
3762 }
3763 __kmp_affinity_type = affinity_none;
3764 return;
3765 }
3766 break;
3767
3768 //
3769 // The other affinity types rely on sorting the Addresses according
3770 // to some permutation of the machine topology tree. Set
3771 // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
3772 // then jump to a common code fragment to do the sort and create
3773 // the array of affinity masks.
3774 //
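    //
    // Worked example (illustrative): on a machine with __kmp_nThreadsPerCore == 2
    // and __kmp_avail_proc == 8, affinity_logical with an offset of 3 leaves
    // __kmp_affinity_compact == 0 and rescales the offset to (2 * 3) % 8 == 6,
    // i.e. the first thread starts six OS procs into the sorted list.
    // affinity_scatter with depth == 3 and a compact/permute value of 0 flips
    // __kmp_affinity_compact to 3 - 1 - 0 == 2 before the common sort.
    //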
3775
3776 case affinity_logical:
3777 __kmp_affinity_compact = 0;
3778 if (__kmp_affinity_offset) {
3779 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3780 % __kmp_avail_proc;
3781 }
3782 goto sortAddresses;
3783
3784 case affinity_physical:
3785 if (__kmp_nThreadsPerCore > 1) {
3786 __kmp_affinity_compact = 1;
3787 if (__kmp_affinity_compact >= depth) {
3788 __kmp_affinity_compact = 0;
3789 }
3790 } else {
3791 __kmp_affinity_compact = 0;
3792 }
3793 if (__kmp_affinity_offset) {
3794 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3795 % __kmp_avail_proc;
3796 }
3797 goto sortAddresses;
3798
3799 case affinity_scatter:
3800 if (__kmp_affinity_compact >= depth) {
3801 __kmp_affinity_compact = 0;
3802 }
3803 else {
3804 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
3805 }
3806 goto sortAddresses;
3807
3808 case affinity_compact:
3809 if (__kmp_affinity_compact >= depth) {
3810 __kmp_affinity_compact = depth - 1;
3811 }
3812 goto sortAddresses;
3813
Jim Cownie5e8470a2013-09-27 10:38:44 +00003814 case affinity_balanced:
Andrey Churbanovf28f6132015-01-13 14:54:00 +00003815 // Balanced works only for the case of a single package
Jim Cownie5e8470a2013-09-27 10:38:44 +00003816 if( nPackages > 1 ) {
3817 if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
3818 KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
3819 }
3820 __kmp_affinity_type = affinity_none;
3821 return;
3822 } else if( __kmp_affinity_uniform_topology() ) {
3823 break;
3824 } else { // Non-uniform topology
3825
3826 // Save the depth for further usage
3827 __kmp_aff_depth = depth;
3828
3829 // Number of hyper threads per core in HT machine
3830 int nth_per_core = __kmp_nThreadsPerCore;
3831
3832 int core_level;
3833 if( nth_per_core > 1 ) {
3834 core_level = depth - 2;
3835 } else {
3836 core_level = depth - 1;
3837 }
3838 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
3839 int nproc = nth_per_core * ncores;
3840
3841 procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
3842 for( int i = 0; i < nproc; i++ ) {
3843 procarr[ i ] = -1;
3844 }
3845
3846 for( int i = 0; i < __kmp_avail_proc; i++ ) {
3847 int proc = address2os[ i ].second;
3848 // If depth == 3 then level=0 - package, level=1 - core, level=2 - thread.
3849 // If there is only one thread per core then depth == 2: level 0 - package,
3850 // level 1 - core.
3851 int level = depth - 1;
3852
3853 // Defaults for the case of one thread context per core (__kmp_nth_per_core == 1)
3854 int thread = 0;
3855 int core = address2os[ i ].first.labels[ level ];
3856 // If the thread level exists, that is we have more than one thread context per core
3857 if( nth_per_core > 1 ) {
3858 thread = address2os[ i ].first.labels[ level ] % nth_per_core;
3859 core = address2os[ i ].first.labels[ level - 1 ];
3860 }
3861 procarr[ core * nth_per_core + thread ] = proc;
3862 }
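        //
        // Illustrative layout (not from the original sources): with
        // nth_per_core == 2 and three cores where core 1 exposes only one
        // usable thread context, procarr has 3 * 2 == 6 slots, e.g.
        //     procarr = { 0, 1,   2, -1,   3, 4 }
        // i.e. slot core * nth_per_core + thread holds the OS proc id, and
        // unusable contexts stay at -1.
        //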
3863
3864 break;
3865 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003866
3867 sortAddresses:
3868 //
3869 // Allocate the gtid->affinity mask table.
3870 //
3871 if (__kmp_affinity_dups) {
3872 __kmp_affinity_num_masks = __kmp_avail_proc;
3873 }
3874 else {
3875 __kmp_affinity_num_masks = numUnique;
3876 }
3877
3878# if OMP_40_ENABLED
3879 if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
3880 && ( __kmp_affinity_num_places > 0 )
3881 && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
3882 __kmp_affinity_num_masks = __kmp_affinity_num_places;
3883 }
3884# endif
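        //
        // Example of the clamp above (illustrative): if the place list yielded
        // __kmp_affinity_num_places == 2 but the topology produced 4 unique
        // masks, only the first 2 masks (in the sorted order established below)
        // become places.
        //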
3885
3886 __kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
3887 __kmp_affinity_num_masks * __kmp_affin_mask_size);
3888
3889 //
3890 // Sort the address2os table according to the current setting of
3891 // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
3892 //
3893 qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
3894 __kmp_affinity_cmp_Address_child_num);
3895 {
3896 int i;
3897 unsigned j;
3898 for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
3899 if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
3900 continue;
3901 }
3902 unsigned osId = address2os[i].second;
3903 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
3904 kmp_affin_mask_t *dest
3905 = KMP_CPU_INDEX(__kmp_affinity_masks, j);
3906 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
3907 KMP_CPU_COPY(dest, src);
3908 if (++j >= __kmp_affinity_num_masks) {
3909 break;
3910 }
3911 }
3912 KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
3913 }
3914 break;
3915
3916 default:
3917 KMP_ASSERT2(0, "Unexpected affinity setting");
3918 }
3919
3920 __kmp_free(osId2Mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003921 machine_hierarchy.init(address2os, __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003922}
3923
3924
3925void
3926__kmp_affinity_initialize(void)
3927{
3928 //
3929 // Much of the code above was written assuming that if a machine was not
3930 // affinity capable, then __kmp_affinity_type == affinity_none. We now
3931 // explicitly represent this as __kmp_affinity_type == affinity_disabled.
3932 //
3933 // There are too many checks for __kmp_affinity_type == affinity_none
3934 // in this code. Instead of trying to change them all, check if
3935 // __kmp_affinity_type == affinity_disabled, and if so, slam it with
3936 // affinity_none, call the real initialization routine, then restore
3937 // __kmp_affinity_type to affinity_disabled.
3938 //
3939 int disabled = (__kmp_affinity_type == affinity_disabled);
3940 if (! KMP_AFFINITY_CAPABLE()) {
3941 KMP_ASSERT(disabled);
3942 }
3943 if (disabled) {
3944 __kmp_affinity_type = affinity_none;
3945 }
3946 __kmp_aux_affinity_initialize();
3947 if (disabled) {
3948 __kmp_affinity_type = affinity_disabled;
3949 }
3950}
3951
3952
3953void
3954__kmp_affinity_uninitialize(void)
3955{
3956 if (__kmp_affinity_masks != NULL) {
3957 __kmp_free(__kmp_affinity_masks);
3958 __kmp_affinity_masks = NULL;
3959 }
3960 if (fullMask != NULL) {
3961 KMP_CPU_FREE(fullMask);
3962 fullMask = NULL;
3963 }
3964 __kmp_affinity_num_masks = 0;
3965# if OMP_40_ENABLED
3966 __kmp_affinity_num_places = 0;
3967# endif
3968 if (__kmp_affinity_proclist != NULL) {
3969 __kmp_free(__kmp_affinity_proclist);
3970 __kmp_affinity_proclist = NULL;
3971 }
3972 if( address2os != NULL ) {
3973 __kmp_free( address2os );
3974 address2os = NULL;
3975 }
3976 if( procarr != NULL ) {
3977 __kmp_free( procarr );
3978 procarr = NULL;
3979 }
3980}
3981
3982
3983void
3984__kmp_affinity_set_init_mask(int gtid, int isa_root)
3985{
3986 if (! KMP_AFFINITY_CAPABLE()) {
3987 return;
3988 }
3989
3990 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
3991 if (th->th.th_affin_mask == NULL) {
3992 KMP_CPU_ALLOC(th->th.th_affin_mask);
3993 }
3994 else {
3995 KMP_CPU_ZERO(th->th.th_affin_mask);
3996 }
3997
3998 //
3999 // Copy the thread mask to the kmp_info_t structure.
4000 // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
4001 // that has all of the OS proc ids set, or if __kmp_affinity_respect_mask
4002 // is set, then the full mask is the same as the mask of the initialization
4003 // thread.
4004 //
4005 kmp_affin_mask_t *mask;
4006 int i;
4007
4008# if OMP_40_ENABLED
4009 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
4010# endif
4011 {
Andrey Churbanovf28f6132015-01-13 14:54:00 +00004012 if ((__kmp_affinity_type == affinity_none) || (__kmp_affinity_type == affinity_balanced)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004013 ) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004014# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004015 if (__kmp_num_proc_groups > 1) {
4016 return;
4017 }
4018# endif
4019 KMP_ASSERT(fullMask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004020 i = KMP_PLACE_ALL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004021 mask = fullMask;
4022 }
4023 else {
4024 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4025 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4026 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4027 }
4028 }
4029# if OMP_40_ENABLED
4030 else {
4031 if ((! isa_root)
4032 || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004033# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004034 if (__kmp_num_proc_groups > 1) {
4035 return;
4036 }
4037# endif
4038 KMP_ASSERT(fullMask != NULL);
4039 i = KMP_PLACE_ALL;
4040 mask = fullMask;
4041 }
4042 else {
4043 //
4044 // int i = some hash function or just a counter that doesn't
4045 // always start at 0. Use gtid for now.
4046 //
4047 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4048 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4049 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4050 }
4051 }
4052# endif
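    //
    // Worked example (illustrative): with __kmp_affinity_num_masks == 4 and
    // __kmp_affinity_offset == 1, threads are assigned places
    // (gtid + 1) % 4, so gtid 0 -> place 1, gtid 1 -> place 2, gtid 3 -> place 0.
    //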
4053
4054# if OMP_40_ENABLED
4055 th->th.th_current_place = i;
4056 if (isa_root) {
4057 th->th.th_new_place = i;
4058 th->th.th_first_place = 0;
4059 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4060 }
4061
4062 if (i == KMP_PLACE_ALL) {
4063 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
4064 gtid));
4065 }
4066 else {
4067 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
4068 gtid, i));
4069 }
4070# else
4071 if (i == -1) {
4072 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
4073 gtid));
4074 }
4075 else {
4076 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
4077 gtid, i));
4078 }
4079# endif /* OMP_40_ENABLED */
4080
4081 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4082
4083 if (__kmp_affinity_verbose) {
4084 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4085 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4086 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004087 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
4088 buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004089 }
4090
4091# if KMP_OS_WINDOWS
4092 //
4093 // On Windows* OS, the process affinity mask might have changed.
4094 // If the user didn't request affinity and this call fails,
4095 // just continue silently. See CQ171393.
4096 //
4097 if ( __kmp_affinity_type == affinity_none ) {
4098 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
4099 }
4100 else
4101# endif
4102 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4103}
4104
4105
4106# if OMP_40_ENABLED
4107
4108void
4109__kmp_affinity_set_place(int gtid)
4110{
4111 int retval;
4112
4113 if (! KMP_AFFINITY_CAPABLE()) {
4114 return;
4115 }
4116
4117 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4118
4119 KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
4120 gtid, th->th.th_new_place, th->th.th_current_place));
4121
4122 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00004123 // Check that the new place is within this thread's partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004124 //
4125 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004126 KMP_ASSERT(th->th.th_new_place >= 0);
4127 KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004128 if (th->th.th_first_place <= th->th.th_last_place) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004129 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004130 && (th->th.th_new_place <= th->th.th_last_place));
4131 }
4132 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004133 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004134 || (th->th.th_new_place >= th->th.th_last_place));
4135 }
4136
4137 //
4138 // Copy the thread mask to the kmp_info_t structure,
4139 // and set this thread's affinity.
4140 //
4141 kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
4142 th->th.th_new_place);
4143 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4144 th->th.th_current_place = th->th.th_new_place;
4145
4146 if (__kmp_affinity_verbose) {
4147 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4148 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4149 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004150 KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
4151 gtid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004152 }
4153 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4154}
4155
4156# endif /* OMP_40_ENABLED */
4157
4158
4159int
4160__kmp_aux_set_affinity(void **mask)
4161{
4162 int gtid;
4163 kmp_info_t *th;
4164 int retval;
4165
4166 if (! KMP_AFFINITY_CAPABLE()) {
4167 return -1;
4168 }
4169
4170 gtid = __kmp_entry_gtid();
4171 KA_TRACE(1000, ;{
4172 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4173 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4174 (kmp_affin_mask_t *)(*mask));
4175 __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
4176 gtid, buf);
4177 });
4178
4179 if (__kmp_env_consistency_check) {
4180 if ((mask == NULL) || (*mask == NULL)) {
4181 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4182 }
4183 else {
4184 unsigned proc;
4185 int num_procs = 0;
4186
4187 for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
4188 if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
4189 continue;
4190 }
4191 num_procs++;
4192 if (! KMP_CPU_ISSET(proc, fullMask)) {
4193 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4194 break;
4195 }
4196 }
4197 if (num_procs == 0) {
4198 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4199 }
4200
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004201# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004202 if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
4203 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4204 }
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004205# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004206
4207 }
4208 }
4209
4210 th = __kmp_threads[gtid];
4211 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4212 retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4213 if (retval == 0) {
4214 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
4215 }
4216
4217# if OMP_40_ENABLED
4218 th->th.th_current_place = KMP_PLACE_UNDEFINED;
4219 th->th.th_new_place = KMP_PLACE_UNDEFINED;
4220 th->th.th_first_place = 0;
4221 th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004222
4223 //
4224 // Turn off 4.0 affinity for the current thread at this parallel level.
4225 //
4226 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004227# endif
4228
4229 return retval;
4230}
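
//
// Usage sketch (illustrative only): __kmp_aux_set_affinity() backs the
// kmp_set_affinity() entry point, so application code along these lines,
// assuming the documented KMP affinity API, ends up in the routine above:
//
//   kmp_affinity_mask_t m;
//   kmp_create_affinity_mask(&m);
//   kmp_set_affinity_mask_proc(2, &m);     // request OS proc 2
//   if (kmp_set_affinity(&m) != 0) {
//       /* mask rejected, e.g. empty or outside fullMask */
//   }
//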
4231
4232
4233int
4234__kmp_aux_get_affinity(void **mask)
4235{
4236 int gtid;
4237 int retval;
4238 kmp_info_t *th;
4239
4240 if (! KMP_AFFINITY_CAPABLE()) {
4241 return -1;
4242 }
4243
4244 gtid = __kmp_entry_gtid();
4245 th = __kmp_threads[gtid];
4246 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4247
4248 KA_TRACE(1000, ;{
4249 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4250 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4251 th->th.th_affin_mask);
4252 __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
4253 });
4254
4255 if (__kmp_env_consistency_check) {
4256 if ((mask == NULL) || (*mask == NULL)) {
4257 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
4258 }
4259 }
4260
4261# if !KMP_OS_WINDOWS
4262
4263 retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4264 KA_TRACE(1000, ;{
4265 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4266 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4267 (kmp_affin_mask_t *)(*mask));
4268 __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
4269 });
4270 return retval;
4271
4272# else
4273
4274 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
4275 return 0;
4276
4277# endif /* KMP_OS_WINDOWS */
4278
4279}
4280
Jim Cownie5e8470a2013-09-27 10:38:44 +00004281int
4282__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
4283{
4284 int retval;
4285
4286 if (! KMP_AFFINITY_CAPABLE()) {
4287 return -1;
4288 }
4289
4290 KA_TRACE(1000, ;{
4291 int gtid = __kmp_entry_gtid();
4292 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4293 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4294 (kmp_affin_mask_t *)(*mask));
4295 __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
4296 proc, gtid, buf);
4297 });
4298
4299 if (__kmp_env_consistency_check) {
4300 if ((mask == NULL) || (*mask == NULL)) {
4301 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
4302 }
4303 }
4304
4305 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4306 return -1;
4307 }
4308 if (! KMP_CPU_ISSET(proc, fullMask)) {
4309 return -2;
4310 }
4311
4312 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
4313 return 0;
4314}
4315
4316
4317int
4318__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
4319{
4320 int retval;
4321
4322 if (! KMP_AFFINITY_CAPABLE()) {
4323 return -1;
4324 }
4325
4326 KA_TRACE(1000, ;{
4327 int gtid = __kmp_entry_gtid();
4328 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4329 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4330 (kmp_affin_mask_t *)(*mask));
4331 __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
4332 proc, gtid, buf);
4333 });
4334
4335 if (__kmp_env_consistency_check) {
4336 if ((mask == NULL) || (*mask == NULL)) {
4337 KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
4338 }
4339 }
4340
4341 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4342 return -1;
4343 }
4344 if (! KMP_CPU_ISSET(proc, fullMask)) {
4345 return -2;
4346 }
4347
4348 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
4349 return 0;
4350}
4351
4352
4353int
4354__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
4355{
4356 int retval;
4357
4358 if (! KMP_AFFINITY_CAPABLE()) {
4359 return -1;
4360 }
4361
4362 KA_TRACE(1000, ;{
4363 int gtid = __kmp_entry_gtid();
4364 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4365 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4366 (kmp_affin_mask_t *)(*mask));
4367 __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
4368 proc, gtid, buf);
4369 });
4370
4371 if (__kmp_env_consistency_check) {
4372 if ((mask == NULL) || (*mask == NULL)) {
4373 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
4374 }
4375 }
4376
4377 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4378 return 0;
4379 }
4380 if (! KMP_CPU_ISSET(proc, fullMask)) {
4381 return 0;
4382 }
4383
4384 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
4385}
4386
Jim Cownie5e8470a2013-09-27 10:38:44 +00004387
4388// Dynamic affinity settings - Affinity balanced
4389void __kmp_balanced_affinity( int tid, int nthreads )
4390{
4391 if( __kmp_affinity_uniform_topology() ) {
4392 int coreID;
4393 int threadID;
4394 // Number of hyper threads per core in HT machine
4395 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
4396 // Number of cores
4397 int ncores = __kmp_ncores;
4398 // How many threads will be bound to each core
4399 int chunk = nthreads / ncores;
4400 // How many cores will have an additional thread bound to them - the "big" cores
4401 int big_cores = nthreads % ncores;
4402 // Number of threads on the big cores
4403 int big_nth = ( chunk + 1 ) * big_cores;
4404 if( tid < big_nth ) {
4405 coreID = tid / (chunk + 1 );
4406 threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
4407 } else { //tid >= big_nth
4408 coreID = ( tid - big_cores ) / chunk;
4409 threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
4410 }
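        //
        // Worked example (illustrative): nthreads == 6, ncores == 4 and
        // __kmp_nth_per_core == 2 give chunk == 1, big_cores == 2, big_nth == 4.
        // tids 0..3 land on cores 0,0,1,1 (the "big" cores take two threads each,
        // thread contexts 0 and 1), while tids 4 and 5 land on cores 2 and 3.
        //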
4411
4412 KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
4413 "Illegal set affinity operation when not capable");
4414
4415 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
4416 KMP_CPU_ZERO(mask);
4417
4418 // Granularity == thread
4419 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4420 int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
4421 KMP_CPU_SET( osID, mask);
4422 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4423 for( int i = 0; i < __kmp_nth_per_core; i++ ) {
4424 int osID;
4425 osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
4426 KMP_CPU_SET( osID, mask);
4427 }
4428 }
4429 if (__kmp_affinity_verbose) {
4430 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4431 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004432 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4433 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004434 }
4435 __kmp_set_system_affinity( mask, TRUE );
4436 } else { // Non-uniform topology
4437
4438 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
4439 KMP_CPU_ZERO(mask);
4440
4441 // Number of hyper threads per core in HT machine
4442 int nth_per_core = __kmp_nThreadsPerCore;
4443 int core_level;
4444 if( nth_per_core > 1 ) {
4445 core_level = __kmp_aff_depth - 2;
4446 } else {
4447 core_level = __kmp_aff_depth - 1;
4448 }
4449
4450 // Number of cores - maximum value; it does not count trailing cores with 0 processors
4451 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
4452
4453 // For performance gain consider the special case nthreads == __kmp_avail_proc
4454 if( nthreads == __kmp_avail_proc ) {
4455 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4456 int osID = address2os[ tid ].second;
4457 KMP_CPU_SET( osID, mask);
4458 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4459 int coreID = address2os[ tid ].first.labels[ core_level ];
4460 // Count the osIDs found for the current core; there can be no more than nth_per_core of them.
4461 // Since address2os is sorted, we can stop once cnt == nth_per_core.
4462 int cnt = 0;
4463 for( int i = 0; i < __kmp_avail_proc; i++ ) {
4464 int osID = address2os[ i ].second;
4465 int core = address2os[ i ].first.labels[ core_level ];
4466 if( core == coreID ) {
4467 KMP_CPU_SET( osID, mask);
4468 cnt++;
4469 if( cnt == nth_per_core ) {
4470 break;
4471 }
4472 }
4473 }
4474 }
4475 } else if( nthreads <= __kmp_ncores ) {
4476
4477 int core = 0;
4478 for( int i = 0; i < ncores; i++ ) {
4479 // Check if this core from procarr[] is in the mask
4480 int in_mask = 0;
4481 for( int j = 0; j < nth_per_core; j++ ) {
4482 if( procarr[ i * nth_per_core + j ] != - 1 ) {
4483 in_mask = 1;
4484 break;
4485 }
4486 }
4487 if( in_mask ) {
4488 if( tid == core ) {
4489 for( int j = 0; j < nth_per_core; j++ ) {
4490 int osID = procarr[ i * nth_per_core + j ];
4491 if( osID != -1 ) {
4492 KMP_CPU_SET( osID, mask );
4493 // For granularity=thread it is enough to set the first available osID for this core
4494 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4495 break;
4496 }
4497 }
4498 }
4499 break;
4500 } else {
4501 core++;
4502 }
4503 }
4504 }
4505
4506 } else { // nthreads > __kmp_ncores
4507
4508 // Array to save the number of processors at each core
4509 int nproc_at_core[ ncores ];
4510 // Array to save the number of cores with "x" available processors;
4511 int ncores_with_x_procs[ nth_per_core + 1 ];
4512 // Array to save the number of cores with # procs from x to nth_per_core
4513 int ncores_with_x_to_max_procs[ nth_per_core + 1 ];
4514
4515 for( int i = 0; i <= nth_per_core; i++ ) {
4516 ncores_with_x_procs[ i ] = 0;
4517 ncores_with_x_to_max_procs[ i ] = 0;
4518 }
4519
4520 for( int i = 0; i < ncores; i++ ) {
4521 int cnt = 0;
4522 for( int j = 0; j < nth_per_core; j++ ) {
4523 if( procarr[ i * nth_per_core + j ] != -1 ) {
4524 cnt++;
4525 }
4526 }
4527 nproc_at_core[ i ] = cnt;
4528 ncores_with_x_procs[ cnt ]++;
4529 }
4530
4531 for( int i = 0; i <= nth_per_core; i++ ) {
4532 for( int j = i; j <= nth_per_core; j++ ) {
4533 ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
4534 }
4535 }
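            //
            // Worked example (illustrative): nth_per_core == 2 and three cores
            // exposing 2, 1 and 2 usable contexts give nproc_at_core == {2,1,2},
            // ncores_with_x_procs == {0,1,2} (indexed by context count) and
            // ncores_with_x_to_max_procs == {3,3,2}, i.e. all three cores have
            // at least one context and two of them have at least two.
            //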
4536
4537 // Max number of processors
4538 int nproc = nth_per_core * ncores;
4539 // An array to keep the number of threads assigned to each thread context
4540 int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
4541 for( int i = 0; i < nproc; i++ ) {
4542 newarr[ i ] = 0;
4543 }
4544
4545 int nth = nthreads;
4546 int flag = 0;
4547 while( nth > 0 ) {
4548 for( int j = 1; j <= nth_per_core; j++ ) {
4549 int cnt = ncores_with_x_to_max_procs[ j ];
4550 for( int i = 0; i < ncores; i++ ) {
4551 // Skip the core with 0 processors
4552 if( nproc_at_core[ i ] == 0 ) {
4553 continue;
4554 }
4555 for( int k = 0; k < nth_per_core; k++ ) {
4556 if( procarr[ i * nth_per_core + k ] != -1 ) {
4557 if( newarr[ i * nth_per_core + k ] == 0 ) {
4558 newarr[ i * nth_per_core + k ] = 1;
4559 cnt--;
4560 nth--;
4561 break;
4562 } else {
4563 if( flag != 0 ) {
4564 newarr[ i * nth_per_core + k ] ++;
4565 cnt--;
4566 nth--;
4567 break;
4568 }
4569 }
4570 }
4571 }
4572 if( cnt == 0 || nth == 0 ) {
4573 break;
4574 }
4575 }
4576 if( nth == 0 ) {
4577 break;
4578 }
4579 }
4580 flag = 1;
4581 }
4582 int sum = 0;
4583 for( int i = 0; i < nproc; i++ ) {
4584 sum += newarr[ i ];
4585 if( sum > tid ) {
4586 // Granularity == thread
4587 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4588 int osID = procarr[ i ];
4589 KMP_CPU_SET( osID, mask);
4590 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4591 int coreID = i / nth_per_core;
4592 for( int ii = 0; ii < nth_per_core; ii++ ) {
4593 int osID = procarr[ coreID * nth_per_core + ii ];
4594 if( osID != -1 ) {
4595 KMP_CPU_SET( osID, mask);
4596 }
4597 }
4598 }
4599 break;
4600 }
4601 }
4602 __kmp_free( newarr );
4603 }
4604
4605 if (__kmp_affinity_verbose) {
4606 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4607 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004608 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4609 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004610 }
4611 __kmp_set_system_affinity( mask, TRUE );
4612 }
4613}
4614
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004615#else
4616 // affinity not supported
4617
4618kmp_uint32 mac_skipPerLevel[7];
4619kmp_uint32 mac_depth;
4620kmp_uint8 mac_leaf_kids;
4621void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
4622 static int first = 1;
4623 if (first) {
4624 const kmp_uint32 maxLevels = 7;
4625 kmp_uint32 numPerLevel[maxLevels];
4626
4627 for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
4628 numPerLevel[i] = 1;
4629 mac_skipPerLevel[i] = 1;
4630 }
4631
4632 mac_depth = 2;
4633 numPerLevel[0] = nproc;
4634
4635 kmp_uint32 branch = 4;
4636 if (numPerLevel[0] == 1) branch = nproc/4;
4637 if (branch<4) branch=4;
4638 for (kmp_uint32 d=0; d<mac_depth-1; ++d) { // optimize hierarchy width
4639 while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
4640 if (numPerLevel[d] & 1) numPerLevel[d]++;
4641 numPerLevel[d] = numPerLevel[d] >> 1;
4642 if (numPerLevel[d+1] == 1) mac_depth++;
4643 numPerLevel[d+1] = numPerLevel[d+1] << 1;
4644 }
4645 if(numPerLevel[0] == 1) {
4646 branch = branch >> 1;
4647 if (branch<4) branch = 4;
4648 }
4649 }
4650
4651 for (kmp_uint32 i=1; i<mac_depth; ++i)
4652 mac_skipPerLevel[i] = numPerLevel[i-1] * mac_skipPerLevel[i-1];
4653 mac_leaf_kids = (kmp_uint8)numPerLevel[0]-1;
4654 first=0;
4655 }
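    //
    // Worked example (illustrative): nproc == 16 yields numPerLevel == {4, 4} for
    // the first two levels, mac_depth == 3, mac_leaf_kids == 3, and the first
    // three entries of mac_skipPerLevel are {1, 4, 16}, i.e. a synthetic 4-ary
    // tree two levels deep over the 16 "processors".
    //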
4656 thr_bar->depth = mac_depth;
4657 thr_bar->base_leaf_kids = mac_leaf_kids;
4658 thr_bar->skip_per_level = mac_skipPerLevel;
4659}
4660
Alp Toker763b9392014-02-28 09:42:41 +00004661#endif // KMP_AFFINITY_SUPPORTED