1/*
2 * kmp_affinity.cpp -- affinity management
3 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15
16#include "kmp.h"
17#include "kmp_i18n.h"
18#include "kmp_io.h"
19#include "kmp_str.h"
20#include "kmp_wrapper_getpid.h"
21
22#if KMP_AFFINITY_SUPPORTED
23
24//
25// Print the affinity mask to the character array in a pretty format.
26//
27char *
28__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
29{
30 KMP_ASSERT(buf_len >= 40);
31 char *scan = buf;
32 char *end = buf + buf_len - 1;
33
34 //
35 // Find first element / check for empty set.
36 //
37 size_t i;
38 for (i = 0; i < KMP_CPU_SETSIZE; i++) {
39 if (KMP_CPU_ISSET(i, mask)) {
40 break;
41 }
42 }
43 if (i == KMP_CPU_SETSIZE) {
44 sprintf(scan, "{<empty>}");
45 while (*scan != '\0') scan++;
46 KMP_ASSERT(scan <= end);
47 return buf;
48 }
49
50 sprintf(scan, "{%ld", (long)i);
51 while (*scan != '\0') scan++;
52 i++;
53 for (; i < KMP_CPU_SETSIZE; i++) {
54 if (! KMP_CPU_ISSET(i, mask)) {
55 continue;
56 }
57
58 //
59 // Check for buffer overflow. A string of the form ",<n>" will have
60 // at most 10 characters, plus we want to leave room to print ",...}"
61 // if the set is too large to print for a total of 15 characters.
62 // We already left room for '\0' in setting end.
63 //
64 if (end - scan < 15) {
65 break;
66 }
67 sprintf(scan, ",%-ld", (long)i);
68 while (*scan != '\0') scan++;
69 }
70 if (i < KMP_CPU_SETSIZE) {
71 sprintf(scan, ",...");
72 while (*scan != '\0') scan++;
73 }
74 sprintf(scan, "}");
75 while (*scan != '\0') scan++;
76 KMP_ASSERT(scan <= end);
77 return buf;
78}
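//
// Illustrative sketch (hypothetical, for reference only) of the format
// produced above: a mask with bits {0,1,4} set prints as "{0,1,4}", an empty
// mask prints as "{<empty>}", and an over-long set is truncated with ",...".
// The standalone program below mimics the same formatting with a
// std::vector<bool> standing in for kmp_affin_mask_t.
//
#if 0
#include <cstdio>
#include <string>
#include <vector>

static std::string print_mask(const std::vector<bool> &mask) {
    std::string out;
    size_t i = 0;
    while (i < mask.size() && !mask[i]) i++;            // find first set bit
    if (i == mask.size()) return "{<empty>}";
    out = "{" + std::to_string(i);
    for (++i; i < mask.size(); ++i) {
        if (!mask[i]) continue;
        if (out.size() > 60) { out += ",..."; break; }  // arbitrary truncation limit
        out += "," + std::to_string(i);
    }
    return out + "}";
}

int main() {
    std::vector<bool> mask(8, false);
    mask[0] = mask[1] = mask[4] = true;
    std::printf("%s\n", print_mask(mask).c_str());      // prints {0,1,4}
    return 0;
}
#endif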
79
80
81void
82__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
83{
84 KMP_CPU_ZERO(mask);
85
86# if KMP_GROUP_AFFINITY
87
88 if (__kmp_num_proc_groups > 1) {
89 int group;
90 KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
91 for (group = 0; group < __kmp_num_proc_groups; group++) {
92 int i;
93 int num = __kmp_GetActiveProcessorCount(group);
94 for (i = 0; i < num; i++) {
95 KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
96 }
97 }
98 }
99 else
100
101# endif /* KMP_GROUP_AFFINITY */
102
103 {
104 int proc;
105 for (proc = 0; proc < __kmp_xproc; proc++) {
106 KMP_CPU_SET(proc, mask);
107 }
108 }
109}
110
111
112//
113// In Linux* OS debug & cover (-O0) builds, we need to avoid inline member
114// functions.
115//
116// The icc codegen emits sections with extremely long names, of the form
117// ".gnu.linkonce.<mangled_name>". There seems to have been a linker bug
118// introduced between GNU ld version 2.14.90.0.4 and 2.15.92.0.2 involving
119// some sort of memory corruption or table overflow that is triggered by
120// these long strings. I checked the latest version of the linker -
121// GNU ld (Linux* OS/GNU Binutils) 2.18.50.0.7.20080422 - and the bug is not
122// fixed.
123//
124// Unfortunately, my attempts to reproduce it in a smaller example have
125// failed - I'm not sure what the prospects are of getting it fixed
126// properly - but we need a reproducer smaller than all of libiomp.
127//
128// Work around the problem by avoiding inline constructors in such builds.
129// We do this for all platforms, not just Linux* OS - non-inline functions are
130// more debuggable and provide better coverage than inline functions.
131// Use inline functions in shipping libs, for performance.
132//
133
134# if !defined(KMP_DEBUG) && !defined(COVER)
135
136class Address {
137public:
138 static const unsigned maxDepth = 32;
139 unsigned labels[maxDepth];
140 unsigned childNums[maxDepth];
141 unsigned depth;
142 unsigned leader;
143 Address(unsigned _depth)
144 : depth(_depth), leader(FALSE) {
145 }
146 Address &operator=(const Address &b) {
147 depth = b.depth;
148 for (unsigned i = 0; i < depth; i++) {
149 labels[i] = b.labels[i];
150 childNums[i] = b.childNums[i];
151 }
152 leader = FALSE;
153 return *this;
154 }
155 bool operator==(const Address &b) const {
156 if (depth != b.depth)
157 return false;
158 for (unsigned i = 0; i < depth; i++)
159 if(labels[i] != b.labels[i])
160 return false;
161 return true;
162 }
163 bool isClose(const Address &b, int level) const {
164 if (depth != b.depth)
165 return false;
166 if ((unsigned)level >= depth)
167 return true;
168 for (unsigned i = 0; i < (depth - level); i++)
169 if(labels[i] != b.labels[i])
170 return false;
171 return true;
172 }
173 bool operator!=(const Address &b) const {
174 return !operator==(b);
175 }
176};
177
178class AddrUnsPair {
179public:
180 Address first;
181 unsigned second;
182 AddrUnsPair(Address _first, unsigned _second)
183 : first(_first), second(_second) {
184 }
185 AddrUnsPair &operator=(const AddrUnsPair &b)
186 {
187 first = b.first;
188 second = b.second;
189 return *this;
190 }
191};
192
193# else
194
195class Address {
196public:
197 static const unsigned maxDepth = 32;
198 unsigned labels[maxDepth];
199 unsigned childNums[maxDepth];
200 unsigned depth;
201 unsigned leader;
202 Address(unsigned _depth);
203 Address &operator=(const Address &b);
204 bool operator==(const Address &b) const;
205 bool isClose(const Address &b, int level) const;
206 bool operator!=(const Address &b) const;
207};
208
209Address::Address(unsigned _depth)
210{
211 depth = _depth;
212 leader = FALSE;
213}
214
215Address &Address::operator=(const Address &b) {
216 depth = b.depth;
217 for (unsigned i = 0; i < depth; i++) {
218 labels[i] = b.labels[i];
219 childNums[i] = b.childNums[i];
220 }
221 leader = FALSE;
222 return *this;
223}
224
225bool Address::operator==(const Address &b) const {
226 if (depth != b.depth)
227 return false;
228 for (unsigned i = 0; i < depth; i++)
229 if(labels[i] != b.labels[i])
230 return false;
231 return true;
232}
233
234bool Address::isClose(const Address &b, int level) const {
235 if (depth != b.depth)
236 return false;
237 if ((unsigned)level >= depth)
238 return true;
239 for (unsigned i = 0; i < (depth - level); i++)
240 if(labels[i] != b.labels[i])
241 return false;
242 return true;
243}
244
245bool Address::operator!=(const Address &b) const {
246 return !operator==(b);
247}
248
249class AddrUnsPair {
250public:
251 Address first;
252 unsigned second;
253 AddrUnsPair(Address _first, unsigned _second);
254 AddrUnsPair &operator=(const AddrUnsPair &b);
255};
256
257AddrUnsPair::AddrUnsPair(Address _first, unsigned _second)
258 : first(_first), second(_second)
259{
260}
261
262AddrUnsPair &AddrUnsPair::operator=(const AddrUnsPair &b)
263{
264 first = b.first;
265 second = b.second;
266 return *this;
267}
268
269# endif /* !defined(KMP_DEBUG) && !defined(COVER) */
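//
// Illustrative sketch of how the topology code below typically fills these
// objects in: a depth-3 Address holds {package, core, thread} labels and the
// unsigned member of AddrUnsPair records the OS proc id.  All values here
// are hypothetical.
//
#if 0
static void address_example()
{
    Address a(3);
    a.labels[0] = 1;            // package 1
    a.labels[1] = 2;            // core 2 within that package
    a.labels[2] = 0;            // hw thread 0 within that core
    AddrUnsPair pair(a, 13);    // ...exposed by the OS as proc 13

    Address b(3);
    b.labels[0] = 1; b.labels[1] = 2; b.labels[2] = 1;
    bool same_core = a.isClose(b, 1);   // true  - equal down to the core level
    bool identical = (a == b);          // false - different hw thread
    (void)pair; (void)same_core; (void)identical;
}
#endif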
270
271
272static int
273__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
274{
275 const Address *aa = (const Address *)&(((AddrUnsPair *)a)
276 ->first);
277 const Address *bb = (const Address *)&(((AddrUnsPair *)b)
278 ->first);
279 unsigned depth = aa->depth;
280 unsigned i;
281 KMP_DEBUG_ASSERT(depth == bb->depth);
282 for (i = 0; i < depth; i++) {
283 if (aa->labels[i] < bb->labels[i]) return -1;
284 if (aa->labels[i] > bb->labels[i]) return 1;
285 }
286 return 0;
287}
288
289
290static int
291__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
292{
293 const Address *aa = (const Address *)&(((AddrUnsPair *)a)
294 ->first);
295 const Address *bb = (const Address *)&(((AddrUnsPair *)b)
296 ->first);
297 unsigned depth = aa->depth;
298 unsigned i;
299 KMP_DEBUG_ASSERT(depth == bb->depth);
300 KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
301 KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
302 for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
303 int j = depth - i - 1;
304 if (aa->childNums[j] < bb->childNums[j]) return -1;
305 if (aa->childNums[j] > bb->childNums[j]) return 1;
306 }
307 for (; i < depth; i++) {
308 int j = i - __kmp_affinity_compact;
309 if (aa->childNums[j] < bb->childNums[j]) return -1;
310 if (aa->childNums[j] > bb->childNums[j]) return 1;
311 }
312 return 0;
313}
314
315/** A structure for holding machine-specific hierarchy info to be computed once at init. */
316class hierarchy_info {
317public:
318 /** Typical levels are threads/core, cores/package or socket, packages/node, nodes/machine,
319 etc. We don't want to get specific with nomenclature */
320 static const kmp_uint32 maxLevels=7;
321
322 /** This is specifically the depth of the machine configuration hierarchy, in terms of the
323 number of levels along the longest path from root to any leaf. It corresponds to the
324 number of entries in numPerLevel if we exclude all but one trailing 1. */
325 kmp_uint32 depth;
326 kmp_uint32 base_num_threads;
327 bool uninitialized;
328
329 /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
330 node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
331 and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
332 kmp_uint32 numPerLevel[maxLevels];
333 kmp_uint32 skipPerLevel[maxLevels];
334
335 void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
336 int hier_depth = adr2os[0].first.depth;
337 int level = 0;
338 for (int i=hier_depth-1; i>=0; --i) {
339 int max = -1;
340 for (int j=0; j<num_addrs; ++j) {
341 int next = adr2os[j].first.childNums[i];
342 if (next > max) max = next;
343 }
344 numPerLevel[level] = max+1;
345 ++level;
346 }
347 }
348
349 hierarchy_info() : depth(1), uninitialized(true) {}
350 void init(AddrUnsPair *adr2os, int num_addrs)
351 {
352 /* Added explicit initialization of the depth here to prevent usage of dirty value
353 observed when static library is re-initialized multiple times (e.g. when
354 non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
355 depth = 1;
356 uninitialized = false;
357 for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
358 numPerLevel[i] = 1;
359 skipPerLevel[i] = 1;
360 }
361
362 // Sort table by physical ID
363 if (adr2os) {
364 qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
365 deriveLevels(adr2os, num_addrs);
366 }
367 else {
368 numPerLevel[0] = 4;
369 numPerLevel[1] = num_addrs/4;
370 if (num_addrs%4) numPerLevel[1]++;
371 }
372
373 base_num_threads = num_addrs;
374 for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
375 if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
376 depth++;
377
378 kmp_uint32 branch = 4;
379 if (numPerLevel[0] == 1) branch = num_addrs/4;
380 if (branch<4) branch=4;
381 for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
382 while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
383 if (numPerLevel[d] & 1) numPerLevel[d]++;
384 numPerLevel[d] = numPerLevel[d] >> 1;
385 if (numPerLevel[d+1] == 1) depth++;
386 numPerLevel[d+1] = numPerLevel[d+1] << 1;
387 }
388 if(numPerLevel[0] == 1) {
389 branch = branch >> 1;
390 if (branch<4) branch = 4;
391 }
392 }
393
394 for (kmp_uint32 i=1; i<depth; ++i)
395 skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
396
397 }
398};
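//
// Worked example of the two arrays above for the hypothetical machine from
// the numPerLevel comment (4 packages, 4 cores/package, 2 HW threads/core):
// numPerLevel = {2, 4, 4, 1, 1, 1, 1} and skipPerLevel[i] is the product of
// the entries below level i, i.e. the distance in leaves between two
// consecutive nodes at that level.
//
#if 0
#include <cstdio>

int main() {
    const int maxLevels = 7;
    unsigned numPerLevel[maxLevels]  = {2, 4, 4, 1, 1, 1, 1};
    unsigned skipPerLevel[maxLevels] = {1, 1, 1, 1, 1, 1, 1};
    for (int i = 1; i < maxLevels; ++i)
        skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Expected: skipPerLevel == {1, 2, 8, 32, 32, 32, 32}
    for (int i = 0; i < maxLevels; ++i)
        std::printf("level %d: num=%u skip=%u\n", i, numPerLevel[i], skipPerLevel[i]);
    return 0;
}
#endif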
399
400static hierarchy_info machine_hierarchy;
401
402void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
403 kmp_uint32 depth;
404 if (machine_hierarchy.uninitialized)
405 machine_hierarchy.init(NULL, nproc);
406
407 depth = machine_hierarchy.depth;
408 KMP_DEBUG_ASSERT(depth > 0);
409 while (nproc > machine_hierarchy.skipPerLevel[depth-1]) {
410 depth++;
411 machine_hierarchy.skipPerLevel[depth-1] = 2*machine_hierarchy.skipPerLevel[depth-2];
412 }
413 thr_bar->depth = depth;
414 thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
415 thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
416}
417
418//
419// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
420// called to renumber the labels from [0..n] and place them into the child_num
421// vector of the address object. This is done in case the labels used for
422// the children at one node of the hierarchy differ from those used for
423// another node at the same level. Example: suppose the machine has 2 nodes
424// with 2 packages each. The first node contains packages 601 and 602, and
425// the second node contains packages 603 and 604. If we try to sort the table
426// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
427// because we are paying attention to the labels themselves, not the ordinal
428// child numbers. By using the child numbers in the sort, the result is
429// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
430//
431static void
432__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
433 int numAddrs)
434{
435 KMP_DEBUG_ASSERT(numAddrs > 0);
436 int depth = address2os->first.depth;
437 unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
438 unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
439 * sizeof(unsigned));
440 int labCt;
441 for (labCt = 0; labCt < depth; labCt++) {
442 address2os[0].first.childNums[labCt] = counts[labCt] = 0;
443 lastLabel[labCt] = address2os[0].first.labels[labCt];
444 }
445 int i;
446 for (i = 1; i < numAddrs; i++) {
447 for (labCt = 0; labCt < depth; labCt++) {
448 if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
449 int labCt2;
450 for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
451 counts[labCt2] = 0;
452 lastLabel[labCt2] = address2os[i].first.labels[labCt2];
453 }
454 counts[labCt]++;
455 lastLabel[labCt] = address2os[i].first.labels[labCt];
456 break;
457 }
458 }
459 for (labCt = 0; labCt < depth; labCt++) {
460 address2os[i].first.childNums[labCt] = counts[labCt];
461 }
462 for (; labCt < (int)Address::maxDepth; labCt++) {
463 address2os[i].first.childNums[labCt] = 0;
464 }
465 }
466}
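//
// Small illustration of the renumbering (hypothetical labels): suppose
// depth == 2 and the sorted table holds package/core label pairs {0,10},
// {0,12}, {1,20}, {1,22}.  The raw core labels differ between the two
// packages, but after renumbering both packages see their cores as child
// numbers 0 and 1, which is what the compact/scatter sorts rely on.
//
#if 0
static void child_num_example()
{
    AddrUnsPair t[] = { AddrUnsPair(Address(2), 0), AddrUnsPair(Address(2), 1),
                        AddrUnsPair(Address(2), 2), AddrUnsPair(Address(2), 3) };
    t[0].first.labels[0] = 0; t[0].first.labels[1] = 10;
    t[1].first.labels[0] = 0; t[1].first.labels[1] = 12;
    t[2].first.labels[0] = 1; t[2].first.labels[1] = 20;
    t[3].first.labels[0] = 1; t[3].first.labels[1] = 22;
    __kmp_affinity_assign_child_nums(t, 4);
    // Now t[0].first.childNums == {0,0}, t[1] == {0,1},
    //     t[2].first.childNums == {1,0}, t[3] == {1,1}.
}
#endif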
467
468
469//
470// All of the __kmp_affinity_create_*_map() routines should set
471// __kmp_affinity_masks to a vector of affinity mask objects of length
472// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
473// return the number of levels in the machine topology tree (zero if
474// __kmp_affinity_type == affinity_none).
475//
476// All of the __kmp_affinity_create_*_map() routines should set *fullMask
477// to the affinity mask for the initialization thread. They need to save and
478// restore the mask, and it could be needed later, so saving it is just an
479// optimization to avoid calling kmp_get_system_affinity() again.
480//
481static kmp_affin_mask_t *fullMask = NULL;
482
483kmp_affin_mask_t *
484__kmp_affinity_get_fullMask() { return fullMask; }
485
486
487static int nCoresPerPkg, nPackages;
488static int __kmp_nThreadsPerCore;
489#ifndef KMP_DFLT_NTH_CORES
490static int __kmp_ncores;
491#endif
492
493//
494// __kmp_affinity_uniform_topology() doesn't work when called from
495// places which support arbitrarily many levels in the machine topology
496// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
497// __kmp_affinity_create_x2apicid_map().
498//
499inline static bool
500__kmp_affinity_uniform_topology()
501{
502 return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
503}
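//
// Example: a machine with 2 packages x 8 cores x 2 HW threads is uniform as
// long as all 32 OS procs are available (2 * 8 * 2 == 32 == __kmp_avail_proc).
// If the initial affinity mask excludes two procs (__kmp_avail_proc == 30),
// the topology is treated as non-uniform.
//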
504
505
506//
507// Print out the detailed machine topology map, i.e. the physical locations
508// of each OS proc.
509//
510static void
511__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
512 int pkgLevel, int coreLevel, int threadLevel)
513{
514 int proc;
515
516 KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
517 for (proc = 0; proc < len; proc++) {
518 int level;
519 kmp_str_buf_t buf;
520 __kmp_str_buf_init(&buf);
521 for (level = 0; level < depth; level++) {
522 if (level == threadLevel) {
523 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
524 }
525 else if (level == coreLevel) {
526 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
527 }
528 else if (level == pkgLevel) {
529 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
530 }
531 else if (level > pkgLevel) {
532 __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
533 level - pkgLevel - 1);
534 }
535 else {
536 __kmp_str_buf_print(&buf, "L%d ", level);
537 }
538 __kmp_str_buf_print(&buf, "%d ",
539 address2os[proc].first.labels[level]);
540 }
541 KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
542 buf.str);
543 __kmp_str_buf_free(&buf);
544 }
545}
546
547
548//
549// If we don't know how to retrieve the machine's processor topology, or
550// encounter an error in doing so, this routine is called to form a "flat"
551// mapping of os thread id's <-> processor id's.
552//
553static int
554__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
555 kmp_i18n_id_t *const msg_id)
556{
557 *address2os = NULL;
558 *msg_id = kmp_i18n_null;
559
560 //
561 // Even if __kmp_affinity_type == affinity_none, this routine might still
562 // be called to set __kmp_ncores, as well as
563 // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
564 //
565 if (! KMP_AFFINITY_CAPABLE()) {
566 KMP_ASSERT(__kmp_affinity_type == affinity_none);
567 __kmp_ncores = nPackages = __kmp_xproc;
568 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
569 if (__kmp_affinity_verbose) {
570 KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
571 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
572 KMP_INFORM(Uniform, "KMP_AFFINITY");
573 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
574 __kmp_nThreadsPerCore, __kmp_ncores);
575 }
576 return 0;
577 }
578
579 //
580 // When affinity is off, this routine will still be called to set
581 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
582 // nCoresPerPkg, & nPackages. Make sure all these vars are set
583 // correctly, and return now if affinity is not enabled.
584 //
585 __kmp_ncores = nPackages = __kmp_avail_proc;
586 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
587 if (__kmp_affinity_verbose) {
588 char buf[KMP_AFFIN_MASK_PRINT_LEN];
589 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
590
591 KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
592 if (__kmp_affinity_respect_mask) {
593 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
594 } else {
595 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
596 }
597 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
598 KMP_INFORM(Uniform, "KMP_AFFINITY");
599 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
600 __kmp_nThreadsPerCore, __kmp_ncores);
601 }
602 if (__kmp_affinity_type == affinity_none) {
603 return 0;
604 }
605
606 //
607 // Construct the data structure to be returned.
608 //
609 *address2os = (AddrUnsPair*)
610 __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
611 int avail_ct = 0;
612 unsigned int i;
613 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
614 //
615 // Skip this proc if it is not included in the machine model.
616 //
617 if (! KMP_CPU_ISSET(i, fullMask)) {
618 continue;
619 }
620
621 Address addr(1);
622 addr.labels[0] = i;
623 (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
624 }
625 if (__kmp_affinity_verbose) {
626 KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
627 }
628
629 if (__kmp_affinity_gran_levels < 0) {
630 //
631 // Only the package level is modeled in the machine topology map,
632 // so the #levels of granularity is either 0 or 1.
633 //
634 if (__kmp_affinity_gran > affinity_gran_package) {
635 __kmp_affinity_gran_levels = 1;
636 }
637 else {
638 __kmp_affinity_gran_levels = 0;
639 }
640 }
641 return 1;
642}
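//
// Example of the flat map: with an initial mask of {0,1,2,5}, this routine
// returns 1 (a single topology level) and fills address2os with
// { ({0},0), ({1},1), ({2},2), ({5},5) }, i.e. every OS proc becomes its own
// "package" label.
//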
643
644
645# if KMP_GROUP_AFFINITY
646
647//
648// If multiple Windows* OS processor groups exist, we can create a 2-level
649// topology map with the groups at level 0 and the individual procs at
650// level 1.
651//
652// This facilitates letting the threads float among all procs in a group,
653// if granularity=group (the default when there are multiple groups).
654//
655static int
656__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
657 kmp_i18n_id_t *const msg_id)
658{
659 *address2os = NULL;
660 *msg_id = kmp_i18n_null;
661
662 //
663 // If we don't have multiple processor groups, return now.
664 // The flat mapping will be used.
665 //
666 if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
667 // FIXME set *msg_id
668 return -1;
669 }
670
671 //
672 // Construct the data structure to be returned.
673 //
674 *address2os = (AddrUnsPair*)
675 __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
676 int avail_ct = 0;
677 int i;
678 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
679 //
680 // Skip this proc if it is not included in the machine model.
681 //
682 if (! KMP_CPU_ISSET(i, fullMask)) {
683 continue;
684 }
685
686 Address addr(2);
687 addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
688 addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
689 (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
690
691 if (__kmp_affinity_verbose) {
692 KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
693 addr.labels[1]);
694 }
695 }
696
697 if (__kmp_affinity_gran_levels < 0) {
698 if (__kmp_affinity_gran == affinity_gran_group) {
699 __kmp_affinity_gran_levels = 1;
700 }
701 else if ((__kmp_affinity_gran == affinity_gran_fine)
702 || (__kmp_affinity_gran == affinity_gran_thread)) {
703 __kmp_affinity_gran_levels = 0;
704 }
705 else {
706 const char *gran_str = NULL;
707 if (__kmp_affinity_gran == affinity_gran_core) {
708 gran_str = "core";
709 }
710 else if (__kmp_affinity_gran == affinity_gran_package) {
711 gran_str = "package";
712 }
713 else if (__kmp_affinity_gran == affinity_gran_node) {
714 gran_str = "node";
715 }
716 else {
717 KMP_ASSERT(0);
718 }
719
720 // Warning: can't use affinity granularity \"gran\" with group topology method, using "thread"
721 __kmp_affinity_gran_levels = 0;
722 }
723 }
724 return 2;
725}
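//
// Sketch of the two-level labels produced above, assuming the runtime's
// convention of CHAR_BIT * sizeof(DWORD_PTR) == 64 proc slots per Windows
// processor group (the proc ids below are hypothetical).
//
#if 0
#include <cstdio>

int main() {
    const unsigned group_width = 64;    // CHAR_BIT * sizeof(DWORD_PTR) on Win64
    const unsigned procs[] = { 0, 63, 64, 95 };
    for (int i = 0; i < 4; ++i) {
        unsigned group  = procs[i] / group_width;   // addr.labels[0]
        unsigned within = procs[i] % group_width;   // addr.labels[1]
        std::printf("proc %2u -> group %u, proc-in-group %2u\n",
                    procs[i], group, within);
    }
    return 0;
}
#endif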
726
727# endif /* KMP_GROUP_AFFINITY */
728
729
730# if KMP_ARCH_X86 || KMP_ARCH_X86_64
731
732static int
733__kmp_cpuid_mask_width(int count) {
734 int r = 0;
735
736 while((1<<r) < count)
737 ++r;
738 return r;
739}
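//
// Example: __kmp_cpuid_mask_width(count) returns the number of bits needed
// to encode values in [0, count), i.e. ceil(log2(count)):
//   count = 1 -> 0,   count = 2 -> 1,   count = 6 -> 3,   count = 16 -> 4.
//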
740
741
742class apicThreadInfo {
743public:
744 unsigned osId; // param to __kmp_affinity_bind_thread
745 unsigned apicId; // from cpuid after binding
746 unsigned maxCoresPerPkg; // ""
747 unsigned maxThreadsPerPkg; // ""
748 unsigned pkgId; // inferred from above values
749 unsigned coreId; // ""
750 unsigned threadId; // ""
751};
752
753
754static int
755__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
756{
757 const apicThreadInfo *aa = (const apicThreadInfo *)a;
758 const apicThreadInfo *bb = (const apicThreadInfo *)b;
759 if (aa->osId < bb->osId) return -1;
760 if (aa->osId > bb->osId) return 1;
761 return 0;
762}
763
764
765static int
766__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
767{
768 const apicThreadInfo *aa = (const apicThreadInfo *)a;
769 const apicThreadInfo *bb = (const apicThreadInfo *)b;
770 if (aa->pkgId < bb->pkgId) return -1;
771 if (aa->pkgId > bb->pkgId) return 1;
772 if (aa->coreId < bb->coreId) return -1;
773 if (aa->coreId > bb->coreId) return 1;
774 if (aa->threadId < bb->threadId) return -1;
775 if (aa->threadId > bb->threadId) return 1;
776 return 0;
777}
778
779
780//
781// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
782// an algorithm which cycles through the available os threads, setting
783// the current thread's affinity mask to that thread, and then retrieves
784// the Apic Id for each thread context using the cpuid instruction.
785//
786static int
787__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
788 kmp_i18n_id_t *const msg_id)
789{
790 kmp_cpuid buf;
791 int rc;
792 *address2os = NULL;
793 *msg_id = kmp_i18n_null;
794
795 //
796 // Check if cpuid leaf 4 is supported.
797 //
798 __kmp_x86_cpuid(0, 0, &buf);
799 if (buf.eax < 4) {
800 *msg_id = kmp_i18n_str_NoLeaf4Support;
801 return -1;
802 }
803
804 //
805 // The algorithm used starts by setting the affinity to each available
806 // thread and retrieving info from the cpuid instruction, so if we are
807 // not capable of calling __kmp_get_system_affinity() and
808 // __kmp_set_system_affinity(), then we need to do something else - use
809 // the defaults that we calculated from issuing cpuid without binding
810 // to each proc.
811 //
812 if (! KMP_AFFINITY_CAPABLE()) {
813 //
814 // Hack to try and infer the machine topology using only the data
815 // available from cpuid on the current thread, and __kmp_xproc.
816 //
817 KMP_ASSERT(__kmp_affinity_type == affinity_none);
818
819 //
820 // Get an upper bound on the number of threads per package using
821 // cpuid(1).
822 //
823 // On some OS/chip combinations where HT is supported by the chip
824 // but is disabled, this value will be 2 on a single core chip.
825 // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
826 //
827 __kmp_x86_cpuid(1, 0, &buf);
828 int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
829 if (maxThreadsPerPkg == 0) {
830 maxThreadsPerPkg = 1;
831 }
832
833 //
834 // The num cores per pkg comes from cpuid(4).
835 // 1 must be added to the encoded value.
836 //
837 // The author of cpu_count.cpp treated this as only an upper bound
838 // on the number of cores, but I haven't seen any cases where it
839 // was greater than the actual number of cores, so we will treat
840 // it as exact in this block of code.
841 //
842 // First, we need to check if cpuid(4) is supported on this chip.
843 // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
844 // has the value n or greater.
845 //
846 __kmp_x86_cpuid(0, 0, &buf);
847 if (buf.eax >= 4) {
848 __kmp_x86_cpuid(4, 0, &buf);
849 nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
850 }
851 else {
852 nCoresPerPkg = 1;
853 }
854
855 //
856 // There is no way to reliably tell if HT is enabled without issuing
857 // the cpuid instruction from every thread, and correlating the cpuid
858 // info, so if the machine is not affinity capable, we assume that HT
859 // is off. We have seen quite a few machines where maxThreadsPerPkg
860 // is 2, yet the machine does not support HT.
861 //
862 // - Older OSes are usually found on machines with older chips, which
863 // do not support HT.
864 //
865 // - The performance penalty for mistakenly identifying a machine as
866 // HT when it isn't (which results in blocktime being incorrectly set
867 // to 0) is greater than the penalty for mistakenly identifying
868 // a machine as being 1 thread/core when it is really HT enabled
869 // (which results in blocktime being incorrectly set to a positive
870 // value).
871 //
872 __kmp_ncores = __kmp_xproc;
873 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
874 __kmp_nThreadsPerCore = 1;
875 if (__kmp_affinity_verbose) {
876 KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
877 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
878 if (__kmp_affinity_uniform_topology()) {
879 KMP_INFORM(Uniform, "KMP_AFFINITY");
880 } else {
881 KMP_INFORM(NonUniform, "KMP_AFFINITY");
882 }
883 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
884 __kmp_nThreadsPerCore, __kmp_ncores);
885 }
886 return 0;
887 }
888
889 //
890 //
891 // From here on, we can assume that it is safe to call
892 // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
893 // even if __kmp_affinity_type = affinity_none.
894 //
895
896 //
897 // Save the affinity mask for the current thread.
898 //
899 kmp_affin_mask_t *oldMask;
900 KMP_CPU_ALLOC(oldMask);
901 KMP_ASSERT(oldMask != NULL);
902 __kmp_get_system_affinity(oldMask, TRUE);
903
904 //
905 // Run through each of the available contexts, binding the current thread
906 // to it, and obtaining the pertinent information using the cpuid instr.
907 //
908 // The relevant information is:
909 //
910 // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
911 // has a unique Apic Id, which is of the form pkg# : core# : thread#.
912 //
913 // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The
914 // value of this field determines the width of the core# + thread#
915 // fields in the Apic Id. It is also an upper bound on the number
916 // of threads per package, but it has been verified that situations
917 // happen where it is not exact. In particular, on certain OS/chip
918 // combinations where Intel(R) Hyper-Threading Technology is supported
919 // by the chip but has
920 // been disabled, the value of this field will be 2 (for a single core
921 // chip). On other OS/chip combinations supporting
922 // Intel(R) Hyper-Threading Technology, the value of
923 // this field will be 1 when Intel(R) Hyper-Threading Technology is
924 // disabled and 2 when it is enabled.
925 //
926 // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The
927 // value of this field (+1) determines the width of the core# field in
928 // the Apic Id. The comments in "cpucount.cpp" say that this value is
929 // an upper bound, but the IA-32 architecture manual says that it is
930 // exactly the number of cores per package, and I haven't seen any
931 // case where it wasn't.
932 //
933 // From this information, deduce the package Id, core Id, and thread Id,
934 // and set the corresponding fields in the apicThreadInfo struct.
935 //
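    //
    // Worked example with hypothetical values: if maxThreadsPerPkg == 16 and
    // maxCoresPerPkg == 8, then widthCT == 4, widthC == 3 and widthT == 1.
    // An Apic Id of 0x2D (binary 101101) therefore decodes as
    //   pkgId    = 0x2D >> 4         == 2,
    //   coreId   = (0x2D >> 1) & 0x7 == 6,
    //   threadId = 0x2D & 0x1        == 1.
    //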
936 unsigned i;
937 apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
938 __kmp_avail_proc * sizeof(apicThreadInfo));
939 unsigned nApics = 0;
940 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
941 //
942 // Skip this proc if it is not included in the machine model.
943 //
944 if (! KMP_CPU_ISSET(i, fullMask)) {
945 continue;
946 }
947 KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
948
949 __kmp_affinity_bind_thread(i);
950 threadInfo[nApics].osId = i;
951
952 //
953 // The apic id and max threads per pkg come from cpuid(1).
954 //
955 __kmp_x86_cpuid(1, 0, &buf);
956 if (((buf.edx >> 9) & 1) == 0) {
957 __kmp_set_system_affinity(oldMask, TRUE);
958 __kmp_free(threadInfo);
959 KMP_CPU_FREE(oldMask);
960 *msg_id = kmp_i18n_str_ApicNotPresent;
961 return -1;
962 }
963 threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
964 threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
965 if (threadInfo[nApics].maxThreadsPerPkg == 0) {
966 threadInfo[nApics].maxThreadsPerPkg = 1;
967 }
968
969 //
970 // Max cores per pkg comes from cpuid(4).
971 // 1 must be added to the encoded value.
972 //
973 // First, we need to check if cpuid(4) is supported on this chip.
974 // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
975 // has the value n or greater.
976 //
977 __kmp_x86_cpuid(0, 0, &buf);
978 if (buf.eax >= 4) {
979 __kmp_x86_cpuid(4, 0, &buf);
980 threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
981 }
982 else {
983 threadInfo[nApics].maxCoresPerPkg = 1;
984 }
985
986 //
987 // Infer the pkgId / coreId / threadId using only the info
988 // obtained locally.
989 //
990 int widthCT = __kmp_cpuid_mask_width(
991 threadInfo[nApics].maxThreadsPerPkg);
992 threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
993
994 int widthC = __kmp_cpuid_mask_width(
995 threadInfo[nApics].maxCoresPerPkg);
996 int widthT = widthCT - widthC;
997 if (widthT < 0) {
998 //
999 // I've never seen this one happen, but I suppose it could, if
1000 // the cpuid instruction on a chip was really screwed up.
1001 // Make sure to restore the affinity mask before the tail call.
1002 //
1003 __kmp_set_system_affinity(oldMask, TRUE);
1004 __kmp_free(threadInfo);
1005 KMP_CPU_FREE(oldMask);
1006 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1007 return -1;
1008 }
1009
1010 int maskC = (1 << widthC) - 1;
1011 threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
1012 &maskC;
1013
1014 int maskT = (1 << widthT) - 1;
1015 threadInfo[nApics].threadId = threadInfo[nApics].apicId &maskT;
1016
1017 nApics++;
1018 }
1019
1020 //
1021 // We've collected all the info we need.
1022 // Restore the old affinity mask for this thread.
1023 //
1024 __kmp_set_system_affinity(oldMask, TRUE);
1025
1026 //
1027 // If there's only one thread context to bind to, form an Address object
1028 // with depth 1 and return immediately (or, if affinity is off, set
1029 // address2os to NULL and return).
1030 //
1031 // If it is configured to omit the package level when there is only a
1032 // single package, the logic at the end of this routine won't work if
1033 // there is only a single thread - it would try to form an Address
1034 // object with depth 0.
1035 //
1036 KMP_ASSERT(nApics > 0);
1037 if (nApics == 1) {
1038 __kmp_ncores = nPackages = 1;
1039 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
1040 if (__kmp_affinity_verbose) {
1041 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1042 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1043
1044 KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
1045 if (__kmp_affinity_respect_mask) {
1046 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1047 } else {
1048 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1049 }
1050 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1051 KMP_INFORM(Uniform, "KMP_AFFINITY");
1052 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1053 __kmp_nThreadsPerCore, __kmp_ncores);
1054 }
1055
1056 if (__kmp_affinity_type == affinity_none) {
1057 __kmp_free(threadInfo);
1058 KMP_CPU_FREE(oldMask);
1059 return 0;
1060 }
1061
1062 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
1063 Address addr(1);
1064 addr.labels[0] = threadInfo[0].pkgId;
1065 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
1066
1067 if (__kmp_affinity_gran_levels < 0) {
1068 __kmp_affinity_gran_levels = 0;
1069 }
1070
1071 if (__kmp_affinity_verbose) {
1072 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
1073 }
1074
1075 __kmp_free(threadInfo);
1076 KMP_CPU_FREE(oldMask);
1077 return 1;
1078 }
1079
1080 //
1081 // Sort the threadInfo table by physical Id.
1082 //
1083 qsort(threadInfo, nApics, sizeof(*threadInfo),
1084 __kmp_affinity_cmp_apicThreadInfo_phys_id);
1085
1086 //
1087 // The table is now sorted by pkgId / coreId / threadId, but we really
1088 // don't know the radix of any of the fields. pkgId's may be sparsely
1089 // assigned among the chips on a system. Although coreId's are usually
1090 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
1091 // [0..threadsPerCore-1], we don't want to make any such assumptions.
1092 //
1093 // For that matter, we don't know what coresPerPkg and threadsPerCore
1094 // (or the total # packages) are at this point - we want to determine
1095 // that now. We only have an upper bound on the first two figures.
1096 //
1097 // We also perform a consistency check at this point: the values returned
1098 // by the cpuid instruction for any thread bound to a given package had
1099 // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
1100 //
1101 nPackages = 1;
1102 nCoresPerPkg = 1;
1103 __kmp_nThreadsPerCore = 1;
1104 unsigned nCores = 1;
1105
1106 unsigned pkgCt = 1; // to determine radii
1107 unsigned lastPkgId = threadInfo[0].pkgId;
1108 unsigned coreCt = 1;
1109 unsigned lastCoreId = threadInfo[0].coreId;
1110 unsigned threadCt = 1;
1111 unsigned lastThreadId = threadInfo[0].threadId;
1112
1113 // intra-pkg consistency checks
1114 unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
1115 unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
1116
1117 for (i = 1; i < nApics; i++) {
1118 if (threadInfo[i].pkgId != lastPkgId) {
1119 nCores++;
1120 pkgCt++;
1121 lastPkgId = threadInfo[i].pkgId;
1122 if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
1123 coreCt = 1;
1124 lastCoreId = threadInfo[i].coreId;
1125 if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1126 threadCt = 1;
1127 lastThreadId = threadInfo[i].threadId;
1128
1129 //
1130 // This is a different package, so go on to the next iteration
1131 // without doing any consistency checks. Reset the consistency
1132 // check vars, though.
1133 //
1134 prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
1135 prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
1136 continue;
1137 }
1138
1139 if (threadInfo[i].coreId != lastCoreId) {
1140 nCores++;
1141 coreCt++;
1142 lastCoreId = threadInfo[i].coreId;
1143 if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1144 threadCt = 1;
1145 lastThreadId = threadInfo[i].threadId;
1146 }
1147 else if (threadInfo[i].threadId != lastThreadId) {
1148 threadCt++;
1149 lastThreadId = threadInfo[i].threadId;
1150 }
1151 else {
1152 __kmp_free(threadInfo);
1153 KMP_CPU_FREE(oldMask);
1154 *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
1155 return -1;
1156 }
1157
1158 //
1159 // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
1160 // fields agree between all the threads bound to a given package.
1161 //
1162 if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
1163 || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
1164 __kmp_free(threadInfo);
1165 KMP_CPU_FREE(oldMask);
1166 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1167 return -1;
1168 }
1169 }
1170 nPackages = pkgCt;
1171 if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
1172 if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1173
1174 //
1175 // When affinity is off, this routine will still be called to set
1176 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
1177 // nCoresPerPkg, & nPackages. Make sure all these vars are set
1178 // correctly, and return now if affinity is not enabled.
1179 //
1180 __kmp_ncores = nCores;
1181 if (__kmp_affinity_verbose) {
1182 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1183 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1184
1185 KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
1186 if (__kmp_affinity_respect_mask) {
1187 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1188 } else {
1189 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1190 }
1191 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1192 if (__kmp_affinity_uniform_topology()) {
1193 KMP_INFORM(Uniform, "KMP_AFFINITY");
1194 } else {
1195 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1196 }
1197 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1198 __kmp_nThreadsPerCore, __kmp_ncores);
1199
1200 }
1201
1202 if (__kmp_affinity_type == affinity_none) {
1203 __kmp_free(threadInfo);
1204 KMP_CPU_FREE(oldMask);
1205 return 0;
1206 }
1207
1208 //
1209 // Now that we've determined the number of packages, the number of cores
1210 // per package, and the number of threads per core, we can construct the
1211 // data structure that is to be returned.
1212 //
1213 int pkgLevel = 0;
1214 int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
1215 int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
1216 unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
1217
1218 KMP_ASSERT(depth > 0);
1219 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
1220
1221 for (i = 0; i < nApics; ++i) {
1222 Address addr(depth);
1223 unsigned os = threadInfo[i].osId;
1224 int d = 0;
1225
1226 if (pkgLevel >= 0) {
1227 addr.labels[d++] = threadInfo[i].pkgId;
1228 }
1229 if (coreLevel >= 0) {
1230 addr.labels[d++] = threadInfo[i].coreId;
1231 }
1232 if (threadLevel >= 0) {
1233 addr.labels[d++] = threadInfo[i].threadId;
1234 }
1235 (*address2os)[i] = AddrUnsPair(addr, os);
1236 }
1237
1238 if (__kmp_affinity_gran_levels < 0) {
1239 //
1240 // Set the granularity level based on what levels are modeled
1241 // in the machine topology map.
1242 //
1243 __kmp_affinity_gran_levels = 0;
1244 if ((threadLevel >= 0)
1245 && (__kmp_affinity_gran > affinity_gran_thread)) {
1246 __kmp_affinity_gran_levels++;
1247 }
1248 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1249 __kmp_affinity_gran_levels++;
1250 }
1251 if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
1252 __kmp_affinity_gran_levels++;
1253 }
1254 }
1255
1256 if (__kmp_affinity_verbose) {
1257 __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
1258 coreLevel, threadLevel);
1259 }
1260
1261 __kmp_free(threadInfo);
1262 KMP_CPU_FREE(oldMask);
1263 return depth;
1264}
1265
1266
1267//
1268// Intel(R) microarchitecture code name Nehalem, Dunnington and later
1269// architectures support a newer interface for specifying the x2APIC Ids,
1270// based on cpuid leaf 11.
1271//
1272static int
1273__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
1274 kmp_i18n_id_t *const msg_id)
1275{
1276 kmp_cpuid buf;
1277
1278 *address2os = NULL;
1279 *msg_id = kmp_i18n_null;
1280
1281 //
1282 // Check to see if cpuid leaf 11 is supported.
1283 //
1284 __kmp_x86_cpuid(0, 0, &buf);
1285 if (buf.eax < 11) {
1286 *msg_id = kmp_i18n_str_NoLeaf11Support;
1287 return -1;
1288 }
1289 __kmp_x86_cpuid(11, 0, &buf);
1290 if (buf.ebx == 0) {
1291 *msg_id = kmp_i18n_str_NoLeaf11Support;
1292 return -1;
1293 }
1294
1295 //
1296 // Find the number of levels in the machine topology. While we're at it,
1297 // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will
1298 // try to get more accurate values later by explicitly counting them,
1299 // but get reasonable defaults now, in case we return early.
1300 //
1301 int level;
1302 int threadLevel = -1;
1303 int coreLevel = -1;
1304 int pkgLevel = -1;
1305 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
1306
1307 for (level = 0;; level++) {
1308 if (level > 31) {
1309 //
1310 // FIXME: Hack for DPD200163180
1311 //
1312 // If level is big then something went wrong -> exiting
1313 //
1314 // There could actually be 32 valid levels in the machine topology,
1315 // but so far, the only machine we have seen which does not exit
1316 // this loop before iteration 32 has fubar x2APIC settings.
1317 //
1318 // For now, just reject this case based upon loop trip count.
1319 //
1320 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1321 return -1;
1322 }
1323 __kmp_x86_cpuid(11, level, &buf);
1324 if (buf.ebx == 0) {
1325 if (pkgLevel < 0) {
1326 //
1327 // Will infer nPackages from __kmp_xproc
1328 //
1329 pkgLevel = level;
1330 level++;
1331 }
1332 break;
1333 }
1334 int kind = (buf.ecx >> 8) & 0xff;
1335 if (kind == 1) {
1336 //
1337 // SMT level
1338 //
1339 threadLevel = level;
1340 coreLevel = -1;
1341 pkgLevel = -1;
1342 __kmp_nThreadsPerCore = buf.ebx & 0xff;
1343 if (__kmp_nThreadsPerCore == 0) {
1344 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1345 return -1;
1346 }
1347 }
1348 else if (kind == 2) {
1349 //
1350 // core level
1351 //
1352 coreLevel = level;
1353 pkgLevel = -1;
1354 nCoresPerPkg = buf.ebx & 0xff;
1355 if (nCoresPerPkg == 0) {
1356 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1357 return -1;
1358 }
1359 }
1360 else {
1361 if (level <= 0) {
1362 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1363 return -1;
1364 }
1365 if (pkgLevel >= 0) {
1366 continue;
1367 }
1368 pkgLevel = level;
1369 nPackages = buf.ebx & 0xff;
1370 if (nPackages == 0) {
1371 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1372 return -1;
1373 }
1374 }
1375 }
1376 int depth = level;
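    //
    // Hypothetical example: on a 2-socket machine with hyper-threading,
    // sub-leaf 0 of cpuid(11) reports an SMT level (2 logical procs per
    // core), sub-leaf 1 reports a core level, and sub-leaf 2 returns
    // ebx == 0, so the loop above ends with pkgLevel == 2 and depth == 3.
    // After the inversion below this becomes pkgLevel == 0, coreLevel == 1,
    // threadLevel == 2, matching the label order used in address2os.
    //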
1377
1378 //
1379 // In the above loop, "level" was counted from the finest level (usually
1380 // thread) to the coarsest. The caller expects that we will place the
1381 // labels in (*address2os)[].first.labels[] in the inverse order, so
1382 // we need to invert the vars saying which level means what.
1383 //
1384 if (threadLevel >= 0) {
1385 threadLevel = depth - threadLevel - 1;
1386 }
1387 if (coreLevel >= 0) {
1388 coreLevel = depth - coreLevel - 1;
1389 }
1390 KMP_DEBUG_ASSERT(pkgLevel >= 0);
1391 pkgLevel = depth - pkgLevel - 1;
1392
1393 //
1394 // The algorithm used starts by setting the affinity to each available
1395 // thread and retrieving info from the cpuid instruction, so if we are
1396 // not capable of calling __kmp_get_system_affinity() and
1397 // __kmp_set_system_affinity(), then we need to do something else - use
1398 // the defaults that we calculated from issuing cpuid without binding
1399 // to each proc.
1400 //
1401 if (! KMP_AFFINITY_CAPABLE())
1402 {
1403 //
1404 // Hack to try and infer the machine topology using only the data
1405 // available from cpuid on the current thread, and __kmp_xproc.
1406 //
1407 KMP_ASSERT(__kmp_affinity_type == affinity_none);
1408
1409 __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
1410 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
1411 if (__kmp_affinity_verbose) {
1412 KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
1413 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1414 if (__kmp_affinity_uniform_topology()) {
1415 KMP_INFORM(Uniform, "KMP_AFFINITY");
1416 } else {
1417 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1418 }
1419 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1420 __kmp_nThreadsPerCore, __kmp_ncores);
1421 }
1422 return 0;
1423 }
1424
1425 //
1426 //
1427 // From here on, we can assume that it is safe to call
1428 // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
1429 // even if __kmp_affinity_type = affinity_none.
1430 //
1431
1432 //
1433 // Save the affinity mask for the current thread.
1434 //
1435 kmp_affin_mask_t *oldMask;
1436 KMP_CPU_ALLOC(oldMask);
1437 __kmp_get_system_affinity(oldMask, TRUE);
1438
1439 //
1440 // Allocate the data structure to be returned.
1441 //
1442 AddrUnsPair *retval = (AddrUnsPair *)
1443 __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
1444
1445 //
1446 // Run through each of the available contexts, binding the current thread
1447 // to it, and obtaining the pertinent information using the cpuid instr.
1448 //
1449 unsigned int proc;
1450 int nApics = 0;
1451 for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
1452 //
1453 // Skip this proc if it is not included in the machine model.
1454 //
1455 if (! KMP_CPU_ISSET(proc, fullMask)) {
1456 continue;
1457 }
1458 KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
1459
1460 __kmp_affinity_bind_thread(proc);
1461
1462 //
1463 // Extract the labels for each level in the machine topology map
1464 // from the Apic ID.
1465 //
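        //
        // Worked example with hypothetical shift widths 1 (SMT) and 6 (core),
        // depth == 3: an x2APIC id of 0x53 (binary 1010011) splits into
        //   thread  = 0x53 & 0x1          == 1,
        //   core    = (0x53 & 0x3f) >> 1  == 9,
        //   package = 0x53 >> 6           == 1.
        //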
1466 Address addr(depth);
1467 int prev_shift = 0;
1468
1469 for (level = 0; level < depth; level++) {
1470 __kmp_x86_cpuid(11, level, &buf);
1471 unsigned apicId = buf.edx;
1472 if (buf.ebx == 0) {
1473 if (level != depth - 1) {
1474 KMP_CPU_FREE(oldMask);
1475 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1476 return -1;
1477 }
1478 addr.labels[depth - level - 1] = apicId >> prev_shift;
1479 level++;
1480 break;
1481 }
1482 int shift = buf.eax & 0x1f;
1483 int mask = (1 << shift) - 1;
1484 addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
1485 prev_shift = shift;
1486 }
1487 if (level != depth) {
1488 KMP_CPU_FREE(oldMask);
1489 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1490 return -1;
1491 }
1492
1493 retval[nApics] = AddrUnsPair(addr, proc);
1494 nApics++;
1495 }
1496
1497 //
1498 // We've collected all the info we need.
1499 // Restore the old affinity mask for this thread.
1500 //
1501 __kmp_set_system_affinity(oldMask, TRUE);
1502
1503 //
1504 // If there's only one thread context to bind to, return now.
1505 //
1506 KMP_ASSERT(nApics > 0);
1507 if (nApics == 1) {
1508 __kmp_ncores = nPackages = 1;
1509 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
1510 if (__kmp_affinity_verbose) {
1511 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1512 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1513
1514 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1515 if (__kmp_affinity_respect_mask) {
1516 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1517 } else {
1518 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1519 }
1520 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1521 KMP_INFORM(Uniform, "KMP_AFFINITY");
1522 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1523 __kmp_nThreadsPerCore, __kmp_ncores);
1524 }
1525
1526 if (__kmp_affinity_type == affinity_none) {
1527 __kmp_free(retval);
1528 KMP_CPU_FREE(oldMask);
1529 return 0;
1530 }
1531
1532 //
1533 // Form an Address object which only includes the package level.
1534 //
1535 Address addr(1);
1536 addr.labels[0] = retval[0].first.labels[pkgLevel];
1537 retval[0].first = addr;
1538
1539 if (__kmp_affinity_gran_levels < 0) {
1540 __kmp_affinity_gran_levels = 0;
1541 }
1542
1543 if (__kmp_affinity_verbose) {
1544 __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
1545 }
1546
1547 *address2os = retval;
1548 KMP_CPU_FREE(oldMask);
1549 return 1;
1550 }
1551
1552 //
1553 // Sort the table by physical Id.
1554 //
1555 qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
1556
1557 //
1558 // Find the radix at each of the levels.
1559 //
1560 unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1561 unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1562 unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1563 unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1564 for (level = 0; level < depth; level++) {
1565 totals[level] = 1;
1566 maxCt[level] = 1;
1567 counts[level] = 1;
1568 last[level] = retval[0].first.labels[level];
1569 }
1570
1571 //
1572 // From here on, the iteration variable "level" runs from the finest
1573 // level to the coarsest, i.e. we iterate forward through
1574 // (*address2os)[].first.labels[] - in the previous loops, we iterated
1575 // backwards.
1576 //
1577 for (proc = 1; (int)proc < nApics; proc++) {
1578 int level;
1579 for (level = 0; level < depth; level++) {
1580 if (retval[proc].first.labels[level] != last[level]) {
1581 int j;
1582 for (j = level + 1; j < depth; j++) {
1583 totals[j]++;
1584 counts[j] = 1;
1585 // The line below caused incorrect topology information to be printed
1586 // in case the max value for some level (maxCt[level]) was encountered
1587 // earlier than some smaller value while going through the array.
1588 // For example, suppose pkg0 has 4 cores and pkg1 has 2 cores. Then maxCt[1] == 2
1589 // whereas it must be 4.
1590 // TODO!!! Check if it can be commented safely
1591 //maxCt[j] = 1;
1592 last[j] = retval[proc].first.labels[j];
1593 }
1594 totals[level]++;
1595 counts[level]++;
1596 if (counts[level] > maxCt[level]) {
1597 maxCt[level] = counts[level];
1598 }
1599 last[level] = retval[proc].first.labels[level];
1600 break;
1601 }
1602 else if (level == depth - 1) {
1603 __kmp_free(last);
1604 __kmp_free(maxCt);
1605 __kmp_free(counts);
1606 __kmp_free(totals);
1607 __kmp_free(retval);
1608 KMP_CPU_FREE(oldMask);
1609 *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
1610 return -1;
1611 }
1612 }
1613 }
1614
1615 //
1616 // When affinity is off, this routine will still be called to set
1617 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
1618 // nCoresPerPkg, & nPackages. Make sure all these vars are set
1619 // correctly, and return if affinity is not enabled.
1620 //
1621 if (threadLevel >= 0) {
1622 __kmp_nThreadsPerCore = maxCt[threadLevel];
1623 }
1624 else {
1625 __kmp_nThreadsPerCore = 1;
1626 }
1627 nPackages = totals[pkgLevel];
1628
1629 if (coreLevel >= 0) {
1630 __kmp_ncores = totals[coreLevel];
1631 nCoresPerPkg = maxCt[coreLevel];
1632 }
1633 else {
1634 __kmp_ncores = nPackages;
1635 nCoresPerPkg = 1;
1636 }
1637
1638 //
1639 // Check to see if the machine topology is uniform
1640 //
1641 unsigned prod = maxCt[0];
1642 for (level = 1; level < depth; level++) {
1643 prod *= maxCt[level];
1644 }
1645 bool uniform = (prod == totals[level - 1]);
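    //
    // Example: on a machine with 2 packages x 4 cores x 2 HW threads,
    // maxCt == {2, 4, 2} and totals == {2, 8, 16}; prod == 2*4*2 == 16 equals
    // totals[depth - 1], so the topology is uniform.  If one package had only
    // 3 cores, totals[depth - 1] would be 14 while prod stays 16, and the
    // topology would be reported as non-uniform.
    //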
1646
1647 //
1648 // Print the machine topology summary.
1649 //
1650 if (__kmp_affinity_verbose) {
1651 char mask[KMP_AFFIN_MASK_PRINT_LEN];
1652 __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1653
1654 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1655 if (__kmp_affinity_respect_mask) {
1656 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
1657 } else {
1658 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
1659 }
1660 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1661 if (uniform) {
1662 KMP_INFORM(Uniform, "KMP_AFFINITY");
1663 } else {
1664 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1665 }
1666
1667 kmp_str_buf_t buf;
1668 __kmp_str_buf_init(&buf);
1669
1670 __kmp_str_buf_print(&buf, "%d", totals[0]);
1671 for (level = 1; level <= pkgLevel; level++) {
1672 __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
1673 }
1674 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
1675 __kmp_nThreadsPerCore, __kmp_ncores);
1676
1677 __kmp_str_buf_free(&buf);
1678 }
1679
1680 if (__kmp_affinity_type == affinity_none) {
1681 __kmp_free(last);
1682 __kmp_free(maxCt);
1683 __kmp_free(counts);
1684 __kmp_free(totals);
1685 __kmp_free(retval);
1686 KMP_CPU_FREE(oldMask);
1687 return 0;
1688 }
1689
1690 //
1691 // Find any levels with radix 1, and remove them from the map
1692 // (except for the package level).
1693 //
1694 int new_depth = 0;
1695 for (level = 0; level < depth; level++) {
1696 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1697 continue;
1698 }
1699 new_depth++;
1700 }
1701
1702 //
1703 // If we are removing any levels, allocate a new vector to return,
1704 // and copy the relevant information to it.
1705 //
1706 if (new_depth != depth) {
1707 AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
1708 sizeof(AddrUnsPair) * nApics);
1709 for (proc = 0; (int)proc < nApics; proc++) {
1710 Address addr(new_depth);
1711 new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
1712 }
1713 int new_level = 0;
1714 for (level = 0; level < depth; level++) {
1715 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1716 if (level == threadLevel) {
1717 threadLevel = -1;
1718 }
1719 else if ((threadLevel >= 0) && (level < threadLevel)) {
1720 threadLevel--;
1721 }
1722 if (level == coreLevel) {
1723 coreLevel = -1;
1724 }
1725 else if ((coreLevel >= 0) && (level < coreLevel)) {
1726 coreLevel--;
1727 }
1728 if (level < pkgLevel) {
1729 pkgLevel--;
1730 }
1731 continue;
1732 }
1733 for (proc = 0; (int)proc < nApics; proc++) {
1734 new_retval[proc].first.labels[new_level]
1735 = retval[proc].first.labels[level];
1736 }
1737 new_level++;
1738 }
1739
1740 __kmp_free(retval);
1741 retval = new_retval;
1742 depth = new_depth;
1743 }
1744
1745 if (__kmp_affinity_gran_levels < 0) {
1746 //
1747 // Set the granularity level based on what levels are modeled
1748 // in the machine topology map.
1749 //
1750 __kmp_affinity_gran_levels = 0;
1751 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
1752 __kmp_affinity_gran_levels++;
1753 }
1754 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1755 __kmp_affinity_gran_levels++;
1756 }
1757 if (__kmp_affinity_gran > affinity_gran_package) {
1758 __kmp_affinity_gran_levels++;
1759 }
1760 }
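    //
    // __kmp_affinity_gran_levels is the number of innermost topology levels
    // ignored when forming affinity masks.  For example, with a thread level
    // in the map and granularity=core, the value 1 causes all HW thread
    // contexts on a core to be OR'd into a single mask in __kmp_create_masks.
    //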
1761
1762 if (__kmp_affinity_verbose) {
1763 __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
1764 coreLevel, threadLevel);
1765 }
1766
1767 __kmp_free(last);
1768 __kmp_free(maxCt);
1769 __kmp_free(counts);
1770 __kmp_free(totals);
1771 KMP_CPU_FREE(oldMask);
1772 *address2os = retval;
1773 return depth;
1774}
1775
1776
1777# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1778
1779
1780#define osIdIndex 0
1781#define threadIdIndex 1
1782#define coreIdIndex 2
1783#define pkgIdIndex 3
1784#define nodeIdIndex 4
1785
1786typedef unsigned *ProcCpuInfo;
1787static unsigned maxIndex = pkgIdIndex;
1788
1789
1790static int
1791__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
1792{
1793 const unsigned *aa = (const unsigned *)a;
1794 const unsigned *bb = (const unsigned *)b;
1795 if (aa[osIdIndex] < bb[osIdIndex]) return -1;
1796 if (aa[osIdIndex] > bb[osIdIndex]) return 1;
1797 return 0;
1798}
1799
1800
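//
// qsort comparator that orders cpuinfo records by their most significant
// field first (node ids, then package id, core id, and thread id, down to
// the OS proc id as a tie-breaker), so that records belonging to the same
// package/core end up adjacent after sorting.
//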
1801static int
1802__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
1803{
1804 unsigned i;
1805 const unsigned *aa = *((const unsigned **)a);
1806 const unsigned *bb = *((const unsigned **)b);
1807 for (i = maxIndex; ; i--) {
1808 if (aa[i] < bb[i]) return -1;
1809 if (aa[i] > bb[i]) return 1;
1810 if (i == osIdIndex) break;
1811 }
1812 return 0;
1813}
1814
1815
1816//
1817// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
1818// affinity map.
1819//
1820static int
1821__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
1822 kmp_i18n_id_t *const msg_id, FILE *f)
1823{
1824 *address2os = NULL;
1825 *msg_id = kmp_i18n_null;
1826
1827 //
1828    // Scan the file and count the number of "processor" (osId) fields,
Alp Toker8f2d3f02014-02-24 10:40:15 +00001829 // and find the highest value of <n> for a node_<n> field.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001830 //
1831 char buf[256];
1832 unsigned num_records = 0;
1833 while (! feof(f)) {
1834 buf[sizeof(buf) - 1] = 1;
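        //
        // The last byte of buf is a sentinel: it is set to a nonzero value
        // before each fgets() call, and fgets() only overwrites it with '\0'
        // when a line fills the whole buffer.  The second parsing pass below
        // uses this to detect lines that are longer than the buffer.
        //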
1835 if (! fgets(buf, sizeof(buf), f)) {
1836 //
1837 // Read errors presumably because of EOF
1838 //
1839 break;
1840 }
1841
1842 char s1[] = "processor";
1843 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1844 num_records++;
1845 continue;
1846 }
1847
1848 //
1849 // FIXME - this will match "node_<n> <garbage>"
1850 //
1851 unsigned level;
1852 if (sscanf(buf, "node_%d id", &level) == 1) {
1853 if (nodeIdIndex + level >= maxIndex) {
1854 maxIndex = nodeIdIndex + level;
1855 }
1856 continue;
1857 }
1858 }
1859
1860 //
1861 // Check for empty file / no valid processor records, or too many.
1862 // The number of records can't exceed the number of valid bits in the
1863 // affinity mask.
1864 //
1865 if (num_records == 0) {
1866 *line = 0;
1867 *msg_id = kmp_i18n_str_NoProcRecords;
1868 return -1;
1869 }
1870 if (num_records > (unsigned)__kmp_xproc) {
1871 *line = 0;
1872 *msg_id = kmp_i18n_str_TooManyProcRecords;
1873 return -1;
1874 }
1875
1876 //
1877    // Set the file pointer back to the beginning, so that we can scan the
1878 // file again, this time performing a full parse of the data.
1879    // Allocate a vector of ProcCpuInfo objects, where we will place the data.
1880 // Adding an extra element at the end allows us to remove a lot of extra
1881 // checks for termination conditions.
1882 //
1883 if (fseek(f, 0, SEEK_SET) != 0) {
1884 *line = 0;
1885 *msg_id = kmp_i18n_str_CantRewindCpuinfo;
1886 return -1;
1887 }
1888
1889 //
1890 // Allocate the array of records to store the proc info in. The dummy
1891 // element at the end makes the logic in filling them out easier to code.
1892 //
1893 unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
1894 * sizeof(unsigned *));
1895 unsigned i;
1896 for (i = 0; i <= num_records; i++) {
1897 threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
1898 * sizeof(unsigned));
1899 }
1900
1901#define CLEANUP_THREAD_INFO \
1902 for (i = 0; i <= num_records; i++) { \
1903 __kmp_free(threadInfo[i]); \
1904 } \
1905 __kmp_free(threadInfo);
1906
1907 //
1908 // A value of UINT_MAX means that we didn't find the field
1909 //
1910 unsigned __index;
1911
1912#define INIT_PROC_INFO(p) \
1913 for (__index = 0; __index <= maxIndex; __index++) { \
1914 (p)[__index] = UINT_MAX; \
1915 }
1916
1917 for (i = 0; i <= num_records; i++) {
1918 INIT_PROC_INFO(threadInfo[i]);
1919 }
1920
1921 unsigned num_avail = 0;
1922 *line = 0;
1923 while (! feof(f)) {
1924 //
1925 // Create an inner scoping level, so that all the goto targets at the
1926 // end of the loop appear in an outer scoping level. This avoids
1927 // warnings about jumping past an initialization to a target in the
1928 // same block.
1929 //
1930 {
1931 buf[sizeof(buf) - 1] = 1;
1932 bool long_line = false;
1933 if (! fgets(buf, sizeof(buf), f)) {
1934 //
1935 // Read errors presumably because of EOF
1936 //
1937 // If there is valid data in threadInfo[num_avail], then fake
1938                // a blank line to ensure that the last address gets parsed.
1939 //
1940 bool valid = false;
1941 for (i = 0; i <= maxIndex; i++) {
1942 if (threadInfo[num_avail][i] != UINT_MAX) {
1943 valid = true;
1944 }
1945 }
1946 if (! valid) {
1947 break;
1948 }
1949 buf[0] = 0;
1950 } else if (!buf[sizeof(buf) - 1]) {
1951 //
1952 // The line is longer than the buffer. Set a flag and don't
1953 // emit an error if we were going to ignore the line, anyway.
1954 //
1955 long_line = true;
1956
1957#define CHECK_LINE \
1958 if (long_line) { \
1959 CLEANUP_THREAD_INFO; \
1960 *msg_id = kmp_i18n_str_LongLineCpuinfo; \
1961 return -1; \
1962 }
1963 }
1964 (*line)++;
1965
1966 char s1[] = "processor";
1967 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1968 CHECK_LINE;
1969 char *p = strchr(buf + sizeof(s1) - 1, ':');
1970 unsigned val;
1971 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
1972 if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
1973 threadInfo[num_avail][osIdIndex] = val;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001974#if KMP_OS_LINUX && USE_SYSFS_INFO
1975 char path[256];
1976 snprintf(path, sizeof(path),
1977 "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
1978 threadInfo[num_avail][osIdIndex]);
1979 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
1980
1981 snprintf(path, sizeof(path),
1982 "/sys/devices/system/cpu/cpu%u/topology/core_id",
1983 threadInfo[num_avail][osIdIndex]);
1984 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001985 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001986#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001987 }
1988 char s2[] = "physical id";
1989 if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
1990 CHECK_LINE;
1991 char *p = strchr(buf + sizeof(s2) - 1, ':');
1992 unsigned val;
1993 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
1994 if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
1995 threadInfo[num_avail][pkgIdIndex] = val;
1996 continue;
1997 }
1998 char s3[] = "core id";
1999 if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
2000 CHECK_LINE;
2001 char *p = strchr(buf + sizeof(s3) - 1, ':');
2002 unsigned val;
2003 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2004 if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
2005 threadInfo[num_avail][coreIdIndex] = val;
2006 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002007#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jim Cownie5e8470a2013-09-27 10:38:44 +00002008 }
2009 char s4[] = "thread id";
2010 if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
2011 CHECK_LINE;
2012 char *p = strchr(buf + sizeof(s4) - 1, ':');
2013 unsigned val;
2014 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2015 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
2016 threadInfo[num_avail][threadIdIndex] = val;
2017 continue;
2018 }
2019 unsigned level;
2020 if (sscanf(buf, "node_%d id", &level) == 1) {
2021 CHECK_LINE;
2022 char *p = strchr(buf + sizeof(s4) - 1, ':');
2023 unsigned val;
2024 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2025 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
2026 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
2027 threadInfo[num_avail][nodeIdIndex + level] = val;
2028 continue;
2029 }
2030
2031 //
2032 // We didn't recognize the leading token on the line.
2033 // There are lots of leading tokens that we don't recognize -
2034 // if the line isn't empty, go on to the next line.
2035 //
2036 if ((*buf != 0) && (*buf != '\n')) {
2037 //
2038 // If the line is longer than the buffer, read characters
2039 // until we find a newline.
2040 //
2041 if (long_line) {
2042 int ch;
2043 while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
2044 }
2045 continue;
2046 }
2047
2048 //
2049 // A newline has signalled the end of the processor record.
2050 // Check that there aren't too many procs specified.
2051 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002052 if ((int)num_avail == __kmp_xproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002053 CLEANUP_THREAD_INFO;
2054 *msg_id = kmp_i18n_str_TooManyEntries;
2055 return -1;
2056 }
2057
2058 //
2059 // Check for missing fields. The osId field must be there, and we
2060 // currently require that the physical id field is specified, also.
2061 //
2062 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
2063 CLEANUP_THREAD_INFO;
2064 *msg_id = kmp_i18n_str_MissingProcField;
2065 return -1;
2066 }
2067 if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
2068 CLEANUP_THREAD_INFO;
2069 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
2070 return -1;
2071 }
2072
2073 //
2074 // Skip this proc if it is not included in the machine model.
2075 //
2076 if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
2077 INIT_PROC_INFO(threadInfo[num_avail]);
2078 continue;
2079 }
2080
2081 //
2082 // We have a successful parse of this proc's info.
2083 // Increment the counter, and prepare for the next proc.
2084 //
2085 num_avail++;
2086 KMP_ASSERT(num_avail <= num_records);
2087 INIT_PROC_INFO(threadInfo[num_avail]);
2088 }
2089 continue;
2090
2091 no_val:
2092 CLEANUP_THREAD_INFO;
2093 *msg_id = kmp_i18n_str_MissingValCpuinfo;
2094 return -1;
2095
2096 dup_field:
2097 CLEANUP_THREAD_INFO;
2098 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
2099 return -1;
2100 }
2101 *line = 0;
2102
2103# if KMP_MIC && REDUCE_TEAM_SIZE
2104 unsigned teamSize = 0;
2105# endif // KMP_MIC && REDUCE_TEAM_SIZE
2106
2107 // check for num_records == __kmp_xproc ???
2108
2109 //
2110 // If there's only one thread context to bind to, form an Address object
2111 // with depth 1 and return immediately (or, if affinity is off, set
2112 // address2os to NULL and return).
2113 //
2114 // If it is configured to omit the package level when there is only a
2115 // single package, the logic at the end of this routine won't work if
2116 // there is only a single thread - it would try to form an Address
2117 // object with depth 0.
2118 //
2119 KMP_ASSERT(num_avail > 0);
2120 KMP_ASSERT(num_avail <= num_records);
2121 if (num_avail == 1) {
2122 __kmp_ncores = 1;
2123 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002124 if (__kmp_affinity_verbose) {
2125 if (! KMP_AFFINITY_CAPABLE()) {
2126 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2127 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2128 KMP_INFORM(Uniform, "KMP_AFFINITY");
2129 }
2130 else {
2131 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2132 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
2133 fullMask);
2134 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2135 if (__kmp_affinity_respect_mask) {
2136 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2137 } else {
2138 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2139 }
2140 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2141 KMP_INFORM(Uniform, "KMP_AFFINITY");
2142 }
2143 int index;
2144 kmp_str_buf_t buf;
2145 __kmp_str_buf_init(&buf);
2146 __kmp_str_buf_print(&buf, "1");
2147 for (index = maxIndex - 1; index > pkgIdIndex; index--) {
2148 __kmp_str_buf_print(&buf, " x 1");
2149 }
2150 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
2151 __kmp_str_buf_free(&buf);
2152 }
2153
2154 if (__kmp_affinity_type == affinity_none) {
2155 CLEANUP_THREAD_INFO;
2156 return 0;
2157 }
2158
2159 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
2160 Address addr(1);
2161 addr.labels[0] = threadInfo[0][pkgIdIndex];
2162 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
2163
2164 if (__kmp_affinity_gran_levels < 0) {
2165 __kmp_affinity_gran_levels = 0;
2166 }
2167
2168 if (__kmp_affinity_verbose) {
2169 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
2170 }
2171
2172 CLEANUP_THREAD_INFO;
2173 return 1;
2174 }
2175
2176 //
2177 // Sort the threadInfo table by physical Id.
2178 //
2179 qsort(threadInfo, num_avail, sizeof(*threadInfo),
2180 __kmp_affinity_cmp_ProcCpuInfo_phys_id);
2181
2182 //
2183 // The table is now sorted by pkgId / coreId / threadId, but we really
2184 // don't know the radix of any of the fields. pkgId's may be sparsely
2185 // assigned among the chips on a system. Although coreId's are usually
2186 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
2187 // [0..threadsPerCore-1], we don't want to make any such assumptions.
2188 //
2189 // For that matter, we don't know what coresPerPkg and threadsPerCore
2190 // (or the total # packages) are at this point - we want to determine
2191 // that now. We only have an upper bound on the first two figures.
2192 //
2193 unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
2194 * sizeof(unsigned));
2195 unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
2196 * sizeof(unsigned));
2197 unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
2198 * sizeof(unsigned));
2199 unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
2200 * sizeof(unsigned));
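    //
    // For each field index, counts[] holds the number of distinct ids seen
    // under the current parent, maxCt[] the largest such count observed (the
    // radix of that level), totals[] the number of distinct nodes seen at
    // that level overall, and lastId[] the id from the previous record.
    //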
2201
2202 bool assign_thread_ids = false;
2203 unsigned threadIdCt;
2204 unsigned index;
2205
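    //
    // The radix check below runs in at most two passes.  The first pass uses
    // the thread ids from the file as-is; if a duplicate pkg/core/thread tuple
    // is found and no explicit thread ids were given, assign_thread_ids is set
    // and the check restarts, numbering the thread contexts on each core
    // sequentially via threadIdCt.
    //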
2206 restart_radix_check:
2207 threadIdCt = 0;
2208
2209 //
2210 // Initialize the counter arrays with data from threadInfo[0].
2211 //
2212 if (assign_thread_ids) {
2213 if (threadInfo[0][threadIdIndex] == UINT_MAX) {
2214 threadInfo[0][threadIdIndex] = threadIdCt++;
2215 }
2216 else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
2217 threadIdCt = threadInfo[0][threadIdIndex] + 1;
2218 }
2219 }
2220 for (index = 0; index <= maxIndex; index++) {
2221 counts[index] = 1;
2222 maxCt[index] = 1;
2223 totals[index] = 1;
2224        lastId[index] = threadInfo[0][index];
2225 }
2226
2227 //
2228 // Run through the rest of the OS procs.
2229 //
2230 for (i = 1; i < num_avail; i++) {
2231 //
2232 // Find the most significant index whose id differs
2233 // from the id for the previous OS proc.
2234 //
2235 for (index = maxIndex; index >= threadIdIndex; index--) {
2236 if (assign_thread_ids && (index == threadIdIndex)) {
2237 //
2238 // Auto-assign the thread id field if it wasn't specified.
2239 //
2240 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2241 threadInfo[i][threadIdIndex] = threadIdCt++;
2242 }
2243
2244 //
2245                // Apparently the thread id field was specified for some
2246 // entries and not others. Start the thread id counter
2247 // off at the next higher thread id.
2248 //
2249 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2250 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2251 }
2252 }
2253 if (threadInfo[i][index] != lastId[index]) {
2254 //
2255 // Run through all indices which are less significant,
2256 // and reset the counts to 1.
2257 //
2258 // At all levels up to and including index, we need to
2259 // increment the totals and record the last id.
2260 //
2261 unsigned index2;
2262 for (index2 = threadIdIndex; index2 < index; index2++) {
2263 totals[index2]++;
2264 if (counts[index2] > maxCt[index2]) {
2265 maxCt[index2] = counts[index2];
2266 }
2267 counts[index2] = 1;
2268 lastId[index2] = threadInfo[i][index2];
2269 }
2270 counts[index]++;
2271 totals[index]++;
2272 lastId[index] = threadInfo[i][index];
2273
2274 if (assign_thread_ids && (index > threadIdIndex)) {
2275
2276# if KMP_MIC && REDUCE_TEAM_SIZE
2277 //
2278 // The default team size is the total #threads in the machine
2279 // minus 1 thread for every core that has 3 or more threads.
2280 //
2281 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2282# endif // KMP_MIC && REDUCE_TEAM_SIZE
2283
2284 //
2285 // Restart the thread counter, as we are on a new core.
2286 //
2287 threadIdCt = 0;
2288
2289 //
2290 // Auto-assign the thread id field if it wasn't specified.
2291 //
2292 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2293 threadInfo[i][threadIdIndex] = threadIdCt++;
2294 }
2295
2296 //
2297                // Apparently the thread id field was specified for some
2298 // entries and not others. Start the thread id counter
2299 // off at the next higher thread id.
2300 //
2301 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2302 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2303 }
2304 }
2305 break;
2306 }
2307 }
2308 if (index < threadIdIndex) {
2309 //
2310 // If thread ids were specified, it is an error if they are not
2311            // unique. Also, check that we haven't already restarted the
2312 // loop (to be safe - shouldn't need to).
2313 //
2314 if ((threadInfo[i][threadIdIndex] != UINT_MAX)
2315 || assign_thread_ids) {
2316 __kmp_free(lastId);
2317 __kmp_free(totals);
2318 __kmp_free(maxCt);
2319 __kmp_free(counts);
2320 CLEANUP_THREAD_INFO;
2321 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2322 return -1;
2323 }
2324
2325 //
2326 // If the thread ids were not specified and we see entries
2327            // that are duplicates, start the loop over and
2328 // assign the thread ids manually.
2329 //
2330 assign_thread_ids = true;
2331 goto restart_radix_check;
2332 }
2333 }
2334
2335# if KMP_MIC && REDUCE_TEAM_SIZE
2336 //
2337 // The default team size is the total #threads in the machine
2338 // minus 1 thread for every core that has 3 or more threads.
2339 //
2340 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2341# endif // KMP_MIC && REDUCE_TEAM_SIZE
2342
2343 for (index = threadIdIndex; index <= maxIndex; index++) {
2344 if (counts[index] > maxCt[index]) {
2345 maxCt[index] = counts[index];
2346 }
2347 }
2348
2349 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2350 nCoresPerPkg = maxCt[coreIdIndex];
2351 nPackages = totals[pkgIdIndex];
2352
2353 //
2354 // Check to see if the machine topology is uniform
2355 //
2356 unsigned prod = totals[maxIndex];
2357 for (index = threadIdIndex; index < maxIndex; index++) {
2358 prod *= maxCt[index];
2359 }
2360 bool uniform = (prod == totals[threadIdIndex]);
2361
2362 //
2363 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00002364 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002365 // nCoresPerPkg, & nPackages. Make sure all these vars are set
2366 // correctly, and return now if affinity is not enabled.
2367 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00002368 __kmp_ncores = totals[coreIdIndex];
2369
2370 if (__kmp_affinity_verbose) {
2371 if (! KMP_AFFINITY_CAPABLE()) {
2372 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2373 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2374 if (uniform) {
2375 KMP_INFORM(Uniform, "KMP_AFFINITY");
2376 } else {
2377 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2378 }
2379 }
2380 else {
2381 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2382 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
2383 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2384 if (__kmp_affinity_respect_mask) {
2385 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2386 } else {
2387 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2388 }
2389 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2390 if (uniform) {
2391 KMP_INFORM(Uniform, "KMP_AFFINITY");
2392 } else {
2393 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2394 }
2395 }
2396 kmp_str_buf_t buf;
2397 __kmp_str_buf_init(&buf);
2398
2399 __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
2400 for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2401 __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
2402 }
2403 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
2404 maxCt[threadIdIndex], __kmp_ncores);
2405
2406 __kmp_str_buf_free(&buf);
2407 }
2408
2409# if KMP_MIC && REDUCE_TEAM_SIZE
2410 //
2411 // Set the default team size.
2412 //
2413 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2414 __kmp_dflt_team_nth = teamSize;
2415 KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
2416 __kmp_dflt_team_nth));
2417 }
2418# endif // KMP_MIC && REDUCE_TEAM_SIZE
2419
2420 if (__kmp_affinity_type == affinity_none) {
2421 __kmp_free(lastId);
2422 __kmp_free(totals);
2423 __kmp_free(maxCt);
2424 __kmp_free(counts);
2425 CLEANUP_THREAD_INFO;
2426 return 0;
2427 }
2428
2429 //
2430 // Count the number of levels which have more nodes at that level than
2431    // at the parent's level (with there being an implicit root node above
2432    // the top level). This is equivalent to saying that there is at least
2433 // one node at this level which has a sibling. These levels are in the
2434 // map, and the package level is always in the map.
2435 //
2436 bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
2437 int level = 0;
2438 for (index = threadIdIndex; index < maxIndex; index++) {
2439 KMP_ASSERT(totals[index] >= totals[index + 1]);
2440 inMap[index] = (totals[index] > totals[index + 1]);
2441 }
2442 inMap[maxIndex] = (totals[maxIndex] > 1);
2443 inMap[pkgIdIndex] = true;
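    //
    // For example, if every core reports exactly one thread context, then
    // totals[threadIdIndex] == totals[coreIdIndex], no thread has a sibling,
    // and the thread level is left out of the map.
    //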
2444
2445 int depth = 0;
2446 for (index = threadIdIndex; index <= maxIndex; index++) {
2447 if (inMap[index]) {
2448 depth++;
2449 }
2450 }
2451 KMP_ASSERT(depth > 0);
2452
2453 //
2454 // Construct the data structure that is to be returned.
2455 //
2456 *address2os = (AddrUnsPair*)
2457 __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
2458 int pkgLevel = -1;
2459 int coreLevel = -1;
2460 int threadLevel = -1;
2461
2462 for (i = 0; i < num_avail; ++i) {
2463 Address addr(depth);
2464 unsigned os = threadInfo[i][osIdIndex];
2465 int src_index;
2466 int dst_index = 0;
2467
2468 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
2469 if (! inMap[src_index]) {
2470 continue;
2471 }
2472 addr.labels[dst_index] = threadInfo[i][src_index];
2473 if (src_index == pkgIdIndex) {
2474 pkgLevel = dst_index;
2475 }
2476 else if (src_index == coreIdIndex) {
2477 coreLevel = dst_index;
2478 }
2479 else if (src_index == threadIdIndex) {
2480 threadLevel = dst_index;
2481 }
2482 dst_index++;
2483 }
2484 (*address2os)[i] = AddrUnsPair(addr, os);
2485 }
2486
2487 if (__kmp_affinity_gran_levels < 0) {
2488 //
2489 // Set the granularity level based on what levels are modeled
2490 // in the machine topology map.
2491 //
2492 unsigned src_index;
2493 __kmp_affinity_gran_levels = 0;
2494 for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2495 if (! inMap[src_index]) {
2496 continue;
2497 }
2498 switch (src_index) {
2499 case threadIdIndex:
2500 if (__kmp_affinity_gran > affinity_gran_thread) {
2501 __kmp_affinity_gran_levels++;
2502 }
2503
2504 break;
2505 case coreIdIndex:
2506 if (__kmp_affinity_gran > affinity_gran_core) {
2507 __kmp_affinity_gran_levels++;
2508 }
2509 break;
2510
2511 case pkgIdIndex:
2512 if (__kmp_affinity_gran > affinity_gran_package) {
2513 __kmp_affinity_gran_levels++;
2514 }
2515 break;
2516 }
2517 }
2518 }
2519
2520 if (__kmp_affinity_verbose) {
2521 __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2522 coreLevel, threadLevel);
2523 }
2524
2525 __kmp_free(inMap);
2526 __kmp_free(lastId);
2527 __kmp_free(totals);
2528 __kmp_free(maxCt);
2529 __kmp_free(counts);
2530 CLEANUP_THREAD_INFO;
2531 return depth;
2532}
2533
2534
2535//
2536// Create and return a table of affinity masks, indexed by OS thread ID.
2537// This routine handles OR'ing together all the affinity masks of threads
2538// that are sufficiently close, if granularity > fine.
2539//
2540static kmp_affin_mask_t *
2541__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
2542 AddrUnsPair *address2os, unsigned numAddrs)
2543{
2544 //
2545 // First form a table of affinity masks in order of OS thread id.
2546 //
2547 unsigned depth;
2548 unsigned maxOsId;
2549 unsigned i;
2550
2551 KMP_ASSERT(numAddrs > 0);
2552 depth = address2os[0].first.depth;
2553
2554 maxOsId = 0;
2555 for (i = 0; i < numAddrs; i++) {
2556 unsigned osId = address2os[i].second;
2557 if (osId > maxOsId) {
2558 maxOsId = osId;
2559 }
2560 }
2561 kmp_affin_mask_t *osId2Mask = (kmp_affin_mask_t *)__kmp_allocate(
2562 (maxOsId + 1) * __kmp_affin_mask_size);
2563
2564 //
2565 // Sort the address2os table according to physical order. Doing so
2566 // will put all threads on the same core/package/node in consecutive
2567 // locations.
2568 //
2569 qsort(address2os, numAddrs, sizeof(*address2os),
2570 __kmp_affinity_cmp_Address_labels);
2571
2572 KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2573 if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
2574 KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
2575 }
2576 if (__kmp_affinity_gran_levels >= (int)depth) {
2577 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2578 && (__kmp_affinity_type != affinity_none))) {
2579 KMP_WARNING(AffThreadsMayMigrate);
2580 }
2581 }
2582
2583 //
2584 // Run through the table, forming the masks for all threads on each
2585 // core. Threads on the same core will have identical "Address"
2586 // objects, not considering the last level, which must be the thread
2587 // id. All threads on a core will appear consecutively.
2588 //
2589 unsigned unique = 0;
2590 unsigned j = 0; // index of 1st thread on core
2591 unsigned leader = 0;
2592 Address *leaderAddr = &(address2os[0].first);
2593 kmp_affin_mask_t *sum
2594 = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
2595 KMP_CPU_ZERO(sum);
2596 KMP_CPU_SET(address2os[0].second, sum);
2597 for (i = 1; i < numAddrs; i++) {
2598 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002599 // If this thread is sufficiently close to the leader (within the
Jim Cownie5e8470a2013-09-27 10:38:44 +00002600 // granularity setting), then set the bit for this os thread in the
2601 // affinity mask for this group, and go on to the next thread.
2602 //
2603 if (leaderAddr->isClose(address2os[i].first,
2604 __kmp_affinity_gran_levels)) {
2605 KMP_CPU_SET(address2os[i].second, sum);
2606 continue;
2607 }
2608
2609 //
2610 // For every thread in this group, copy the mask to the thread's
2611 // entry in the osId2Mask table. Mark the first address as a
2612 // leader.
2613 //
2614 for (; j < i; j++) {
2615 unsigned osId = address2os[j].second;
2616 KMP_DEBUG_ASSERT(osId <= maxOsId);
2617 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2618 KMP_CPU_COPY(mask, sum);
2619 address2os[j].first.leader = (j == leader);
2620 }
2621 unique++;
2622
2623 //
2624 // Start a new mask.
2625 //
2626 leader = i;
2627 leaderAddr = &(address2os[i].first);
2628 KMP_CPU_ZERO(sum);
2629 KMP_CPU_SET(address2os[i].second, sum);
2630 }
2631
2632 //
2633 // For every thread in last group, copy the mask to the thread's
2634 // entry in the osId2Mask table.
2635 //
2636 for (; j < i; j++) {
2637 unsigned osId = address2os[j].second;
2638 KMP_DEBUG_ASSERT(osId <= maxOsId);
2639 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2640 KMP_CPU_COPY(mask, sum);
2641 address2os[j].first.leader = (j == leader);
2642 }
2643 unique++;
2644
2645 *maxIndex = maxOsId;
2646 *numUnique = unique;
2647 return osId2Mask;
2648}
2649
2650
2651//
2652// Stuff for the affinity proclist parsers. It's easier to declare these vars
2653// as file-static than to try and pass them through the calling sequence of
2654// the recursive-descent OMP_PLACES parser.
2655//
2656static kmp_affin_mask_t *newMasks;
2657static int numNewMasks;
2658static int nextNewMask;
2659
2660#define ADD_MASK(_mask) \
2661 { \
2662 if (nextNewMask >= numNewMasks) { \
2663 numNewMasks *= 2; \
2664 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
2665 numNewMasks * __kmp_affin_mask_size); \
2666 } \
2667 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
2668 nextNewMask++; \
2669 }
2670
2671#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
2672 { \
2673 if (((_osId) > _maxOsId) || \
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002674 (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002675 if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
2676 && (__kmp_affinity_type != affinity_none))) { \
2677 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
2678 } \
2679 } \
2680 else { \
2681 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
2682 } \
2683 }
2684
2685
2686//
2687// Re-parse the proclist (for the explicit affinity type), and form the list
2688// of affinity newMasks indexed by gtid.
2689//
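//
// Illustration (hypothetical input): a proclist such as "0,3-5,{7,8},10-14:2"
// would yield the masks {0}, {3}, {4}, {5}, {7,8}, {10}, {12}, {14} - single
// ids and each element of a range get their own mask, while a braced set is
// OR'd into a single mask.
//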
2690static void
2691__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
2692 unsigned int *out_numMasks, const char *proclist,
2693 kmp_affin_mask_t *osId2Mask, int maxOsId)
2694{
2695 const char *scan = proclist;
2696 const char *next = proclist;
2697
2698 //
2699 // We use malloc() for the temporary mask vector,
2700 // so that we can use realloc() to extend it.
2701 //
2702 numNewMasks = 2;
2703 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
2704 * __kmp_affin_mask_size);
2705 nextNewMask = 0;
2706 kmp_affin_mask_t *sumMask = (kmp_affin_mask_t *)__kmp_allocate(
2707 __kmp_affin_mask_size);
2708 int setSize = 0;
2709
2710 for (;;) {
2711 int start, end, stride;
2712
2713 SKIP_WS(scan);
2714 next = scan;
2715 if (*next == '\0') {
2716 break;
2717 }
2718
2719 if (*next == '{') {
2720 int num;
2721 setSize = 0;
2722 next++; // skip '{'
2723 SKIP_WS(next);
2724 scan = next;
2725
2726 //
2727 // Read the first integer in the set.
2728 //
2729 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2730 "bad proclist");
2731 SKIP_DIGITS(next);
2732 num = __kmp_str_to_int(scan, *next);
2733 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2734
2735 //
2736 // Copy the mask for that osId to the sum (union) mask.
2737 //
2738 if ((num > maxOsId) ||
2739 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2740 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2741 && (__kmp_affinity_type != affinity_none))) {
2742 KMP_WARNING(AffIgnoreInvalidProcID, num);
2743 }
2744 KMP_CPU_ZERO(sumMask);
2745 }
2746 else {
2747 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2748 setSize = 1;
2749 }
2750
2751 for (;;) {
2752 //
2753 // Check for end of set.
2754 //
2755 SKIP_WS(next);
2756 if (*next == '}') {
2757 next++; // skip '}'
2758 break;
2759 }
2760
2761 //
2762 // Skip optional comma.
2763 //
2764 if (*next == ',') {
2765 next++;
2766 }
2767 SKIP_WS(next);
2768
2769 //
2770 // Read the next integer in the set.
2771 //
2772 scan = next;
2773 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2774 "bad explicit proc list");
2775
2776 SKIP_DIGITS(next);
2777 num = __kmp_str_to_int(scan, *next);
2778 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2779
2780 //
2781 // Add the mask for that osId to the sum mask.
2782 //
2783 if ((num > maxOsId) ||
2784 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2785 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2786 && (__kmp_affinity_type != affinity_none))) {
2787 KMP_WARNING(AffIgnoreInvalidProcID, num);
2788 }
2789 }
2790 else {
2791 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2792 setSize++;
2793 }
2794 }
2795 if (setSize > 0) {
2796 ADD_MASK(sumMask);
2797 }
2798
2799 SKIP_WS(next);
2800 if (*next == ',') {
2801 next++;
2802 }
2803 scan = next;
2804 continue;
2805 }
2806
2807 //
2808 // Read the first integer.
2809 //
2810 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2811 SKIP_DIGITS(next);
2812 start = __kmp_str_to_int(scan, *next);
2813 KMP_ASSERT2(start >= 0, "bad explicit proc list");
2814 SKIP_WS(next);
2815
2816 //
2817 // If this isn't a range, then add a mask to the list and go on.
2818 //
2819 if (*next != '-') {
2820 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2821
2822 //
2823 // Skip optional comma.
2824 //
2825 if (*next == ',') {
2826 next++;
2827 }
2828 scan = next;
2829 continue;
2830 }
2831
2832 //
2833 // This is a range. Skip over the '-' and read in the 2nd int.
2834 //
2835 next++; // skip '-'
2836 SKIP_WS(next);
2837 scan = next;
2838 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2839 SKIP_DIGITS(next);
2840 end = __kmp_str_to_int(scan, *next);
2841 KMP_ASSERT2(end >= 0, "bad explicit proc list");
2842
2843 //
2844 // Check for a stride parameter
2845 //
2846 stride = 1;
2847 SKIP_WS(next);
2848 if (*next == ':') {
2849 //
2850            // A stride is specified. Skip over the ':' and read the 3rd int.
2851 //
2852 int sign = +1;
2853 next++; // skip ':'
2854 SKIP_WS(next);
2855 scan = next;
2856 if (*next == '-') {
2857 sign = -1;
2858 next++;
2859 SKIP_WS(next);
2860 scan = next;
2861 }
2862 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2863 "bad explicit proc list");
2864 SKIP_DIGITS(next);
2865 stride = __kmp_str_to_int(scan, *next);
2866 KMP_ASSERT2(stride >= 0, "bad explicit proc list");
2867 stride *= sign;
2868 }
2869
2870 //
2871 // Do some range checks.
2872 //
2873 KMP_ASSERT2(stride != 0, "bad explicit proc list");
2874 if (stride > 0) {
2875 KMP_ASSERT2(start <= end, "bad explicit proc list");
2876 }
2877 else {
2878 KMP_ASSERT2(start >= end, "bad explicit proc list");
2879 }
2880 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
2881
2882 //
2883 // Add the mask for each OS proc # to the list.
2884 //
2885 if (stride > 0) {
2886 do {
2887 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2888 start += stride;
2889 } while (start <= end);
2890 }
2891 else {
2892 do {
2893 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2894 start += stride;
2895 } while (start >= end);
2896 }
2897
2898 //
2899 // Skip optional comma.
2900 //
2901 SKIP_WS(next);
2902 if (*next == ',') {
2903 next++;
2904 }
2905 scan = next;
2906 }
2907
2908 *out_numMasks = nextNewMask;
2909 if (nextNewMask == 0) {
2910 *out_masks = NULL;
2911 KMP_INTERNAL_FREE(newMasks);
2912 return;
2913 }
2914 *out_masks
2915 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
2916 memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
2917 __kmp_free(sumMask);
2918 KMP_INTERNAL_FREE(newMasks);
2919}
2920
2921
2922# if OMP_40_ENABLED
2923
2924/*-----------------------------------------------------------------------------
2925
2926Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
2927places. Again, here is the grammar:
2928
2929place_list := place
2930place_list := place , place_list
2931place := num
2932place := place : num
2933place := place : num : signed
2934place := { subplacelist }
2935place := ! place // (lowest priority)
2936subplace_list := subplace
2937subplace_list := subplace , subplace_list
2938subplace := num
2939subplace := num : num
2940subplace := num : num : signed
2941signed := num
2942signed := + signed
2943signed := - signed
2944
2945-----------------------------------------------------------------------------*/
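//
// Illustration (hypothetical input): the place "{0,1}:4:2" expands to 4
// places, each shifted by 2 OS procs from the previous one: {0,1}, {2,3},
// {4,5}, {6,7}.  A subplace such as "{0:4:2}" instead selects procs
// 0, 2, 4, and 6 within a single place.
//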
2946
2947static void
2948__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
2949 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
2950{
2951 const char *next;
2952
2953 for (;;) {
2954 int start, count, stride, i;
2955
2956 //
2957 // Read in the starting proc id
2958 //
2959 SKIP_WS(*scan);
2960 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2961 "bad explicit places list");
2962 next = *scan;
2963 SKIP_DIGITS(next);
2964 start = __kmp_str_to_int(*scan, *next);
2965 KMP_ASSERT(start >= 0);
2966 *scan = next;
2967
2968 //
2969 // valid follow sets are ',' ':' and '}'
2970 //
2971 SKIP_WS(*scan);
2972 if (**scan == '}' || **scan == ',') {
2973 if ((start > maxOsId) ||
2974 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
2975 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2976 && (__kmp_affinity_type != affinity_none))) {
2977 KMP_WARNING(AffIgnoreInvalidProcID, start);
2978 }
2979 }
2980 else {
2981 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
2982 (*setSize)++;
2983 }
2984 if (**scan == '}') {
2985 break;
2986 }
2987 (*scan)++; // skip ','
2988 continue;
2989 }
2990 KMP_ASSERT2(**scan == ':', "bad explicit places list");
2991 (*scan)++; // skip ':'
2992
2993 //
2994 // Read count parameter
2995 //
2996 SKIP_WS(*scan);
2997 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2998 "bad explicit places list");
2999 next = *scan;
3000 SKIP_DIGITS(next);
3001 count = __kmp_str_to_int(*scan, *next);
3002 KMP_ASSERT(count >= 0);
3003 *scan = next;
3004
3005 //
3006 // valid follow sets are ',' ':' and '}'
3007 //
3008 SKIP_WS(*scan);
3009 if (**scan == '}' || **scan == ',') {
3010 for (i = 0; i < count; i++) {
3011 if ((start > maxOsId) ||
3012 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3013 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3014 && (__kmp_affinity_type != affinity_none))) {
3015 KMP_WARNING(AffIgnoreInvalidProcID, start);
3016 }
3017 break; // don't proliferate warnings for large count
3018 }
3019 else {
3020 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3021 start++;
3022 (*setSize)++;
3023 }
3024 }
3025 if (**scan == '}') {
3026 break;
3027 }
3028 (*scan)++; // skip ','
3029 continue;
3030 }
3031 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3032 (*scan)++; // skip ':'
3033
3034 //
3035 // Read stride parameter
3036 //
3037 int sign = +1;
3038 for (;;) {
3039 SKIP_WS(*scan);
3040 if (**scan == '+') {
3041 (*scan)++; // skip '+'
3042 continue;
3043 }
3044 if (**scan == '-') {
3045 sign *= -1;
3046 (*scan)++; // skip '-'
3047 continue;
3048 }
3049 break;
3050 }
3051 SKIP_WS(*scan);
3052 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3053 "bad explicit places list");
3054 next = *scan;
3055 SKIP_DIGITS(next);
3056 stride = __kmp_str_to_int(*scan, *next);
3057 KMP_ASSERT(stride >= 0);
3058 *scan = next;
3059 stride *= sign;
3060
3061 //
3062 // valid follow sets are ',' and '}'
3063 //
3064 SKIP_WS(*scan);
3065 if (**scan == '}' || **scan == ',') {
3066 for (i = 0; i < count; i++) {
3067 if ((start > maxOsId) ||
3068 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3069 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3070 && (__kmp_affinity_type != affinity_none))) {
3071 KMP_WARNING(AffIgnoreInvalidProcID, start);
3072 }
3073 break; // don't proliferate warnings for large count
3074 }
3075 else {
3076 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3077 start += stride;
3078 (*setSize)++;
3079 }
3080 }
3081 if (**scan == '}') {
3082 break;
3083 }
3084 (*scan)++; // skip ','
3085 continue;
3086 }
3087
3088 KMP_ASSERT2(0, "bad explicit places list");
3089 }
3090}
3091
3092
3093static void
3094__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
3095 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3096{
3097 const char *next;
3098
3099 //
3100 // valid follow sets are '{' '!' and num
3101 //
3102 SKIP_WS(*scan);
3103 if (**scan == '{') {
3104 (*scan)++; // skip '{'
3105 __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
3106 setSize);
3107 KMP_ASSERT2(**scan == '}', "bad explicit places list");
3108 (*scan)++; // skip '}'
3109 }
3110 else if (**scan == '!') {
3111 __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
3112 KMP_CPU_COMPLEMENT(tempMask);
3113 (*scan)++; // skip '!'
3114 }
3115 else if ((**scan >= '0') && (**scan <= '9')) {
3116 next = *scan;
3117 SKIP_DIGITS(next);
3118 int num = __kmp_str_to_int(*scan, *next);
3119 KMP_ASSERT(num >= 0);
3120 if ((num > maxOsId) ||
3121 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3122 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3123 && (__kmp_affinity_type != affinity_none))) {
3124 KMP_WARNING(AffIgnoreInvalidProcID, num);
3125 }
3126 }
3127 else {
3128 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
3129 (*setSize)++;
3130 }
3131 *scan = next; // skip num
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003132 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003133 else {
3134 KMP_ASSERT2(0, "bad explicit places list");
3135 }
3136}
3137
3138
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003139//static void
3140void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003141__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
3142 unsigned int *out_numMasks, const char *placelist,
3143 kmp_affin_mask_t *osId2Mask, int maxOsId)
3144{
3145 const char *scan = placelist;
3146 const char *next = placelist;
3147
3148 numNewMasks = 2;
3149 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
3150 * __kmp_affin_mask_size);
3151 nextNewMask = 0;
3152
3153 kmp_affin_mask_t *tempMask = (kmp_affin_mask_t *)__kmp_allocate(
3154 __kmp_affin_mask_size);
3155 KMP_CPU_ZERO(tempMask);
3156 int setSize = 0;
3157
3158 for (;;) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003159 __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
3160
3161 //
3162 // valid follow sets are ',' ':' and EOL
3163 //
3164 SKIP_WS(scan);
3165 if (*scan == '\0' || *scan == ',') {
3166 if (setSize > 0) {
3167 ADD_MASK(tempMask);
3168 }
3169 KMP_CPU_ZERO(tempMask);
3170 setSize = 0;
3171 if (*scan == '\0') {
3172 break;
3173 }
3174 scan++; // skip ','
3175 continue;
3176 }
3177
3178 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3179 scan++; // skip ':'
3180
3181 //
3182 // Read count parameter
3183 //
3184 SKIP_WS(scan);
3185 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3186 "bad explicit places list");
3187 next = scan;
3188 SKIP_DIGITS(next);
Jim Cownie181b4bb2013-12-23 17:28:57 +00003189 int count = __kmp_str_to_int(scan, *next);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003190 KMP_ASSERT(count >= 0);
3191 scan = next;
3192
3193 //
3194 // valid follow sets are ',' ':' and EOL
3195 //
3196 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003197 int stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003198 if (*scan == '\0' || *scan == ',') {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003199 stride = +1;
3200 }
3201 else {
3202 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3203 scan++; // skip ':'
Jim Cownie5e8470a2013-09-27 10:38:44 +00003204
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003205 //
3206 // Read stride parameter
3207 //
3208 int sign = +1;
3209 for (;;) {
3210 SKIP_WS(scan);
3211 if (*scan == '+') {
3212 scan++; // skip '+'
3213 continue;
3214 }
3215 if (*scan == '-') {
3216 sign *= -1;
3217 scan++; // skip '-'
3218 continue;
3219 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003220 break;
3221 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003222 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003223 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3224 "bad explicit places list");
3225 next = scan;
3226 SKIP_DIGITS(next);
3227 stride = __kmp_str_to_int(scan, *next);
3228 KMP_DEBUG_ASSERT(stride >= 0);
3229 scan = next;
3230 stride *= sign;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003231 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003232
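        //
        // Replicate the current place "count" times.  Each iteration emits the
        // accumulated mask and then shifts every bit in tempMask by "stride"
        // OS proc ids (bit j is set iff bit j - stride was set); bits that land
        // on invalid or unavailable procs are dropped, with a warning.
        //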
3233 if (stride > 0) {
3234 int i;
3235 for (i = 0; i < count; i++) {
3236 int j;
3237 if (setSize == 0) {
3238 break;
3239 }
3240 ADD_MASK(tempMask);
3241 setSize = 0;
3242 for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003243 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3244 KMP_CPU_CLR(j, tempMask);
3245 }
3246 else if ((j > maxOsId) ||
3247 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
Andrey Churbanov16a14322015-03-10 09:34:38 +00003248 if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
3249 && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003250 KMP_WARNING(AffIgnoreInvalidProcID, j);
3251 }
3252 KMP_CPU_CLR(j, tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003253 }
3254 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003255 KMP_CPU_SET(j, tempMask);
3256 setSize++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003257 }
3258 }
3259 for (; j >= 0; j--) {
3260 KMP_CPU_CLR(j, tempMask);
3261 }
3262 }
3263 }
3264 else {
3265 int i;
3266 for (i = 0; i < count; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003267 int j;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003268 if (setSize == 0) {
3269 break;
3270 }
3271 ADD_MASK(tempMask);
3272 setSize = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003273 for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003274 j++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003275 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3276 KMP_CPU_CLR(j, tempMask);
3277 }
3278 else if ((j > maxOsId) ||
3279 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
Andrey Churbanov16a14322015-03-10 09:34:38 +00003280 if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
3281 && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003282 KMP_WARNING(AffIgnoreInvalidProcID, j);
3283 }
3284 KMP_CPU_CLR(j, tempMask);
3285 }
3286 else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003287 KMP_CPU_SET(j, tempMask);
3288 setSize++;
3289 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003290 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003291 for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003292 KMP_CPU_CLR(j, tempMask);
3293 }
3294 }
3295 }
3296 KMP_CPU_ZERO(tempMask);
3297 setSize = 0;
3298
3299 //
3300 // valid follow sets are ',' and EOL
3301 //
3302 SKIP_WS(scan);
3303 if (*scan == '\0') {
3304 break;
3305 }
3306 if (*scan == ',') {
3307 scan++; // skip ','
3308 continue;
3309 }
3310
3311 KMP_ASSERT2(0, "bad explicit places list");
3312 }
3313
3314 *out_numMasks = nextNewMask;
3315 if (nextNewMask == 0) {
3316 *out_masks = NULL;
3317 KMP_INTERNAL_FREE(newMasks);
3318 return;
3319 }
3320 *out_masks
3321 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
3322 memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
3323 __kmp_free(tempMask);
3324 KMP_INTERNAL_FREE(newMasks);
3325}
3326
3327# endif /* OMP_40_ENABLED */
3328
3329#undef ADD_MASK
3330#undef ADD_MASK_OSID
3331
Jim Cownie5e8470a2013-09-27 10:38:44 +00003332static void
3333__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
3334{
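    //
    // Trim the topology map according to the requested thread-placement
    // limits: keep only __kmp_place_num_cores cores per package, starting at
    // __kmp_place_core_offset, and __kmp_place_num_threads_per_core contexts
    // per core, then update nCoresPerPkg, __kmp_nThreadsPerCore,
    // __kmp_avail_proc, and __kmp_ncores to match the reduced topology.
    //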
3335 if ( __kmp_place_num_cores == 0 ) {
3336 if ( __kmp_place_num_threads_per_core == 0 ) {
3337 return; // no cores limiting actions requested, exit
3338 }
3339 __kmp_place_num_cores = nCoresPerPkg; // use all available cores
3340 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003341 if ( !__kmp_affinity_uniform_topology() ) {
3342 KMP_WARNING( AffThrPlaceNonUniform );
3343 return; // don't support non-uniform topology
3344 }
3345 if ( depth != 3 ) {
3346 KMP_WARNING( AffThrPlaceNonThreeLevel );
3347 return; // don't support not-3-level topology
Jim Cownie5e8470a2013-09-27 10:38:44 +00003348 }
3349 if ( __kmp_place_num_threads_per_core == 0 ) {
3350 __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
3351 }
Andrey Churbanov12875572015-03-10 09:00:36 +00003352 if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003353 KMP_WARNING( AffThrPlaceManyCores );
3354 return;
3355 }
3356
3357 AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
3358 nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
3359 int i, j, k, n_old = 0, n_new = 0;
3360 for ( i = 0; i < nPackages; ++i ) {
3361 for ( j = 0; j < nCoresPerPkg; ++j ) {
Andrey Churbanov12875572015-03-10 09:00:36 +00003362 if ( j < __kmp_place_core_offset || j >= __kmp_place_core_offset + __kmp_place_num_cores ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003363 n_old += __kmp_nThreadsPerCore; // skip not-requested core
3364 } else {
3365 for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) {
Andrey Churbanov12875572015-03-10 09:00:36 +00003366 if ( k < __kmp_place_num_threads_per_core ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003367 newAddr[n_new] = (*pAddr)[n_old]; // copy requested core' data to new location
3368 n_new++;
3369 }
3370 n_old++;
3371 }
3372 }
3373 }
3374 }
3375 nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
3376 __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
3377 __kmp_avail_proc = n_new; // correct avail_proc
3378 __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
3379
3380 __kmp_free( *pAddr );
3381 *pAddr = newAddr; // replace old topology with new one
3382}
3383
Jim Cownie5e8470a2013-09-27 10:38:44 +00003384
3385static AddrUnsPair *address2os = NULL;
3386static int * procarr = NULL;
3387static int __kmp_aff_depth = 0;
3388
3389static void
3390__kmp_aux_affinity_initialize(void)
3391{
3392 if (__kmp_affinity_masks != NULL) {
3393 KMP_ASSERT(fullMask != NULL);
3394 return;
3395 }
3396
3397 //
3398 // Create the "full" mask - this defines all of the processors that we
3399 // consider to be in the machine model. If respect is set, then it is
3400 // the initialization thread's affinity mask. Otherwise, it is all
3401 // processors that we know about on the machine.
3402 //
3403 if (fullMask == NULL) {
3404 fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
3405 }
3406 if (KMP_AFFINITY_CAPABLE()) {
3407 if (__kmp_affinity_respect_mask) {
3408 __kmp_get_system_affinity(fullMask, TRUE);
3409
3410 //
3411 // Count the number of available processors.
3412 //
3413 unsigned i;
3414 __kmp_avail_proc = 0;
3415 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
3416 if (! KMP_CPU_ISSET(i, fullMask)) {
3417 continue;
3418 }
3419 __kmp_avail_proc++;
3420 }
3421 if (__kmp_avail_proc > __kmp_xproc) {
3422 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3423 && (__kmp_affinity_type != affinity_none))) {
3424 KMP_WARNING(ErrorInitializeAffinity);
3425 }
3426 __kmp_affinity_type = affinity_none;
Andrey Churbanov1f037e42015-03-10 09:15:26 +00003427 KMP_AFFINITY_DISABLE();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003428 return;
3429 }
3430 }
3431 else {
3432 __kmp_affinity_entire_machine_mask(fullMask);
3433 __kmp_avail_proc = __kmp_xproc;
3434 }
3435 }
3436
3437 int depth = -1;
3438 kmp_i18n_id_t msg_id = kmp_i18n_null;
3439
3440 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00003441 // For backward compatibility, setting KMP_CPUINFO_FILE =>
Jim Cownie5e8470a2013-09-27 10:38:44 +00003442 // KMP_TOPOLOGY_METHOD=cpuinfo
3443 //
3444 if ((__kmp_cpuinfo_file != NULL) &&
3445 (__kmp_affinity_top_method == affinity_top_method_all)) {
3446 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
3447 }
3448
3449 if (__kmp_affinity_top_method == affinity_top_method_all) {
3450 //
3451 // In the default code path, errors are not fatal - we just try using
3452 // another method. We only emit a warning message if affinity is on,
3453        // or the verbose flag is set, and the nowarnings flag was not set.
3454 //
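        //
        // The discovery methods below are tried in order: x2APIC (cpuid leaf
        // 11) decoding, then the legacy APIC id method, then parsing
        // /proc/cpuinfo (Linux only), then the processor-group map (when there
        // are multiple groups), and finally the flat OS proc id map as a last
        // resort.
        //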
3455 const char *file_name = NULL;
3456 int line = 0;
3457
3458# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3459
3460 if (__kmp_affinity_verbose) {
3461 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
3462 }
3463
3464 file_name = NULL;
3465 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3466 if (depth == 0) {
3467 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3468 KMP_ASSERT(address2os == NULL);
3469 return;
3470 }
3471
3472 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003473 if (__kmp_affinity_verbose) {
3474 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003475 KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
3476 KMP_I18N_STR(DecodingLegacyAPIC));
3477 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003478 else {
3479 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
3480 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003481 }
3482
3483 file_name = NULL;
3484 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3485 if (depth == 0) {
3486 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3487 KMP_ASSERT(address2os == NULL);
3488 return;
3489 }
3490 }
3491
3492# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3493
3494# if KMP_OS_LINUX
3495
3496 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003497 if (__kmp_affinity_verbose) {
3498 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003499 KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
3500 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003501 else {
3502 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
3503 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003504 }
3505
3506 FILE *f = fopen("/proc/cpuinfo", "r");
3507 if (f == NULL) {
3508 msg_id = kmp_i18n_str_CantOpenCpuinfo;
3509 }
3510 else {
3511 file_name = "/proc/cpuinfo";
3512 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3513 fclose(f);
3514 if (depth == 0) {
3515 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3516 KMP_ASSERT(address2os == NULL);
3517 return;
3518 }
3519 }
3520 }
3521
3522# endif /* KMP_OS_LINUX */
3523
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003524# if KMP_GROUP_AFFINITY
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003525
3526 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
3527 if (__kmp_affinity_verbose) {
3528 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3529 }
3530
3531 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3532 KMP_ASSERT(depth != 0);
3533 }
3534
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003535# endif /* KMP_GROUP_AFFINITY */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003536
Jim Cownie5e8470a2013-09-27 10:38:44 +00003537 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003538 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003539 if (file_name == NULL) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003540 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003541 }
3542 else if (line == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003543 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003544 }
3545 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003546 KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003547 }
3548 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003549 // FIXME - print msg if msg_id = kmp_i18n_null ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00003550
3551 file_name = "";
3552 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3553 if (depth == 0) {
3554 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3555 KMP_ASSERT(address2os == NULL);
3556 return;
3557 }
3558 KMP_ASSERT(depth > 0);
3559 KMP_ASSERT(address2os != NULL);
3560 }
3561 }
3562
3563 //
3564    // If the user has specified that a particular topology discovery method
3565 // is to be used, then we abort if that method fails. The exception is
3566 // group affinity, which might have been implicitly set.
3567 //
3568
3569# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3570
3571 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
3572 if (__kmp_affinity_verbose) {
3573 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3574 KMP_I18N_STR(Decodingx2APIC));
3575 }
3576
3577 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3578 if (depth == 0) {
3579 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3580 KMP_ASSERT(address2os == NULL);
3581 return;
3582 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003583 if (depth < 0) {
3584 KMP_ASSERT(msg_id != kmp_i18n_null);
3585 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3586 }
3587 }
3588 else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
3589 if (__kmp_affinity_verbose) {
3590 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3591 KMP_I18N_STR(DecodingLegacyAPIC));
3592 }
3593
3594 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3595 if (depth == 0) {
3596 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3597 KMP_ASSERT(address2os == NULL);
3598 return;
3599 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003600 if (depth < 0) {
3601 KMP_ASSERT(msg_id != kmp_i18n_null);
3602 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3603 }
3604 }
3605
3606# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3607
3608 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
3609 const char *filename;
3610 if (__kmp_cpuinfo_file != NULL) {
3611 filename = __kmp_cpuinfo_file;
3612 }
3613 else {
3614 filename = "/proc/cpuinfo";
3615 }
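        // Note: __kmp_cpuinfo_file is presumably the value of the KMP_CPUINFO_FILE
        // environment variable (the NameComesFrom_CPUINFO_FILE hint below points at
        // it); when it is not set we fall back to parsing the live /proc/cpuinfo.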
3616
3617 if (__kmp_affinity_verbose) {
3618 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
3619 }
3620
3621 FILE *f = fopen(filename, "r");
3622 if (f == NULL) {
3623 int code = errno;
3624 if (__kmp_cpuinfo_file != NULL) {
3625 __kmp_msg(
3626 kmp_ms_fatal,
3627 KMP_MSG(CantOpenFileForReading, filename),
3628 KMP_ERR(code),
3629 KMP_HNT(NameComesFrom_CPUINFO_FILE),
3630 __kmp_msg_null
3631 );
3632 }
3633 else {
3634 __kmp_msg(
3635 kmp_ms_fatal,
3636 KMP_MSG(CantOpenFileForReading, filename),
3637 KMP_ERR(code),
3638 __kmp_msg_null
3639 );
3640 }
3641 }
3642 int line = 0;
3643 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3644 fclose(f);
3645 if (depth < 0) {
3646 KMP_ASSERT(msg_id != kmp_i18n_null);
3647 if (line > 0) {
3648 KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
3649 }
3650 else {
3651 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
3652 }
3653 }
3654 if (__kmp_affinity_type == affinity_none) {
3655 KMP_ASSERT(depth == 0);
3656 KMP_ASSERT(address2os == NULL);
3657 return;
3658 }
3659 }
3660
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003661# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00003662
3663 else if (__kmp_affinity_top_method == affinity_top_method_group) {
3664 if (__kmp_affinity_verbose) {
3665 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3666 }
3667
3668 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3669 KMP_ASSERT(depth != 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003670 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003671 KMP_ASSERT(msg_id != kmp_i18n_null);
3672 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003673 }
3674 }
3675
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003676# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003677
3678 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
3679 if (__kmp_affinity_verbose) {
3680 KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
3681 }
3682
3683 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3684 if (depth == 0) {
3685 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3686 KMP_ASSERT(address2os == NULL);
3687 return;
3688 }
3689 // should not fail
3690 KMP_ASSERT(depth > 0);
3691 KMP_ASSERT(address2os != NULL);
3692 }
3693
3694 if (address2os == NULL) {
3695 if (KMP_AFFINITY_CAPABLE()
3696 && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3697 && (__kmp_affinity_type != affinity_none)))) {
3698 KMP_WARNING(ErrorInitializeAffinity);
3699 }
3700 __kmp_affinity_type = affinity_none;
Andrey Churbanov1f037e42015-03-10 09:15:26 +00003701 KMP_AFFINITY_DISABLE();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003702 return;
3703 }
3704
Jim Cownie5e8470a2013-09-27 10:38:44 +00003705 __kmp_apply_thread_places(&address2os, depth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003706
3707 //
3708 // Create the table of masks, indexed by thread Id.
3709 //
3710 unsigned maxIndex;
3711 unsigned numUnique;
3712 kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
3713 address2os, __kmp_avail_proc);
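    // Rough reading of the call above (sketch, not a spec): __kmp_create_masks()
    // builds one mask per OS proc id (indexable up to maxIndex), where each mask
    // covers all procs sharing the topology node selected by
    // __kmp_affinity_gran_levels, and numUnique counts the distinct masks produced.
    // At the finest granularity (gran_levels == 0) every proc gets its own singleton
    // mask, which is what the assertion below checks.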
3714 if (__kmp_affinity_gran_levels == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003715 KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003716 }
3717
3718 //
3719 // Set the childNums vector in all Address objects. This must be done
3720 // before we can sort using __kmp_affinity_cmp_Address_child_num(),
3721 // which takes into account the setting of __kmp_affinity_compact.
3722 //
3723 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
3724
3725 switch (__kmp_affinity_type) {
3726
3727 case affinity_explicit:
3728 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
3729# if OMP_40_ENABLED
3730 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3731# endif
3732 {
3733 __kmp_affinity_process_proclist(&__kmp_affinity_masks,
3734 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3735 maxIndex);
3736 }
3737# if OMP_40_ENABLED
3738 else {
3739 __kmp_affinity_process_placelist(&__kmp_affinity_masks,
3740 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3741 maxIndex);
3742 }
3743# endif
3744 if (__kmp_affinity_num_masks == 0) {
3745 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3746 && (__kmp_affinity_type != affinity_none))) {
3747 KMP_WARNING(AffNoValidProcID);
3748 }
3749 __kmp_affinity_type = affinity_none;
3750 return;
3751 }
3752 break;
3753
3754 //
3755 // The other affinity types rely on sorting the Addresses according
3756 // to some permutation of the machine topology tree. Set
3757 // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
3758 // then jump to a common code fragment to do the sort and create
3759 // the array of affinity masks.
3760 //
3761
3762 case affinity_logical:
3763 __kmp_affinity_compact = 0;
3764 if (__kmp_affinity_offset) {
3765 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3766 % __kmp_avail_proc;
3767 }
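        // Note: the expression above evaluates as (__kmp_nThreadsPerCore *
        // __kmp_affinity_offset) % __kmp_avail_proc, i.e. the user-supplied offset is
        // scaled by the number of thread contexts per core and wrapped to the
        // available procs (e.g. 2 contexts/core and 16 procs turn offset 3 into 6).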
3768 goto sortAddresses;
3769
3770 case affinity_physical:
3771 if (__kmp_nThreadsPerCore > 1) {
3772 __kmp_affinity_compact = 1;
3773 if (__kmp_affinity_compact >= depth) {
3774 __kmp_affinity_compact = 0;
3775 }
3776 } else {
3777 __kmp_affinity_compact = 0;
3778 }
3779 if (__kmp_affinity_offset) {
3780 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3781 % __kmp_avail_proc;
3782 }
3783 goto sortAddresses;
3784
3785 case affinity_scatter:
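        // Roughly speaking, scatter reuses the compact machinery with the level count
        // inverted (depth - 1 - compact), so consecutive threads are spread across the
        // outer topology levels (packages, then cores) rather than packed onto
        // neighboring hardware contexts.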
3786 if (__kmp_affinity_compact >= depth) {
3787 __kmp_affinity_compact = 0;
3788 }
3789 else {
3790 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
3791 }
3792 goto sortAddresses;
3793
3794 case affinity_compact:
3795 if (__kmp_affinity_compact >= depth) {
3796 __kmp_affinity_compact = depth - 1;
3797 }
3798 goto sortAddresses;
3799
Jim Cownie5e8470a2013-09-27 10:38:44 +00003800 case affinity_balanced:
Andrey Churbanove4b92132015-03-05 17:46:50 +00003801 // Balanced affinity works only within a single package; both uniform and non-uniform topologies are handled
Jim Cownie5e8470a2013-09-27 10:38:44 +00003802 if( nPackages > 1 ) {
3803 if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
3804 KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
3805 }
3806 __kmp_affinity_type = affinity_none;
3807 return;
3808 } else if( __kmp_affinity_uniform_topology() ) {
3809 break;
3810 } else { // Non-uniform topology
3811
3812 // Save the depth for further usage
3813 __kmp_aff_depth = depth;
3814
3815 // Number of hyper threads per core in HT machine
3816 int nth_per_core = __kmp_nThreadsPerCore;
3817
3818 int core_level;
3819 if( nth_per_core > 1 ) {
3820 core_level = depth - 2;
3821 } else {
3822 core_level = depth - 1;
3823 }
3824 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
3825 int nproc = nth_per_core * ncores;
3826
3827 procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
3828 for( int i = 0; i < nproc; i++ ) {
3829 procarr[ i ] = -1;
3830 }
3831
3832 for( int i = 0; i < __kmp_avail_proc; i++ ) {
3833 int proc = address2os[ i ].second;
3834 // If depth == 3 then level=0 - package, level=1 - core, level=2 - thread.
3835 // If there is only one thread per core then depth == 2: level 0 - package,
3836 // level 1 - core.
3837 int level = depth - 1;
3838
3839 // Defaults for the case of one thread context per core (__kmp_nth_per_core == 1)
3840 int thread = 0;
3841 int core = address2os[ i ].first.labels[ level ];
3842 // If the thread level exists, i.e. there is more than one thread context per core
3843 if( nth_per_core > 1 ) {
3844 thread = address2os[ i ].first.labels[ level ] % nth_per_core;
3845 core = address2os[ i ].first.labels[ level - 1 ];
3846 }
3847 procarr[ core * nth_per_core + thread ] = proc;
3848 }
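            // Illustrative layout (assumed, not normative): with nth_per_core == 2 and
            // 3 usable cores, procarr has 6 slots indexed as core * 2 + thread, e.g.
            // { 0, 4, 1, 5, 2, -1 } if core 2 exposes only one context. The -1 holes
            // are what the non-uniform balancing in __kmp_balanced_affinity() works
            // around.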
3849
3850 break;
3851 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003852
3853 sortAddresses:
3854 //
3855 // Allocate the gtid->affinity mask table.
3856 //
3857 if (__kmp_affinity_dups) {
3858 __kmp_affinity_num_masks = __kmp_avail_proc;
3859 }
3860 else {
3861 __kmp_affinity_num_masks = numUnique;
3862 }
3863
3864# if OMP_40_ENABLED
3865 if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
3866 && ( __kmp_affinity_num_places > 0 )
3867 && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
3868 __kmp_affinity_num_masks = __kmp_affinity_num_places;
3869 }
3870# endif
3871
3872 __kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
3873 __kmp_affinity_num_masks * __kmp_affin_mask_size);
3874
3875 //
3876 // Sort the address2os table according to the current setting of
3877 // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
3878 //
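        // Sketch of the intent (as read from the code below): after the qsort, each
        // "leader" entry in address2os names an OS proc whose per-granularity mask
        // from osId2Mask is copied into the next slot of __kmp_affinity_masks; when
        // __kmp_affinity_dups is set, every entry contributes a mask, leader or not.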
3879 qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
3880 __kmp_affinity_cmp_Address_child_num);
3881 {
3882 int i;
3883 unsigned j;
3884 for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
3885 if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
3886 continue;
3887 }
3888 unsigned osId = address2os[i].second;
3889 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
3890 kmp_affin_mask_t *dest
3891 = KMP_CPU_INDEX(__kmp_affinity_masks, j);
3892 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
3893 KMP_CPU_COPY(dest, src);
3894 if (++j >= __kmp_affinity_num_masks) {
3895 break;
3896 }
3897 }
3898 KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
3899 }
3900 break;
3901
3902 default:
3903 KMP_ASSERT2(0, "Unexpected affinity setting");
3904 }
3905
3906 __kmp_free(osId2Mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003907 machine_hierarchy.init(address2os, __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003908}
3909
3910
3911void
3912__kmp_affinity_initialize(void)
3913{
3914 //
3915 // Much of the code above was written assuming that if a machine was not
3916 // affinity capable, then __kmp_affinity_type == affinity_none. We now
3917 // explicitly represent this as __kmp_affinity_type == affinity_disabled.
3918 //
3919 // There are too many checks for __kmp_affinity_type == affinity_none
3920 // in this code. Instead of trying to change them all, check if
3921 // __kmp_affinity_type == affinity_disabled, and if so, slam it with
3922 // affinity_none, call the real initialization routine, then restore
3923 // __kmp_affinity_type to affinity_disabled.
3924 //
3925 int disabled = (__kmp_affinity_type == affinity_disabled);
3926 if (! KMP_AFFINITY_CAPABLE()) {
3927 KMP_ASSERT(disabled);
3928 }
3929 if (disabled) {
3930 __kmp_affinity_type = affinity_none;
3931 }
3932 __kmp_aux_affinity_initialize();
3933 if (disabled) {
3934 __kmp_affinity_type = affinity_disabled;
3935 }
3936}
3937
3938
3939void
3940__kmp_affinity_uninitialize(void)
3941{
3942 if (__kmp_affinity_masks != NULL) {
3943 __kmp_free(__kmp_affinity_masks);
3944 __kmp_affinity_masks = NULL;
3945 }
3946 if (fullMask != NULL) {
3947 KMP_CPU_FREE(fullMask);
3948 fullMask = NULL;
3949 }
3950 __kmp_affinity_num_masks = 0;
3951# if OMP_40_ENABLED
3952 __kmp_affinity_num_places = 0;
3953# endif
3954 if (__kmp_affinity_proclist != NULL) {
3955 __kmp_free(__kmp_affinity_proclist);
3956 __kmp_affinity_proclist = NULL;
3957 }
3958 if( address2os != NULL ) {
3959 __kmp_free( address2os );
3960 address2os = NULL;
3961 }
3962 if( procarr != NULL ) {
3963 __kmp_free( procarr );
3964 procarr = NULL;
3965 }
3966}
3967
3968
3969void
3970__kmp_affinity_set_init_mask(int gtid, int isa_root)
3971{
3972 if (! KMP_AFFINITY_CAPABLE()) {
3973 return;
3974 }
3975
3976 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
3977 if (th->th.th_affin_mask == NULL) {
3978 KMP_CPU_ALLOC(th->th.th_affin_mask);
3979 }
3980 else {
3981 KMP_CPU_ZERO(th->th.th_affin_mask);
3982 }
3983
3984 //
3985 // Copy the thread mask to the kmp_info_t structure.
3986 // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
3987 // that has all of the OS proc ids set, or if __kmp_affinity_respect_mask
3988 // is set, then the full mask is the same as the mask of the initialization
3989 // thread.
3990 //
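    // In rough terms (summary of the branches below): a thread gets the full mask when
    // affinity is none or balanced -- or, on the OMP 4.0 path, when it is not a root
    // or bind_types[0] == proc_bind_false -- otherwise its place i is chosen
    // round-robin as (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks.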
3991 kmp_affin_mask_t *mask;
3992 int i;
3993
3994# if OMP_40_ENABLED
3995 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3996# endif
3997 {
Andrey Churbanovf28f6132015-01-13 14:54:00 +00003998 if ((__kmp_affinity_type == affinity_none) || (__kmp_affinity_type == affinity_balanced)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003999 ) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004000# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004001 if (__kmp_num_proc_groups > 1) {
4002 return;
4003 }
4004# endif
4005 KMP_ASSERT(fullMask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004006 i = KMP_PLACE_ALL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004007 mask = fullMask;
4008 }
4009 else {
4010 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4011 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4012 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4013 }
4014 }
4015# if OMP_40_ENABLED
4016 else {
4017 if ((! isa_root)
4018 || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004019# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004020 if (__kmp_num_proc_groups > 1) {
4021 return;
4022 }
4023# endif
4024 KMP_ASSERT(fullMask != NULL);
4025 i = KMP_PLACE_ALL;
4026 mask = fullMask;
4027 }
4028 else {
4029 //
4030 // int i = some hash function or just a counter that doesn't
4031 // always start at 0. Use gtid for now.
4032 //
4033 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4034 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4035 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4036 }
4037 }
4038# endif
4039
4040# if OMP_40_ENABLED
4041 th->th.th_current_place = i;
4042 if (isa_root) {
4043 th->th.th_new_place = i;
4044 th->th.th_first_place = 0;
4045 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4046 }
4047
4048 if (i == KMP_PLACE_ALL) {
4049 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
4050 gtid));
4051 }
4052 else {
4053 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
4054 gtid, i));
4055 }
4056# else
4057 if (i == -1) {
4058 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
4059 gtid));
4060 }
4061 else {
4062 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
4063 gtid, i));
4064 }
4065# endif /* OMP_40_ENABLED */
4066
4067 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4068
4069 if (__kmp_affinity_verbose) {
4070 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4071 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4072 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004073 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
4074 buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004075 }
4076
4077# if KMP_OS_WINDOWS
4078 //
4079 // On Windows* OS, the process affinity mask might have changed.
4080 // If the user didn't request affinity and this call fails,
4081 // just continue silently. See CQ171393.
4082 //
4083 if ( __kmp_affinity_type == affinity_none ) {
4084 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
4085 }
4086 else
4087# endif
4088 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4089}
4090
4091
4092# if OMP_40_ENABLED
4093
4094void
4095__kmp_affinity_set_place(int gtid)
4096{
4097 int retval;
4098
4099 if (! KMP_AFFINITY_CAPABLE()) {
4100 return;
4101 }
4102
4103 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4104
4105 KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
4106 gtid, th->th.th_new_place, th->th.th_current_place));
4107
4108 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00004109 // Check that the new place is within this thread's partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004110 //
4111 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004112 KMP_ASSERT(th->th.th_new_place >= 0);
4113 KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004114 if (th->th.th_first_place <= th->th.th_last_place) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004115 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004116 && (th->th.th_new_place <= th->th.th_last_place));
4117 }
4118 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004119 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004120 || (th->th.th_new_place >= th->th.th_last_place));
4121 }
4122
4123 //
4124 // Copy the thread mask to the kmp_info_t structure,
4125 // and set this thread's affinity.
4126 //
4127 kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
4128 th->th.th_new_place);
4129 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4130 th->th.th_current_place = th->th.th_new_place;
4131
4132 if (__kmp_affinity_verbose) {
4133 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4134 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4135 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004136 KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
4137 gtid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004138 }
4139 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4140}
4141
4142# endif /* OMP_40_ENABLED */
4143
4144
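//
// __kmp_aux_set_affinity() backs the kmp_set_affinity() entry point. A minimal usage
// sketch from user code, assuming the affinity API extensions declared in kmp.h
// (shown for illustration only, not as a normative example):
//
//     kmp_affinity_mask_t m;
//     kmp_create_affinity_mask(&m);
//     kmp_set_affinity_mask_proc(0, &m);   // add OS proc 0 to the mask
//     if (kmp_set_affinity(&m) != 0) {
//         // not affinity capable, or the mask was rejected
//     }
//     kmp_destroy_affinity_mask(&m);
//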
4145int
4146__kmp_aux_set_affinity(void **mask)
4147{
4148 int gtid;
4149 kmp_info_t *th;
4150 int retval;
4151
4152 if (! KMP_AFFINITY_CAPABLE()) {
4153 return -1;
4154 }
4155
4156 gtid = __kmp_entry_gtid();
4157 KA_TRACE(1000, ;{
4158 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4159 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4160 (kmp_affin_mask_t *)(*mask));
4161 __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
4162 gtid, buf);
4163 });
4164
4165 if (__kmp_env_consistency_check) {
4166 if ((mask == NULL) || (*mask == NULL)) {
4167 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4168 }
4169 else {
4170 unsigned proc;
4171 int num_procs = 0;
4172
4173 for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
4174 if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
4175 continue;
4176 }
4177 num_procs++;
4178 if (! KMP_CPU_ISSET(proc, fullMask)) {
4179 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4180 break;
4181 }
4182 }
4183 if (num_procs == 0) {
4184 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4185 }
4186
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004187# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004188 if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
4189 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4190 }
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004191# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004192
4193 }
4194 }
4195
4196 th = __kmp_threads[gtid];
4197 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4198 retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4199 if (retval == 0) {
4200 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
4201 }
4202
4203# if OMP_40_ENABLED
4204 th->th.th_current_place = KMP_PLACE_UNDEFINED;
4205 th->th.th_new_place = KMP_PLACE_UNDEFINED;
4206 th->th.th_first_place = 0;
4207 th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004208
4209 //
4210 // Turn off 4.0 affinity for the current thread at this parallel level.
4211 //
4212 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004213# endif
4214
4215 return retval;
4216}
4217
4218
4219int
4220__kmp_aux_get_affinity(void **mask)
4221{
4222 int gtid;
4223 int retval;
4224 kmp_info_t *th;
4225
4226 if (! KMP_AFFINITY_CAPABLE()) {
4227 return -1;
4228 }
4229
4230 gtid = __kmp_entry_gtid();
4231 th = __kmp_threads[gtid];
4232 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4233
4234 KA_TRACE(1000, ;{
4235 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4236 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4237 th->th.th_affin_mask);
4238 __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
4239 });
4240
4241 if (__kmp_env_consistency_check) {
4242 if ((mask == NULL) || (*mask == NULL)) {
4243 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
4244 }
4245 }
4246
4247# if !KMP_OS_WINDOWS
4248
4249 retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4250 KA_TRACE(1000, ;{
4251 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4252 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4253 (kmp_affin_mask_t *)(*mask));
4254 __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
4255 });
4256 return retval;
4257
4258# else
4259
4260 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
4261 return 0;
4262
4263# endif /* KMP_OS_WINDOWS */
4264
4265}
4266
Jim Cownie5e8470a2013-09-27 10:38:44 +00004267int
4268__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
4269{
4270 int retval;
4271
4272 if (! KMP_AFFINITY_CAPABLE()) {
4273 return -1;
4274 }
4275
4276 KA_TRACE(1000, ;{
4277 int gtid = __kmp_entry_gtid();
4278 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4279 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4280 (kmp_affin_mask_t *)(*mask));
4281 __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
4282 proc, gtid, buf);
4283 });
4284
4285 if (__kmp_env_consistency_check) {
4286 if ((mask == NULL) || (*mask == NULL)) {
4287 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
4288 }
4289 }
4290
4291 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4292 return -1;
4293 }
4294 if (! KMP_CPU_ISSET(proc, fullMask)) {
4295 return -2;
4296 }
4297
4298 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
4299 return 0;
4300}
4301
4302
4303int
4304__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
4305{
4306 int retval;
4307
4308 if (! KMP_AFFINITY_CAPABLE()) {
4309 return -1;
4310 }
4311
4312 KA_TRACE(1000, ;{
4313 int gtid = __kmp_entry_gtid();
4314 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4315 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4316 (kmp_affin_mask_t *)(*mask));
4317 __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
4318 proc, gtid, buf);
4319 });
4320
4321 if (__kmp_env_consistency_check) {
4322 if ((mask == NULL) || (*mask == NULL)) {
4323 KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
4324 }
4325 }
4326
4327 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4328 return -1;
4329 }
4330 if (! KMP_CPU_ISSET(proc, fullMask)) {
4331 return -2;
4332 }
4333
4334 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
4335 return 0;
4336}
4337
4338
4339int
4340__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
4341{
4342 int retval;
4343
4344 if (! KMP_AFFINITY_CAPABLE()) {
4345 return -1;
4346 }
4347
4348 KA_TRACE(1000, ;{
4349 int gtid = __kmp_entry_gtid();
4350 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4351 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4352 (kmp_affin_mask_t *)(*mask));
4353 __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
4354 proc, gtid, buf);
4355 });
4356
4357 if (__kmp_env_consistency_check) {
4358 if ((mask == NULL) || (*mask == NULL)) {
Andrey Churbanov4b2f17a2015-01-29 15:49:22 +00004359 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
Jim Cownie5e8470a2013-09-27 10:38:44 +00004360 }
4361 }
4362
4363 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4364 return 0;
4365 }
4366 if (! KMP_CPU_ISSET(proc, fullMask)) {
4367 return 0;
4368 }
4369
4370 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
4371}
4372
Jim Cownie5e8470a2013-09-27 10:38:44 +00004373
4374// Dynamic affinity settings - Affinity balanced
4375void __kmp_balanced_affinity( int tid, int nthreads )
4376{
4377 if( __kmp_affinity_uniform_topology() ) {
4378 int coreID;
4379 int threadID;
4380 // Number of hyper threads per core in HT machine
4381 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
4382 // Number of cores
4383 int ncores = __kmp_ncores;
4384 // How many threads will be bound to each core
4385 int chunk = nthreads / ncores;
4386 // How many cores will have an additional thread bound to it - "big cores"
4387 int big_cores = nthreads % ncores;
4388 // Number of threads on the big cores
4389 int big_nth = ( chunk + 1 ) * big_cores;
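        // Worked example (illustrative): nthreads == 10 on ncores == 4 gives
        // chunk == 2, big_cores == 2, big_nth == 6; tids 0-5 land on cores 0-1
        // (3 threads each) and tids 6-9 on cores 2-3 (2 threads each).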
4390 if( tid < big_nth ) {
4391 coreID = tid / (chunk + 1 );
4392 threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
4393 } else { //tid >= big_nth
4394 coreID = ( tid - big_cores ) / chunk;
4395 threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
4396 }
4397
4398 KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
4399 "Illegal set affinity operation when not capable");
4400
4401 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
4402 KMP_CPU_ZERO(mask);
4403
4404 // Granularity == thread
4405 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4406 int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
4407 KMP_CPU_SET( osID, mask);
4408 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4409 for( int i = 0; i < __kmp_nth_per_core; i++ ) {
4410 int osID;
4411 osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
4412 KMP_CPU_SET( osID, mask);
4413 }
4414 }
4415 if (__kmp_affinity_verbose) {
4416 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4417 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004418 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4419 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004420 }
4421 __kmp_set_system_affinity( mask, TRUE );
4422 } else { // Non-uniform topology
4423
4424 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
4425 KMP_CPU_ZERO(mask);
4426
4427 // Number of hyper threads per core in HT machine
4428 int nth_per_core = __kmp_nThreadsPerCore;
4429 int core_level;
4430 if( nth_per_core > 1 ) {
4431 core_level = __kmp_aff_depth - 2;
4432 } else {
4433 core_level = __kmp_aff_depth - 1;
4434 }
4435
4436 // Number of cores - maximum value; it does not count trailing cores with 0 processors
4437 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
4438
4439 // As a performance optimization, handle the special case nthreads == __kmp_avail_proc separately
4440 if( nthreads == __kmp_avail_proc ) {
4441 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4442 int osID = address2os[ tid ].second;
4443 KMP_CPU_SET( osID, mask);
4444 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4445 int coreID = address2os[ tid ].first.labels[ core_level ];
4446 // Count the osIDs found for the current core; there can be at most nth_per_core of them.
4447 // Since address2os is sorted, we can break as soon as cnt == nth_per_core.
4448 int cnt = 0;
4449 for( int i = 0; i < __kmp_avail_proc; i++ ) {
4450 int osID = address2os[ i ].second;
4451 int core = address2os[ i ].first.labels[ core_level ];
4452 if( core == coreID ) {
4453 KMP_CPU_SET( osID, mask);
4454 cnt++;
4455 if( cnt == nth_per_core ) {
4456 break;
4457 }
4458 }
4459 }
4460 }
4461 } else if( nthreads <= __kmp_ncores ) {
4462
4463 int core = 0;
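            // Sketch of the intent (as read from the loop below): walk the cores in
            // procarr order, counting only cores that still expose at least one
            // available proc; thread tid binds to the tid-th such core, taking either
            // its first available context (granularity thread) or all of them
            // (granularity core).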
4464 for( int i = 0; i < ncores; i++ ) {
4465 // Check whether this core has any available procs recorded in procarr[]
4466 int in_mask = 0;
4467 for( int j = 0; j < nth_per_core; j++ ) {
4468 if( procarr[ i * nth_per_core + j ] != - 1 ) {
4469 in_mask = 1;
4470 break;
4471 }
4472 }
4473 if( in_mask ) {
4474 if( tid == core ) {
4475 for( int j = 0; j < nth_per_core; j++ ) {
4476 int osID = procarr[ i * nth_per_core + j ];
4477 if( osID != -1 ) {
4478 KMP_CPU_SET( osID, mask );
4479 // For granularity=thread it is enough to set the first available osID for this core
4480 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4481 break;
4482 }
4483 }
4484 }
4485 break;
4486 } else {
4487 core++;
4488 }
4489 }
4490 }
4491
4492 } else { // nthreads > __kmp_ncores
4493
4494 // Array to save the number of processors at each core
4495 int nproc_at_core[ ncores ];
4496 // Array to save the number of cores with "x" available processors
4497 int ncores_with_x_procs[ nth_per_core + 1 ];
4498 // Array to save the number of cores with at least x (up to nth_per_core) available procs
4499 int ncores_with_x_to_max_procs[ nth_per_core + 1 ];
4500
4501 for( int i = 0; i <= nth_per_core; i++ ) {
4502 ncores_with_x_procs[ i ] = 0;
4503 ncores_with_x_to_max_procs[ i ] = 0;
4504 }
4505
4506 for( int i = 0; i < ncores; i++ ) {
4507 int cnt = 0;
4508 for( int j = 0; j < nth_per_core; j++ ) {
4509 if( procarr[ i * nth_per_core + j ] != -1 ) {
4510 cnt++;
4511 }
4512 }
4513 nproc_at_core[ i ] = cnt;
4514 ncores_with_x_procs[ cnt ]++;
4515 }
4516
4517 for( int i = 0; i <= nth_per_core; i++ ) {
4518 for( int j = i; j <= nth_per_core; j++ ) {
4519 ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
4520 }
4521 }
4522
4523 // Max number of processors
4524 int nproc = nth_per_core * ncores;
4525 // An array to keep the number of threads assigned to each thread context
4526 int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
4527 for( int i = 0; i < nproc; i++ ) {
4528 newarr[ i ] = 0;
4529 }
4530
4531 int nth = nthreads;
4532 int flag = 0;
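            // Roughly, per the loop below: the first pass hands out one thread per
            // hardware context, filling the first context of every usable core, then
            // the second context of cores that have one, and so on
            // (ncores_with_x_to_max_procs[j] bounds how many cores can still take a
            // j-th thread). Once flag != 0, later passes stack extra threads onto
            // contexts that are already occupied. newarr[ctx] ends up with the number
            // of threads assigned to each context and is consumed by the prefix-sum
            // walk further down.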
4533 while( nth > 0 ) {
4534 for( int j = 1; j <= nth_per_core; j++ ) {
4535 int cnt = ncores_with_x_to_max_procs[ j ];
4536 for( int i = 0; i < ncores; i++ ) {
4537 // Skip the core with 0 processors
4538 if( nproc_at_core[ i ] == 0 ) {
4539 continue;
4540 }
4541 for( int k = 0; k < nth_per_core; k++ ) {
4542 if( procarr[ i * nth_per_core + k ] != -1 ) {
4543 if( newarr[ i * nth_per_core + k ] == 0 ) {
4544 newarr[ i * nth_per_core + k ] = 1;
4545 cnt--;
4546 nth--;
4547 break;
4548 } else {
4549 if( flag != 0 ) {
4550 newarr[ i * nth_per_core + k ] ++;
4551 cnt--;
4552 nth--;
4553 break;
4554 }
4555 }
4556 }
4557 }
4558 if( cnt == 0 || nth == 0 ) {
4559 break;
4560 }
4561 }
4562 if( nth == 0 ) {
4563 break;
4564 }
4565 }
4566 flag = 1;
4567 }
4568 int sum = 0;
4569 for( int i = 0; i < nproc; i++ ) {
4570 sum += newarr[ i ];
4571 if( sum > tid ) {
4572 // Granularity == thread
4573 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4574 int osID = procarr[ i ];
4575 KMP_CPU_SET( osID, mask);
4576 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4577 int coreID = i / nth_per_core;
4578 for( int ii = 0; ii < nth_per_core; ii++ ) {
4579 int osID = procarr[ coreID * nth_per_core + ii ];
4580 if( osID != -1 ) {
4581 KMP_CPU_SET( osID, mask);
4582 }
4583 }
4584 }
4585 break;
4586 }
4587 }
4588 __kmp_free( newarr );
4589 }
4590
4591 if (__kmp_affinity_verbose) {
4592 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4593 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004594 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4595 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004596 }
4597 __kmp_set_system_affinity( mask, TRUE );
4598 }
4599}
4600
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004601#else
4602 // affinity not supported
4603
4604kmp_uint32 mac_skipPerLevel[7];
4605kmp_uint32 mac_depth;
4606kmp_uint8 mac_leaf_kids;
4607void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
4608 static int first = 1;
4609 if (first) {
4610 const kmp_uint32 maxLevels = 7;
4611 kmp_uint32 numPerLevel[maxLevels];
4612
4613 for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
4614 numPerLevel[i] = 1;
4615 mac_skipPerLevel[i] = 1;
4616 }
4617
4618 mac_depth = 2;
4619 numPerLevel[0] = nproc;
4620
4621 kmp_uint32 branch = 4;
4622 if (numPerLevel[0] == 1) branch = nproc/4;
4623 if (branch<4) branch=4;
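 // Rough idea of the width optimization below (description only): repeatedly halve an
 // over-wide level (rounding odd counts up) and push the factor of two into the level
 // above, growing mac_depth when a level first becomes non-trivial; level 0 is capped
 // at a fan-out of 4. mac_skipPerLevel[i] then becomes the cumulative product of the
 // level widths below level i.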
4624 for (kmp_uint32 d=0; d<mac_depth-1; ++d) { // optimize hierarchy width
4625 while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
4626 if (numPerLevel[d] & 1) numPerLevel[d]++;
4627 numPerLevel[d] = numPerLevel[d] >> 1;
4628 if (numPerLevel[d+1] == 1) mac_depth++;
4629 numPerLevel[d+1] = numPerLevel[d+1] << 1;
4630 }
4631 if(numPerLevel[0] == 1) {
4632 branch = branch >> 1;
4633 if (branch<4) branch = 4;
4634 }
4635 }
4636
4637 for (kmp_uint32 i=1; i<mac_depth; ++i)
4638 mac_skipPerLevel[i] = numPerLevel[i-1] * mac_skipPerLevel[i-1];
4639 mac_leaf_kids = (kmp_uint8)numPerLevel[0]-1;
4640 first=0;
4641 }
4642 thr_bar->depth = mac_depth;
4643 thr_bar->base_leaf_kids = mac_leaf_kids;
4644 thr_bar->skip_per_level = mac_skipPerLevel;
4645}
4646
Alp Toker763b9392014-02-28 09:42:41 +00004647#endif // KMP_AFFINITY_SUPPORTED