/*
 * kmp_affinity.cpp -- affinity management
 * $Revision: 43473 $
 * $Date: 2014-09-26 15:02:57 -0500 (Fri, 26 Sep 2014) $
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_str.h"
#include "kmp_wrapper_getpid.h"

#if KMP_AFFINITY_SUPPORTED

//
// Print the affinity mask to the character array in a pretty format.
//
char *
__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
{
    KMP_ASSERT(buf_len >= 40);
    char *scan = buf;
    char *end = buf + buf_len - 1;

    //
    // Find first element / check for empty set.
    //
    size_t i;
    for (i = 0; i < KMP_CPU_SETSIZE; i++) {
        if (KMP_CPU_ISSET(i, mask)) {
            break;
        }
    }
    if (i == KMP_CPU_SETSIZE) {
        sprintf(scan, "{<empty>}");
        while (*scan != '\0') scan++;
        KMP_ASSERT(scan <= end);
        return buf;
    }

    sprintf(scan, "{%ld", (long)i);
    while (*scan != '\0') scan++;
    i++;
    for (; i < KMP_CPU_SETSIZE; i++) {
        if (! KMP_CPU_ISSET(i, mask)) {
            continue;
        }

        //
        // Check for buffer overflow. A string of the form ",<n>" will have
        // at most 10 characters, plus we want to leave room to print ",...}"
        // if the set is too large to print for a total of 15 characters.
        // We already left room for '\0' in setting end.
        //
        if (end - scan < 15) {
            break;
        }
        sprintf(scan, ",%-ld", (long)i);
        while (*scan != '\0') scan++;
    }
    if (i < KMP_CPU_SETSIZE) {
        sprintf(scan, ",...");
        while (*scan != '\0') scan++;
    }
    sprintf(scan, "}");
    while (*scan != '\0') scan++;
    KMP_ASSERT(scan <= end);
    return buf;
}
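
//
// Illustrative note (added; not part of the original sources): with
// buf_len >= 40, a mask with bits 0-3 and 8 set would be printed as
// "{0,1,2,3,8}", an empty mask as "{<empty>}", and a mask with more
// elements than fit in the buffer would be truncated to end in ",...}".
//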


void
__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
{
    KMP_CPU_ZERO(mask);

# if KMP_OS_WINDOWS && KMP_ARCH_X86_64

    if (__kmp_num_proc_groups > 1) {
        int group;
        KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
        for (group = 0; group < __kmp_num_proc_groups; group++) {
            int i;
            int num = __kmp_GetActiveProcessorCount(group);
            for (i = 0; i < num; i++) {
                KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
            }
        }
    }
    else

# endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */

    {
        int proc;
        for (proc = 0; proc < __kmp_xproc; proc++) {
            KMP_CPU_SET(proc, mask);
        }
    }
}


//
// In Linux* OS debug & cover (-O0) builds, we need to avoid inline member
// functions.
//
// The icc codegen emits sections with extremely long names, of the form
// ".gnu.linkonce.<mangled_name>". There seems to have been a linker bug
// introduced between GNU ld version 2.14.90.0.4 and 2.15.92.0.2 involving
// some sort of memory corruption or table overflow that is triggered by
// these long strings. I checked the latest version of the linker -
// GNU ld (Linux* OS/GNU Binutils) 2.18.50.0.7.20080422 - and the bug is not
// fixed.
//
// Unfortunately, my attempts to reproduce it in a smaller example have
// failed - I'm not sure what the prospects are of getting it fixed
// properly - but we need a reproducer smaller than all of libiomp.
//
// Work around the problem by avoiding inline constructors in such builds.
// We do this for all platforms, not just Linux* OS - non-inline functions
// are more debuggable and provide better coverage than inline functions.
// Use inline functions in shipping libs, for performance.
//

# if !defined(KMP_DEBUG) && !defined(COVER)

class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth)
      : depth(_depth), leader(FALSE) {
    }
    Address &operator=(const Address &b) {
        depth = b.depth;
        for (unsigned i = 0; i < depth; i++) {
            labels[i] = b.labels[i];
            childNums[i] = b.childNums[i];
        }
        leader = FALSE;
        return *this;
    }
    bool operator==(const Address &b) const {
        if (depth != b.depth)
            return false;
        for (unsigned i = 0; i < depth; i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool isClose(const Address &b, int level) const {
        if (depth != b.depth)
            return false;
        if ((unsigned)level >= depth)
            return true;
        for (unsigned i = 0; i < (depth - level); i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool operator!=(const Address &b) const {
        return !operator==(b);
    }
};

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {
    }
    AddrUnsPair &operator=(const AddrUnsPair &b)
    {
        first = b.first;
        second = b.second;
        return *this;
    }
};

# else

class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth);
    Address &operator=(const Address &b);
    bool operator==(const Address &b) const;
    bool isClose(const Address &b, int level) const;
    bool operator!=(const Address &b) const;
};

Address::Address(unsigned _depth)
{
    depth = _depth;
    leader = FALSE;
}

Address &Address::operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
        labels[i] = b.labels[i];
        childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
}

bool Address::operator==(const Address &b) const {
    if (depth != b.depth)
        return false;
    for (unsigned i = 0; i < depth; i++)
        if (labels[i] != b.labels[i])
            return false;
    return true;
}

bool Address::isClose(const Address &b, int level) const {
    if (depth != b.depth)
        return false;
    if ((unsigned)level >= depth)
        return true;
    for (unsigned i = 0; i < (depth - level); i++)
        if (labels[i] != b.labels[i])
            return false;
    return true;
}

bool Address::operator!=(const Address &b) const {
    return !operator==(b);
}

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second);
    AddrUnsPair &operator=(const AddrUnsPair &b);
};

AddrUnsPair::AddrUnsPair(Address _first, unsigned _second)
  : first(_first), second(_second)
{
}

AddrUnsPair &AddrUnsPair::operator=(const AddrUnsPair &b)
{
    first = b.first;
    second = b.second;
    return *this;
}

# endif /* !defined(KMP_DEBUG) && !defined(COVER) */


static int
__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)
      ->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)
      ->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    for (i = 0; i < depth; i++) {
        if (aa->labels[i] < bb->labels[i]) return -1;
        if (aa->labels[i] > bb->labels[i]) return 1;
    }
    return 0;
}


static int
__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)
      ->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)
      ->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
    KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
    for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
        int j = depth - i - 1;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    for (; i < depth; i++) {
        int j = i - __kmp_affinity_compact;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    return 0;
}
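
//
// Illustrative note (added; not part of the original sources): with a
// depth-3 map of {package, core, thread} and __kmp_affinity_compact == 1,
// the routine above compares childNums[2] (thread) first, then
// childNums[0] (package), then childNums[1] (core) - i.e. the innermost
// __kmp_affinity_compact levels take precedence over the outer ones when
// the table is re-sorted.
//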

/** A structure for holding machine-specific hierarchy info to be computed once at init. */
class hierarchy_info {
public:
    /** Typical levels are threads/core, cores/package or socket, packages/node, nodes/machine,
        etc. We don't want to get specific with nomenclature */
    static const kmp_uint32 maxLevels=7;

    /** This is specifically the depth of the machine configuration hierarchy, in terms of the
        number of levels along the longest path from root to any leaf. It corresponds to the
        number of entries in numPerLevel if we exclude all but one trailing 1. */
    kmp_uint32 depth;
    kmp_uint32 base_depth;
    kmp_uint32 base_num_threads;
    bool uninitialized;

    /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
        node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
        and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
    kmp_uint32 numPerLevel[maxLevels];
    kmp_uint32 skipPerLevel[maxLevels];

    void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
        int hier_depth = adr2os[0].first.depth;
        int level = 0;
        for (int i=hier_depth-1; i>=0; --i) {
            int max = -1;
            for (int j=0; j<num_addrs; ++j) {
                int next = adr2os[j].first.childNums[i];
                if (next > max) max = next;
            }
            numPerLevel[level] = max+1;
            ++level;
        }
    }

    hierarchy_info() : depth(1), uninitialized(true) {}
    void init(AddrUnsPair *adr2os, int num_addrs)
    {
        uninitialized = false;
        for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        // Sort table by physical ID
        if (adr2os) {
            qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
            deriveLevels(adr2os, num_addrs);
        }
        else {
            numPerLevel[0] = 4;
            numPerLevel[1] = num_addrs/4;
            if (num_addrs%4) numPerLevel[1]++;
        }

        base_num_threads = num_addrs;
        for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
            if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
                depth++;

        kmp_uint32 branch = 4;
        if (numPerLevel[0] == 1) branch = num_addrs/4;
        if (branch<4) branch=4;
        for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
            while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
                if (numPerLevel[d] & 1) numPerLevel[d]++;
                numPerLevel[d] = numPerLevel[d] >> 1;
                if (numPerLevel[d+1] == 1) depth++;
                numPerLevel[d+1] = numPerLevel[d+1] << 1;
            }
            if (numPerLevel[0] == 1) {
                branch = branch >> 1;
                if (branch<4) branch = 4;
            }
        }

        for (kmp_uint32 i=1; i<depth; ++i)
            skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];

        base_depth = depth;
    }
};
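
//
// Worked example (added for illustration; derived from the numPerLevel
// comment above, not from the original sources): for the 4-package x
// 4-core x 2-thread machine described there, init() ends up with
// numPerLevel = {2, 4, 4, 1, 1, 1, 1}, depth = 4, and skipPerLevel =
// {1, 2, 8, 32} for the first depth entries (the rest stay 1), i.e.
// skipPerLevel[i] is the number of leaves (hardware threads) spanned by
// one node at level i.
//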

static hierarchy_info machine_hierarchy;

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
    if (machine_hierarchy.uninitialized)
        machine_hierarchy.init(NULL, nproc);

    if (nproc <= machine_hierarchy.base_num_threads)
        machine_hierarchy.depth = machine_hierarchy.base_depth;
    KMP_DEBUG_ASSERT(machine_hierarchy.depth > 0);
    while (nproc > machine_hierarchy.skipPerLevel[machine_hierarchy.depth-1]) {
        machine_hierarchy.depth++;
        machine_hierarchy.skipPerLevel[machine_hierarchy.depth-1] = 2*machine_hierarchy.skipPerLevel[machine_hierarchy.depth-2];
    }
    thr_bar->depth = machine_hierarchy.depth;
    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}

//
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the child_num
// vector of the address object. This is done in case the labels used for
// the children at one node of the hierarchy differ from those used for
// another node at the same level. Example: suppose the machine has 2 nodes
// with 2 packages each. The first node contains packages 601 and 602, and
// the second node contains packages 603 and 604. If we try to sort the table
// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
// because we are paying attention to the labels themselves, not the ordinal
// child numbers. By using the child numbers in the sort, the result is
// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
//
static void
__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
  int numAddrs)
{
    KMP_DEBUG_ASSERT(numAddrs > 0);
    int depth = address2os->first.depth;
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
      * sizeof(unsigned));
    int labCt;
    for (labCt = 0; labCt < depth; labCt++) {
        address2os[0].first.childNums[labCt] = counts[labCt] = 0;
        lastLabel[labCt] = address2os[0].first.labels[labCt];
    }
    int i;
    for (i = 1; i < numAddrs; i++) {
        for (labCt = 0; labCt < depth; labCt++) {
            if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
                int labCt2;
                for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
                    counts[labCt2] = 0;
                    lastLabel[labCt2] = address2os[i].first.labels[labCt2];
                }
                counts[labCt]++;
                lastLabel[labCt] = address2os[i].first.labels[labCt];
                break;
            }
        }
        for (labCt = 0; labCt < depth; labCt++) {
            address2os[i].first.childNums[labCt] = counts[labCt];
        }
        for (; labCt < (int)Address::maxDepth; labCt++) {
            address2os[i].first.childNums[labCt] = 0;
        }
    }
}


//
// All of the __kmp_affinity_create_*_map() routines should set
// __kmp_affinity_masks to a vector of affinity mask objects of length
// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
// return the number of levels in the machine topology tree (zero if
// __kmp_affinity_type == affinity_none).
//
// All of the __kmp_affinity_create_*_map() routines should set *fullMask
// to the affinity mask for the initialization thread. They need to save and
// restore the mask, and it could be needed later, so saving it is just an
// optimization to avoid calling kmp_get_system_affinity() again.
//
static kmp_affin_mask_t *fullMask = NULL;

kmp_affin_mask_t *
__kmp_affinity_get_fullMask() { return fullMask; }


static int nCoresPerPkg, nPackages;
int __kmp_nThreadsPerCore;

//
// __kmp_affinity_uniform_topology() doesn't work when called from
// places which support arbitrarily many levels in the machine topology
// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
// or __kmp_affinity_create_x2apicid_map().
//
inline static bool
__kmp_affinity_uniform_topology()
{
    return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}
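
//
// Illustrative note (added; not part of the original sources): on a machine
// modeled as 2 packages x 4 cores x 2 hardware threads the product is 16;
// if all 16 OS procs are available the topology is reported as uniform,
// whereas restricting the process to, say, 14 of them makes it non-uniform.
//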


//
// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc.
//
static void
__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
  int pkgLevel, int coreLevel, int threadLevel)
{
    int proc;

    KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
    for (proc = 0; proc < len; proc++) {
        int level;
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);
        for (level = 0; level < depth; level++) {
            if (level == threadLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
            }
            else if (level == coreLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
            }
            else if (level == pkgLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
            }
            else if (level > pkgLevel) {
                __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                  level - pkgLevel - 1);
            }
            else {
                __kmp_str_buf_print(&buf, "L%d ", level);
            }
            __kmp_str_buf_print(&buf, "%d ",
              address2os[proc].first.labels[level]);
        }
        KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
          buf.str);
        __kmp_str_buf_free(&buf);
    }
}


//
// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread id's <-> processor id's.
//
static int
__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Even if __kmp_affinity_type == affinity_none, this routine might still
    // be called to set __kmp_ht_enabled, & __kmp_ncores, as well as
    // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(__kmp_affinity_type == affinity_none);
        __kmp_ncores = nPackages = __kmp_xproc;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        __kmp_ht_enabled = FALSE;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ht_enabled, & __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages. Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nPackages = __kmp_avail_proc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    __kmp_ht_enabled = FALSE;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);

        KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }
    if (__kmp_affinity_type == affinity_none) {
        return 0;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    unsigned int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(1);
        addr.labels[0] = i;
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
    }
    if (__kmp_affinity_verbose) {
        KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Only the package level is modeled in the machine topology map,
        // so the #levels of granularity is either 0 or 1.
        //
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels = 1;
        }
        else {
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 1;
}


# if KMP_OS_WINDOWS && KMP_ARCH_X86_64

//
// If multiple Windows* OS processor groups exist, we can create a 2-level
// topology map with the groups at level 0 and the individual procs at
// level 1.
//
// This facilitates letting the threads float among all procs in a group,
// if granularity=group (the default when there are multiple groups).
//
static int
__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // If we don't have multiple processor groups, return now.
    // The flat mapping will be used.
    //
    if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
        // FIXME set *msg_id
        return -1;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(2);
        addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
        addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
              addr.labels[1]);
        }
    }

    if (__kmp_affinity_gran_levels < 0) {
        if (__kmp_affinity_gran == affinity_gran_group) {
            __kmp_affinity_gran_levels = 1;
        }
        else if ((__kmp_affinity_gran == affinity_gran_fine)
          || (__kmp_affinity_gran == affinity_gran_thread)) {
            __kmp_affinity_gran_levels = 0;
        }
        else {
            const char *gran_str = NULL;
            if (__kmp_affinity_gran == affinity_gran_core) {
                gran_str = "core";
            }
            else if (__kmp_affinity_gran == affinity_gran_package) {
                gran_str = "package";
            }
            else if (__kmp_affinity_gran == affinity_gran_node) {
                gran_str = "node";
            }
            else {
                KMP_ASSERT(0);
            }

            // Warning: can't use affinity granularity \"gran\" with group topology method, using "thread"
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 2;
}

# endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */


# if KMP_ARCH_X86 || KMP_ARCH_X86_64

static int
__kmp_cpuid_mask_width(int count) {
    int r = 0;

    while((1<<r) < count)
        ++r;
    return r;
}
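
//
// Illustrative note (added; not part of the original sources):
// __kmp_cpuid_mask_width(6) returns 3, since 2^3 = 8 is the smallest power
// of two >= 6, i.e. 3 bits of the Apic Id are needed to encode 6 distinct
// logical processors per package.
//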


class apicThreadInfo {
public:
    unsigned osId;              // param to __kmp_affinity_bind_thread
    unsigned apicId;            // from cpuid after binding
    unsigned maxCoresPerPkg;    //      ""
    unsigned maxThreadsPerPkg;  //      ""
    unsigned pkgId;             // inferred from above values
    unsigned coreId;            //      ""
    unsigned threadId;          //      ""
};


static int
__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->osId < bb->osId) return -1;
    if (aa->osId > bb->osId) return 1;
    return 0;
}


static int
__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->pkgId < bb->pkgId) return -1;
    if (aa->pkgId > bb->pkgId) return 1;
    if (aa->coreId < bb->coreId) return -1;
    if (aa->coreId > bb->coreId) return 1;
    if (aa->threadId < bb->threadId) return -1;
    if (aa->threadId > bb->threadId) return 1;
    return 0;
}


//
// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
// an algorithm which cycles through the available os threads, setting
// the current thread's affinity mask to that thread, and then retrieves
// the Apic Id for each thread context using the cpuid instruction.
//
static int
__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    int rc;
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

# if KMP_MIC
    {
        // The code below will use cpuid(4).
        // Check if cpuid(4) is supported.
        // FIXME? - this really doesn't need to be specific to MIC.
        kmp_cpuid buf;
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax < 4) {
            *msg_id = kmp_i18n_str_NoLeaf4Support;
            return -1;
        }
    }
# endif // KMP_MIC

    //
    // Even if __kmp_affinity_type == affinity_none, this routine is still
    // called to set __kmp_ht_enabled, & __kmp_ncores, as well as
    // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are not
    // capable of calling __kmp_affinity_get_map()/__kmp_affinity_get_map(),
    // then we need to do something else.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        //
        // Get an upper bound on the number of threads per package using
        // cpuid(1).
        //
        // On some OS/chip combinations where HT is supported by the chip
        // but is disabled, this value will be 2 on a single core chip.
        // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
        //
        kmp_cpuid buf;
        __kmp_x86_cpuid(1, 0, &buf);
        int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (maxThreadsPerPkg == 0) {
            maxThreadsPerPkg = 1;
        }

        //
        // The num cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // The author of cpu_count.cpp treated this as only an upper bound
        // on the number of cores, but I haven't seen any cases where it
        // was greater than the actual number of cores, so we will treat
        // it as exact in this block of code.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            nCoresPerPkg = 1;
        }

        //
        // There is no way to reliably tell if HT is enabled without issuing
        // the cpuid instruction from every thread, and correlating the cpuid
        // info, so if the machine is not affinity capable, we assume that HT
        // is off. We have seen quite a few machines where maxThreadsPerPkg
        // is 2, yet the machine does not support HT.
        //
        // - Older OSes are usually found on machines with older chips, which
        //   do not support HT.
        //
        // - The performance penalty for mistakenly identifying a machine as
        //   HT when it isn't (which results in blocktime being incorrectly set
        //   to 0) is greater than the penalty for mistakenly identifying
        //   a machine as being 1 thread/core when it is really HT enabled
        //   (which results in blocktime being incorrectly set to a positive
        //   value).
        //
        __kmp_ncores = __kmp_xproc;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        __kmp_nThreadsPerCore = 1;
        __kmp_ht_enabled = FALSE;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    KMP_ASSERT(oldMask != NULL);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    // The relevant information is:
    //
    // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
    //    has a unique Apic Id, which is of the form pkg# : core# : thread#.
    //
    // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The
    //    value of this field determines the width of the core# + thread#
    //    fields in the Apic Id. It is also an upper bound on the number
    //    of threads per package, but it has been verified that situations
    //    happen where it is not exact. In particular, on certain OS/chip
    //    combinations where Intel(R) Hyper-Threading Technology is supported
    //    by the chip but has been disabled, the value of this field will be
    //    2 (for a single core chip). On other OS/chip combinations supporting
    //    Intel(R) Hyper-Threading Technology, the value of this field will be
    //    1 when Intel(R) Hyper-Threading Technology is disabled and 2 when it
    //    is enabled.
    //
    // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The
    //    value of this field (+1) determines the width of the core# field in
    //    the Apic Id. The comments in "cpucount.cpp" say that this value is
    //    an upper bound, but the IA-32 architecture manual says that it is
    //    exactly the number of cores per package, and I haven't seen any
    //    case where it wasn't.
    //
    // From this information, deduce the package Id, core Id, and thread Id,
    // and set the corresponding fields in the apicThreadInfo struct.
    //
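    //
    // Worked example (added for illustration; not from the original sources):
    // suppose cpuid(1) reports maxThreadsPerPkg = 16 and cpuid(4) reports
    // maxCoresPerPkg = 8. Then widthCT = 4 and widthC = 3, so widthT = 1.
    // For an Apic Id of 0x53 (binary 101 0011):
    //     pkgId    = 0x53 >> 4       = 5
    //     coreId   = (0x53 >> 1) & 7 = 1
    //     threadId = 0x53 & 1        = 1
    //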
    unsigned i;
    apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
    unsigned nApics = 0;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(i);
        threadInfo[nApics].osId = i;

        //
        // The apic id and max threads per pkg come from cpuid(1).
        //
        kmp_cpuid buf;
        __kmp_x86_cpuid(1, 0, &buf);
        if (! ((buf.edx >> 9) & 1)) {
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_ApicNotPresent;
            return -1;
        }
        threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
        threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (threadInfo[nApics].maxThreadsPerPkg == 0) {
            threadInfo[nApics].maxThreadsPerPkg = 1;
        }

        //
        // Max cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            threadInfo[nApics].maxCoresPerPkg = 1;
        }

        //
        // Infer the pkgId / coreId / threadId using only the info
        // obtained locally.
        //
        int widthCT = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxThreadsPerPkg);
        threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

        int widthC = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxCoresPerPkg);
        int widthT = widthCT - widthC;
        if (widthT < 0) {
            //
            // I've never seen this one happen, but I suppose it could, if
            // the cpuid instruction on a chip was really screwed up.
            // Make sure to restore the affinity mask before the tail call.
            //
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }

        int maskC = (1 << widthC) - 1;
        threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
          & maskC;

        int maskT = (1 << widthT) - 1;
        threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

        nApics++;
    }

    //
    // We've collected all the info we need.
    // Restore the old affinity mask for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, form an Address object
    // with depth 1 and return immediately (or, if affinity is off, set
    // address2os to NULL and return).
    //
    // If it is configured to omit the package level when there is only a
    // single package, the logic at the end of this routine won't work if
    // there is only a single thread - it would try to form an Address
    // object with depth 0.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        __kmp_ht_enabled = FALSE;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
        Address addr(1);
        addr.labels[0] = threadInfo[0].pkgId;
        (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
        }

        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the threadInfo table by physical Id.
    //
    qsort(threadInfo, nApics, sizeof(*threadInfo),
      __kmp_affinity_cmp_apicThreadInfo_phys_id);

    //
    // The table is now sorted by pkgId / coreId / threadId, but we really
    // don't know the radix of any of the fields. pkgId's may be sparsely
    // assigned among the chips on a system. Although coreId's are usually
    // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
    // [0..threadsPerCore-1], we don't want to make any such assumptions.
    //
    // For that matter, we don't know what coresPerPkg and threadsPerCore
    // (or the total # packages) are at this point - we want to determine
    // that now. We only have an upper bound on the first two figures.
    //
    // We also perform a consistency check at this point: the values returned
    // by the cpuid instruction for any thread bound to a given package had
    // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
    //
    nPackages = 1;
    nCoresPerPkg = 1;
    __kmp_nThreadsPerCore = 1;
    unsigned nCores = 1;

    unsigned pkgCt = 1;                         // to determine radii
    unsigned lastPkgId = threadInfo[0].pkgId;
    unsigned coreCt = 1;
    unsigned lastCoreId = threadInfo[0].coreId;
    unsigned threadCt = 1;
    unsigned lastThreadId = threadInfo[0].threadId;

    // intra-pkg consistency checks
    unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
    unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

    for (i = 1; i < nApics; i++) {
        if (threadInfo[i].pkgId != lastPkgId) {
            nCores++;
            pkgCt++;
            lastPkgId = threadInfo[i].pkgId;
            if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
            coreCt = 1;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;

            //
            // This is a different package, so go on to the next iteration
            // without doing any consistency checks. Reset the consistency
            // check vars, though.
            //
            prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
            prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
            continue;
        }

        if (threadInfo[i].coreId != lastCoreId) {
            nCores++;
            coreCt++;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;
        }
        else if (threadInfo[i].threadId != lastThreadId) {
            threadCt++;
            lastThreadId = threadInfo[i].threadId;
        }
        else {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
            return -1;
        }

        //
        // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
        // fields agree between all the threads bound to a given package.
        //
        if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
          || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }
    }
    nPackages = pkgCt;
    if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
    if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ht_enabled, & __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages. Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ht_enabled = (__kmp_nThreadsPerCore > 1);
    __kmp_ncores = nCores;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (__kmp_affinity_uniform_topology()) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Now that we've determined the number of packages, the number of cores
    // per package, and the number of threads per core, we can construct the
    // data structure that is to be returned.
    //
    int pkgLevel = 0;
    int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
    int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
    unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

    KMP_ASSERT(depth > 0);
    *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

    for (i = 0; i < nApics; ++i) {
        Address addr(depth);
        unsigned os = threadInfo[i].osId;
        int d = 0;

        if (pkgLevel >= 0) {
            addr.labels[d++] = threadInfo[i].pkgId;
        }
        if (coreLevel >= 0) {
            addr.labels[d++] = threadInfo[i].coreId;
        }
        if (threadLevel >= 0) {
            addr.labels[d++] = threadInfo[i].threadId;
        }
        (*address2os)[i] = AddrUnsPair(addr, os);
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0)
          && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return depth;
}


//
// Intel(R) microarchitecture code name Nehalem, Dunnington and later
// architectures support a newer interface for specifying the x2APIC Ids,
// based on cpuid leaf 11.
//
static int
__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;

    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check to see if cpuid leaf 11 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 11) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }
    __kmp_x86_cpuid(11, 0, &buf);
    if (buf.ebx == 0) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }

    //
    // Find the number of levels in the machine topology. While we're at it,
    // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will
    // try to get more accurate values later by explicitly counting them,
    // but get reasonable defaults now, in case we return early.
    //
    int level;
    int threadLevel = -1;
    int coreLevel = -1;
    int pkgLevel = -1;
    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

    for (level = 0;; level++) {
        if (level > 31) {
            //
            // FIXME: Hack for DPD200163180
            //
            // If level is big then something went wrong -> exiting
            //
            // There could actually be 32 valid levels in the machine topology,
            // but so far, the only machine we have seen which does not exit
            // this loop before iteration 32 has fubar x2APIC settings.
            //
            // For now, just reject this case based upon loop trip count.
            //
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }
        __kmp_x86_cpuid(11, level, &buf);
        if (buf.ebx == 0) {
            if (pkgLevel < 0) {
                //
                // Will infer nPackages from __kmp_xproc
                //
                pkgLevel = level;
                level++;
            }
            break;
        }
        int kind = (buf.ecx >> 8) & 0xff;
        if (kind == 1) {
            //
            // SMT level
            //
            threadLevel = level;
            coreLevel = -1;
            pkgLevel = -1;
            __kmp_nThreadsPerCore = buf.ebx & 0xff;
            if (__kmp_nThreadsPerCore == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else if (kind == 2) {
            //
            // core level
            //
            coreLevel = level;
            pkgLevel = -1;
            nCoresPerPkg = buf.ebx & 0xff;
            if (nCoresPerPkg == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else {
            if (level <= 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
            if (pkgLevel >= 0) {
                continue;
            }
            pkgLevel = level;
            nPackages = buf.ebx & 0xff;
            if (nPackages == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
    }
    int depth = level;

    //
    // In the above loop, "level" was counted from the finest level (usually
    // thread) to the coarsest. The caller expects that we will place the
    // labels in (*address2os)[].first.labels[] in the inverse order, so
    // we need to invert the vars saying which level means what.
    //
    if (threadLevel >= 0) {
        threadLevel = depth - threadLevel - 1;
    }
    if (coreLevel >= 0) {
        coreLevel = depth - coreLevel - 1;
    }
    KMP_DEBUG_ASSERT(pkgLevel >= 0);
    pkgLevel = depth - pkgLevel - 1;

    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are not
    // capable of calling __kmp_affinity_get_map()/__kmp_affinity_get_map(),
    // then we need to do something else - use the defaults that we calculated
    // from issuing cpuid without binding to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE())
    {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        __kmp_ht_enabled = (__kmp_nThreadsPerCore > 1);
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Allocate the data structure to be returned.
    //
    AddrUnsPair *retval = (AddrUnsPair *)
      __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    unsigned int proc;
    int nApics = 0;
    for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(proc, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(proc);

        //
        // Extract the labels for each level in the machine topology map
        // from the Apic ID.
        //
        Address addr(depth);
        int prev_shift = 0;

        for (level = 0; level < depth; level++) {
            __kmp_x86_cpuid(11, level, &buf);
            unsigned apicId = buf.edx;
            if (buf.ebx == 0) {
                if (level != depth - 1) {
                    KMP_CPU_FREE(oldMask);
                    *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
                    return -1;
                }
                addr.labels[depth - level - 1] = apicId >> prev_shift;
                level++;
                break;
            }
            int shift = buf.eax & 0x1f;
            int mask = (1 << shift) - 1;
            addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
            prev_shift = shift;
        }
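        //
        // Worked example (added for illustration; not from the original
        // sources): if cpuid(11, 0) reports a shift of 1 (SMT width) and
        // cpuid(11, 1) reports a shift of 5 (SMT + core width), then for an
        // x2APIC id of 0x2B the loop above stores
        //     thread  = 0x2B & 0x1         = 1
        //     core    = (0x2B & 0x1F) >> 1 = 5
        //     package = 0x2B >> 5          = 1
        // into labels[depth-1], labels[depth-2], and labels[depth-3].
        //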
        if (level != depth) {
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }

        retval[nApics] = AddrUnsPair(addr, proc);
        nApics++;
    }

    //
    // We've collected all the info we need.
    // Restore the old affinity mask for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, return now.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        __kmp_ht_enabled = FALSE;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(retval);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        //
        // Form an Address object which only includes the package level.
        //
        Address addr(1);
        addr.labels[0] = retval[0].first.labels[pkgLevel];
        retval[0].first = addr;

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
        }

        *address2os = retval;
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the table by physical Id.
    //
    qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

    //
    // Find the radix at each of the levels.
    //
    unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    for (level = 0; level < depth; level++) {
        totals[level] = 1;
        maxCt[level] = 1;
        counts[level] = 1;
        last[level] = retval[0].first.labels[level];
    }

    //
    // From here on, the iteration variable "level" runs from the finest
    // level to the coarsest, i.e. we iterate forward through
    // (*address2os)[].first.labels[] - in the previous loops, we iterated
    // backwards.
    //
    for (proc = 1; (int)proc < nApics; proc++) {
        int level;
        for (level = 0; level < depth; level++) {
            if (retval[proc].first.labels[level] != last[level]) {
                int j;
                for (j = level + 1; j < depth; j++) {
                    totals[j]++;
                    counts[j] = 1;
                    // The line below causes printing of incorrect topology information
                    // in case the max value for some level (maxCt[level]) is encountered
                    // earlier than some smaller value while going through the array.
                    // For example, let pkg0 have 4 cores and pkg1 have 2 cores.  Then
                    // maxCt[1] == 2, whereas it must be 4.
                    // TODO!!! Check if it can be commented safely
                    //maxCt[j] = 1;
                    last[j] = retval[proc].first.labels[j];
                }
                totals[level]++;
                counts[level]++;
                if (counts[level] > maxCt[level]) {
                    maxCt[level] = counts[level];
                }
                last[level] = retval[proc].first.labels[level];
                break;
            }
            else if (level == depth - 1) {
                __kmp_free(last);
                __kmp_free(maxCt);
                __kmp_free(counts);
                __kmp_free(totals);
                __kmp_free(retval);
                KMP_CPU_FREE(oldMask);
                *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
                return -1;
            }
        }
    }

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ht_enabled, & __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages. Make sure all these vars are set
    // correctly, and return if affinity is not enabled.
    //
    if (threadLevel >= 0) {
        __kmp_nThreadsPerCore = maxCt[threadLevel];
    }
    else {
        __kmp_nThreadsPerCore = 1;
    }
    __kmp_ht_enabled = (__kmp_nThreadsPerCore > 1);

    nPackages = totals[pkgLevel];

    if (coreLevel >= 0) {
        __kmp_ncores = totals[coreLevel];
        nCoresPerPkg = maxCt[coreLevel];
    }
    else {
        __kmp_ncores = nPackages;
        nCoresPerPkg = 1;
    }

    //
    // Check to see if the machine topology is uniform
    //
    unsigned prod = maxCt[0];
    for (level = 1; level < depth; level++) {
        prod *= maxCt[level];
    }
    bool uniform = (prod == totals[level - 1]);

    //
    // Print the machine topology summary.
    //
    if (__kmp_affinity_verbose) {
        char mask[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (uniform) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }

        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);

        __kmp_str_buf_print(&buf, "%d", totals[0]);
        for (level = 1; level <= pkgLevel; level++) {
            __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
        }
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);

        __kmp_str_buf_free(&buf);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(last);
        __kmp_free(maxCt);
        __kmp_free(counts);
        __kmp_free(totals);
        __kmp_free(retval);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Find any levels with radix 1, and remove them from the map
    // (except for the package level).
    //
    int new_depth = 0;
    for (level = 0; level < depth; level++) {
        if ((maxCt[level] == 1) && (level != pkgLevel)) {
            continue;
        }
        new_depth++;
    }

    //
    // If we are removing any levels, allocate a new vector to return,
    // and copy the relevant information to it.
    //
    if (new_depth != depth) {
        AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
          sizeof(AddrUnsPair) * nApics);
        for (proc = 0; (int)proc < nApics; proc++) {
            Address addr(new_depth);
            new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
        }
        int new_level = 0;
        for (level = 0; level < depth; level++) {
            if ((maxCt[level] == 1) && (level != pkgLevel)) {
                if (level == threadLevel) {
                    threadLevel = -1;
                }
                else if ((threadLevel >= 0) && (level < threadLevel)) {
                    threadLevel--;
                }
                if (level == coreLevel) {
                    coreLevel = -1;
                }
                else if ((coreLevel >= 0) && (level < coreLevel)) {
                    coreLevel--;
                }
                if (level < pkgLevel) {
                    pkgLevel--;
                }
                continue;
            }
            for (proc = 0; (int)proc < nApics; proc++) {
                new_retval[proc].first.labels[new_level]
                  = retval[proc].first.labels[level];
            }
            new_level++;
        }

        __kmp_free(retval);
        retval = new_retval;
        depth = new_depth;
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(last);
    __kmp_free(maxCt);
    __kmp_free(counts);
    __kmp_free(totals);
    KMP_CPU_FREE(oldMask);
    *address2os = retval;
    return depth;
}


# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */


#define osIdIndex       0
#define threadIdIndex   1
#define coreIdIndex     2
#define pkgIdIndex      3
#define nodeIdIndex     4
1798
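//
// Each cpuinfo record is stored as an array of unsigned fields indexed by
// the constants above. Any node_<n> fields occupy indices nodeIdIndex and
// above, and maxIndex tracks the highest index actually in use.
//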
1799typedef unsigned *ProcCpuInfo;
1800static unsigned maxIndex = pkgIdIndex;
1801
1802
1803static int
1804__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
1805{
1806 const unsigned *aa = (const unsigned *)a;
1807 const unsigned *bb = (const unsigned *)b;
1808 if (aa[osIdIndex] < bb[osIdIndex]) return -1;
1809 if (aa[osIdIndex] > bb[osIdIndex]) return 1;
1810 return 0;
}
1812
1813
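//
// Compare two cpuinfo records field by field, most significant level first
// (node levels, then package, core, and thread id), ending with the OS
// proc id.
//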
1814static int
1815__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
1816{
1817 unsigned i;
1818 const unsigned *aa = *((const unsigned **)a);
1819 const unsigned *bb = *((const unsigned **)b);
1820 for (i = maxIndex; ; i--) {
1821 if (aa[i] < bb[i]) return -1;
1822 if (aa[i] > bb[i]) return 1;
1823 if (i == osIdIndex) break;
1824 }
1825 return 0;
1826}
1827
1828
1829//
1830// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
1831// affinity map.
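//
// The scanner below recognizes the "processor", "physical id", "core id",
// "thread id", and "node_<n> id" fields, and a blank line terminates each
// record. A simplified, illustrative record might look like:
//
//   processor   : 0
//   physical id : 0
//   core id     : 0
//   thread id   : 0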
1832//
1833static int
1834__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
1835 kmp_i18n_id_t *const msg_id, FILE *f)
1836{
1837 *address2os = NULL;
1838 *msg_id = kmp_i18n_null;
1839
1840 //
    // Scan the file once, and count the number of "processor" (osId) fields,
Alp Toker8f2d3f02014-02-24 10:40:15 +00001842 // and find the highest value of <n> for a node_<n> field.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001843 //
1844 char buf[256];
1845 unsigned num_records = 0;
1846 while (! feof(f)) {
1847 buf[sizeof(buf) - 1] = 1;
1848 if (! fgets(buf, sizeof(buf), f)) {
1849 //
1850 // Read errors presumably because of EOF
1851 //
1852 break;
1853 }
1854
1855 char s1[] = "processor";
1856 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1857 num_records++;
1858 continue;
1859 }
1860
1861 //
1862 // FIXME - this will match "node_<n> <garbage>"
1863 //
1864 unsigned level;
1865 if (sscanf(buf, "node_%d id", &level) == 1) {
1866 if (nodeIdIndex + level >= maxIndex) {
1867 maxIndex = nodeIdIndex + level;
1868 }
1869 continue;
1870 }
1871 }
1872
1873 //
1874 // Check for empty file / no valid processor records, or too many.
1875 // The number of records can't exceed the number of valid bits in the
1876 // affinity mask.
1877 //
1878 if (num_records == 0) {
1879 *line = 0;
1880 *msg_id = kmp_i18n_str_NoProcRecords;
1881 return -1;
1882 }
1883 if (num_records > (unsigned)__kmp_xproc) {
1884 *line = 0;
1885 *msg_id = kmp_i18n_str_TooManyProcRecords;
1886 return -1;
1887 }
1888
1889 //
    // Set the file pointer back to the beginning, so that we can scan the
    // file again, this time performing a full parse of the data.
    // Allocate a vector of ProcCpuInfo objects, where we will place the data.
1893 // Adding an extra element at the end allows us to remove a lot of extra
1894 // checks for termination conditions.
1895 //
1896 if (fseek(f, 0, SEEK_SET) != 0) {
1897 *line = 0;
1898 *msg_id = kmp_i18n_str_CantRewindCpuinfo;
1899 return -1;
1900 }
1901
1902 //
1903 // Allocate the array of records to store the proc info in. The dummy
1904 // element at the end makes the logic in filling them out easier to code.
1905 //
1906 unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
1907 * sizeof(unsigned *));
1908 unsigned i;
1909 for (i = 0; i <= num_records; i++) {
1910 threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
1911 * sizeof(unsigned));
1912 }
1913
1914#define CLEANUP_THREAD_INFO \
1915 for (i = 0; i <= num_records; i++) { \
1916 __kmp_free(threadInfo[i]); \
1917 } \
1918 __kmp_free(threadInfo);
1919
1920 //
1921 // A value of UINT_MAX means that we didn't find the field
1922 //
1923 unsigned __index;
1924
1925#define INIT_PROC_INFO(p) \
1926 for (__index = 0; __index <= maxIndex; __index++) { \
1927 (p)[__index] = UINT_MAX; \
1928 }
1929
1930 for (i = 0; i <= num_records; i++) {
1931 INIT_PROC_INFO(threadInfo[i]);
1932 }
1933
1934 unsigned num_avail = 0;
1935 *line = 0;
1936 while (! feof(f)) {
1937 //
1938 // Create an inner scoping level, so that all the goto targets at the
1939 // end of the loop appear in an outer scoping level. This avoids
1940 // warnings about jumping past an initialization to a target in the
1941 // same block.
1942 //
1943 {
1944 buf[sizeof(buf) - 1] = 1;
1945 bool long_line = false;
1946 if (! fgets(buf, sizeof(buf), f)) {
1947 //
1948 // Read errors presumably because of EOF
1949 //
1950 // If there is valid data in threadInfo[num_avail], then fake
                // a blank line to ensure that the last address gets parsed.
1952 //
1953 bool valid = false;
1954 for (i = 0; i <= maxIndex; i++) {
1955 if (threadInfo[num_avail][i] != UINT_MAX) {
1956 valid = true;
1957 }
1958 }
1959 if (! valid) {
1960 break;
1961 }
1962 buf[0] = 0;
1963 } else if (!buf[sizeof(buf) - 1]) {
1964 //
1965 // The line is longer than the buffer. Set a flag and don't
1966 // emit an error if we were going to ignore the line, anyway.
1967 //
1968 long_line = true;
1969
1970#define CHECK_LINE \
1971 if (long_line) { \
1972 CLEANUP_THREAD_INFO; \
1973 *msg_id = kmp_i18n_str_LongLineCpuinfo; \
1974 return -1; \
1975 }
1976 }
1977 (*line)++;
1978
1979 char s1[] = "processor";
1980 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1981 CHECK_LINE;
1982 char *p = strchr(buf + sizeof(s1) - 1, ':');
1983 unsigned val;
1984 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
1985 if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
1986 threadInfo[num_avail][osIdIndex] = val;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001987#if KMP_OS_LINUX && USE_SYSFS_INFO
1988 char path[256];
1989 snprintf(path, sizeof(path),
1990 "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
1991 threadInfo[num_avail][osIdIndex]);
1992 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
1993
1994 snprintf(path, sizeof(path),
1995 "/sys/devices/system/cpu/cpu%u/topology/core_id",
1996 threadInfo[num_avail][osIdIndex]);
1997 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001998 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001999#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002000 }
2001 char s2[] = "physical id";
2002 if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
2003 CHECK_LINE;
2004 char *p = strchr(buf + sizeof(s2) - 1, ':');
2005 unsigned val;
2006 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2007 if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
2008 threadInfo[num_avail][pkgIdIndex] = val;
2009 continue;
2010 }
2011 char s3[] = "core id";
2012 if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
2013 CHECK_LINE;
2014 char *p = strchr(buf + sizeof(s3) - 1, ':');
2015 unsigned val;
2016 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2017 if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
2018 threadInfo[num_avail][coreIdIndex] = val;
2019 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002020#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jim Cownie5e8470a2013-09-27 10:38:44 +00002021 }
2022 char s4[] = "thread id";
2023 if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
2024 CHECK_LINE;
2025 char *p = strchr(buf + sizeof(s4) - 1, ':');
2026 unsigned val;
2027 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2028 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
2029 threadInfo[num_avail][threadIdIndex] = val;
2030 continue;
2031 }
2032 unsigned level;
2033 if (sscanf(buf, "node_%d id", &level) == 1) {
2034 CHECK_LINE;
2035 char *p = strchr(buf + sizeof(s4) - 1, ':');
2036 unsigned val;
2037 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
2038 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
2039 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
2040 threadInfo[num_avail][nodeIdIndex + level] = val;
2041 continue;
2042 }
2043
2044 //
2045 // We didn't recognize the leading token on the line.
2046 // There are lots of leading tokens that we don't recognize -
2047 // if the line isn't empty, go on to the next line.
2048 //
2049 if ((*buf != 0) && (*buf != '\n')) {
2050 //
2051 // If the line is longer than the buffer, read characters
2052 // until we find a newline.
2053 //
2054 if (long_line) {
2055 int ch;
2056 while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
2057 }
2058 continue;
2059 }
2060
2061 //
2062 // A newline has signalled the end of the processor record.
2063 // Check that there aren't too many procs specified.
2064 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002065 if ((int)num_avail == __kmp_xproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002066 CLEANUP_THREAD_INFO;
2067 *msg_id = kmp_i18n_str_TooManyEntries;
2068 return -1;
2069 }
2070
2071 //
2072 // Check for missing fields. The osId field must be there, and we
2073 // currently require that the physical id field is specified, also.
2074 //
2075 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
2076 CLEANUP_THREAD_INFO;
2077 *msg_id = kmp_i18n_str_MissingProcField;
2078 return -1;
2079 }
2080 if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
2081 CLEANUP_THREAD_INFO;
2082 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
2083 return -1;
2084 }
2085
2086 //
2087 // Skip this proc if it is not included in the machine model.
2088 //
2089 if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
2090 INIT_PROC_INFO(threadInfo[num_avail]);
2091 continue;
2092 }
2093
2094 //
2095 // We have a successful parse of this proc's info.
2096 // Increment the counter, and prepare for the next proc.
2097 //
2098 num_avail++;
2099 KMP_ASSERT(num_avail <= num_records);
2100 INIT_PROC_INFO(threadInfo[num_avail]);
2101 }
2102 continue;
2103
2104 no_val:
2105 CLEANUP_THREAD_INFO;
2106 *msg_id = kmp_i18n_str_MissingValCpuinfo;
2107 return -1;
2108
2109 dup_field:
2110 CLEANUP_THREAD_INFO;
2111 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
2112 return -1;
2113 }
2114 *line = 0;
2115
2116# if KMP_MIC && REDUCE_TEAM_SIZE
2117 unsigned teamSize = 0;
2118# endif // KMP_MIC && REDUCE_TEAM_SIZE
2119
2120 // check for num_records == __kmp_xproc ???
2121
2122 //
2123 // If there's only one thread context to bind to, form an Address object
2124 // with depth 1 and return immediately (or, if affinity is off, set
2125 // address2os to NULL and return).
2126 //
2127 // If it is configured to omit the package level when there is only a
2128 // single package, the logic at the end of this routine won't work if
2129 // there is only a single thread - it would try to form an Address
2130 // object with depth 0.
2131 //
2132 KMP_ASSERT(num_avail > 0);
2133 KMP_ASSERT(num_avail <= num_records);
2134 if (num_avail == 1) {
2135 __kmp_ncores = 1;
2136 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
2137 __kmp_ht_enabled = FALSE;
2138 if (__kmp_affinity_verbose) {
2139 if (! KMP_AFFINITY_CAPABLE()) {
2140 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2141 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2142 KMP_INFORM(Uniform, "KMP_AFFINITY");
2143 }
2144 else {
2145 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2146 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
2147 fullMask);
2148 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2149 if (__kmp_affinity_respect_mask) {
2150 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2151 } else {
2152 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2153 }
2154 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2155 KMP_INFORM(Uniform, "KMP_AFFINITY");
2156 }
2157 int index;
2158 kmp_str_buf_t buf;
2159 __kmp_str_buf_init(&buf);
2160 __kmp_str_buf_print(&buf, "1");
2161 for (index = maxIndex - 1; index > pkgIdIndex; index--) {
2162 __kmp_str_buf_print(&buf, " x 1");
2163 }
2164 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
2165 __kmp_str_buf_free(&buf);
2166 }
2167
2168 if (__kmp_affinity_type == affinity_none) {
2169 CLEANUP_THREAD_INFO;
2170 return 0;
2171 }
2172
2173 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
2174 Address addr(1);
2175 addr.labels[0] = threadInfo[0][pkgIdIndex];
2176 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
2177
2178 if (__kmp_affinity_gran_levels < 0) {
2179 __kmp_affinity_gran_levels = 0;
2180 }
2181
2182 if (__kmp_affinity_verbose) {
2183 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
2184 }
2185
2186 CLEANUP_THREAD_INFO;
2187 return 1;
2188 }
2189
2190 //
2191 // Sort the threadInfo table by physical Id.
2192 //
2193 qsort(threadInfo, num_avail, sizeof(*threadInfo),
2194 __kmp_affinity_cmp_ProcCpuInfo_phys_id);
2195
2196 //
2197 // The table is now sorted by pkgId / coreId / threadId, but we really
2198 // don't know the radix of any of the fields. pkgId's may be sparsely
2199 // assigned among the chips on a system. Although coreId's are usually
2200 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
2201 // [0..threadsPerCore-1], we don't want to make any such assumptions.
2202 //
2203 // For that matter, we don't know what coresPerPkg and threadsPerCore
2204 // (or the total # packages) are at this point - we want to determine
2205 // that now. We only have an upper bound on the first two figures.
2206 //
2207 unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
2208 * sizeof(unsigned));
2209 unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
2210 * sizeof(unsigned));
2211 unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
2212 * sizeof(unsigned));
2213 unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
2214 * sizeof(unsigned));
2215
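    //
    // During the radix check below, counts[level] is the number of ids seen
    // at that level under the current parent, maxCt[level] is the maximum of
    // counts[level] over all parents, totals[level] is the total number of
    // nodes seen at that level, and lastId[level] is the id from the
    // previous record.
    //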
2216 bool assign_thread_ids = false;
2217 unsigned threadIdCt;
2218 unsigned index;
2219
2220 restart_radix_check:
2221 threadIdCt = 0;
2222
2223 //
2224 // Initialize the counter arrays with data from threadInfo[0].
2225 //
2226 if (assign_thread_ids) {
2227 if (threadInfo[0][threadIdIndex] == UINT_MAX) {
2228 threadInfo[0][threadIdIndex] = threadIdCt++;
2229 }
2230 else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
2231 threadIdCt = threadInfo[0][threadIdIndex] + 1;
2232 }
2233 }
2234 for (index = 0; index <= maxIndex; index++) {
2235 counts[index] = 1;
2236 maxCt[index] = 1;
2237 totals[index] = 1;
        lastId[index] = threadInfo[0][index];
2239 }
2240
2241 //
2242 // Run through the rest of the OS procs.
2243 //
2244 for (i = 1; i < num_avail; i++) {
2245 //
2246 // Find the most significant index whose id differs
2247 // from the id for the previous OS proc.
2248 //
2249 for (index = maxIndex; index >= threadIdIndex; index--) {
2250 if (assign_thread_ids && (index == threadIdIndex)) {
2251 //
2252 // Auto-assign the thread id field if it wasn't specified.
2253 //
2254 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2255 threadInfo[i][threadIdIndex] = threadIdCt++;
2256 }
2257
2258 //
                // Apparently the thread id field was specified for some
2260 // entries and not others. Start the thread id counter
2261 // off at the next higher thread id.
2262 //
2263 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2264 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2265 }
2266 }
2267 if (threadInfo[i][index] != lastId[index]) {
2268 //
2269 // Run through all indices which are less significant,
2270 // and reset the counts to 1.
2271 //
2272 // At all levels up to and including index, we need to
2273 // increment the totals and record the last id.
2274 //
2275 unsigned index2;
2276 for (index2 = threadIdIndex; index2 < index; index2++) {
2277 totals[index2]++;
2278 if (counts[index2] > maxCt[index2]) {
2279 maxCt[index2] = counts[index2];
2280 }
2281 counts[index2] = 1;
2282 lastId[index2] = threadInfo[i][index2];
2283 }
2284 counts[index]++;
2285 totals[index]++;
2286 lastId[index] = threadInfo[i][index];
2287
2288 if (assign_thread_ids && (index > threadIdIndex)) {
2289
2290# if KMP_MIC && REDUCE_TEAM_SIZE
2291 //
2292 // The default team size is the total #threads in the machine
2293 // minus 1 thread for every core that has 3 or more threads.
2294 //
2295 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2296# endif // KMP_MIC && REDUCE_TEAM_SIZE
2297
2298 //
2299 // Restart the thread counter, as we are on a new core.
2300 //
2301 threadIdCt = 0;
2302
2303 //
2304 // Auto-assign the thread id field if it wasn't specified.
2305 //
2306 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2307 threadInfo[i][threadIdIndex] = threadIdCt++;
2308 }
2309
2310 //
                // Apparently the thread id field was specified for some
2312 // entries and not others. Start the thread id counter
2313 // off at the next higher thread id.
2314 //
2315 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2316 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2317 }
2318 }
2319 break;
2320 }
2321 }
2322 if (index < threadIdIndex) {
2323 //
2324 // If thread ids were specified, it is an error if they are not
            // unique. Also, check that we haven't already restarted the
2326 // loop (to be safe - shouldn't need to).
2327 //
2328 if ((threadInfo[i][threadIdIndex] != UINT_MAX)
2329 || assign_thread_ids) {
2330 __kmp_free(lastId);
2331 __kmp_free(totals);
2332 __kmp_free(maxCt);
2333 __kmp_free(counts);
2334 CLEANUP_THREAD_INFO;
2335 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2336 return -1;
2337 }
2338
2339 //
            // If the thread ids were not specified and we see entries
            // that are duplicates, start the loop over and
2342 // assign the thread ids manually.
2343 //
2344 assign_thread_ids = true;
2345 goto restart_radix_check;
2346 }
2347 }
2348
2349# if KMP_MIC && REDUCE_TEAM_SIZE
2350 //
2351 // The default team size is the total #threads in the machine
2352 // minus 1 thread for every core that has 3 or more threads.
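    // For example, a hypothetical part with 60 cores and 4 thread contexts
    // per core would get a default team size of 240 - 60 = 180.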
2353 //
2354 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2355# endif // KMP_MIC && REDUCE_TEAM_SIZE
2356
2357 for (index = threadIdIndex; index <= maxIndex; index++) {
2358 if (counts[index] > maxCt[index]) {
2359 maxCt[index] = counts[index];
2360 }
2361 }
2362
2363 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2364 nCoresPerPkg = maxCt[coreIdIndex];
2365 nPackages = totals[pkgIdIndex];
2366
2367 //
2368 // Check to see if the machine topology is uniform
2369 //
2370 unsigned prod = totals[maxIndex];
2371 for (index = threadIdIndex; index < maxIndex; index++) {
2372 prod *= maxCt[index];
2373 }
2374 bool uniform = (prod == totals[threadIdIndex]);
2375
2376 //
2377 // When affinity is off, this routine will still be called to set
2378 // __kmp_ht_enabled, & __kmp_ncores, as well as __kmp_nThreadsPerCore,
2379 // nCoresPerPkg, & nPackages. Make sure all these vars are set
2380 // correctly, and return now if affinity is not enabled.
2381 //
2382 __kmp_ht_enabled = (maxCt[threadIdIndex] > 1); // threads per core > 1
2383 __kmp_ncores = totals[coreIdIndex];
2384
2385 if (__kmp_affinity_verbose) {
2386 if (! KMP_AFFINITY_CAPABLE()) {
2387 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2388 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2389 if (uniform) {
2390 KMP_INFORM(Uniform, "KMP_AFFINITY");
2391 } else {
2392 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2393 }
2394 }
2395 else {
2396 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2397 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
2398 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2399 if (__kmp_affinity_respect_mask) {
2400 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2401 } else {
2402 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2403 }
2404 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2405 if (uniform) {
2406 KMP_INFORM(Uniform, "KMP_AFFINITY");
2407 } else {
2408 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2409 }
2410 }
2411 kmp_str_buf_t buf;
2412 __kmp_str_buf_init(&buf);
2413
2414 __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
2415 for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2416 __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
2417 }
2418 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
2419 maxCt[threadIdIndex], __kmp_ncores);
2420
2421 __kmp_str_buf_free(&buf);
2422 }
2423
2424# if KMP_MIC && REDUCE_TEAM_SIZE
2425 //
2426 // Set the default team size.
2427 //
2428 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2429 __kmp_dflt_team_nth = teamSize;
2430 KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
2431 __kmp_dflt_team_nth));
2432 }
2433# endif // KMP_MIC && REDUCE_TEAM_SIZE
2434
2435 if (__kmp_affinity_type == affinity_none) {
2436 __kmp_free(lastId);
2437 __kmp_free(totals);
2438 __kmp_free(maxCt);
2439 __kmp_free(counts);
2440 CLEANUP_THREAD_INFO;
2441 return 0;
2442 }
2443
2444 //
2445 // Count the number of levels which have more nodes at that level than
2446 // at the parent's level (with there being an implicit root node of
2447 // the top level). This is equivalent to saying that there is at least
2448 // one node at this level which has a sibling. These levels are in the
2449 // map, and the package level is always in the map.
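    //
    // For example, if every core exposes exactly one thread context, then
    // totals[threadIdIndex] == totals[coreIdIndex], and the thread level is
    // left out of the map.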
2450 //
2451 bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
2452 int level = 0;
2453 for (index = threadIdIndex; index < maxIndex; index++) {
2454 KMP_ASSERT(totals[index] >= totals[index + 1]);
2455 inMap[index] = (totals[index] > totals[index + 1]);
2456 }
2457 inMap[maxIndex] = (totals[maxIndex] > 1);
2458 inMap[pkgIdIndex] = true;
2459
2460 int depth = 0;
2461 for (index = threadIdIndex; index <= maxIndex; index++) {
2462 if (inMap[index]) {
2463 depth++;
2464 }
2465 }
2466 KMP_ASSERT(depth > 0);
2467
2468 //
2469 // Construct the data structure that is to be returned.
2470 //
2471 *address2os = (AddrUnsPair*)
2472 __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
2473 int pkgLevel = -1;
2474 int coreLevel = -1;
2475 int threadLevel = -1;
2476
2477 for (i = 0; i < num_avail; ++i) {
2478 Address addr(depth);
2479 unsigned os = threadInfo[i][osIdIndex];
2480 int src_index;
2481 int dst_index = 0;
2482
2483 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
2484 if (! inMap[src_index]) {
2485 continue;
2486 }
2487 addr.labels[dst_index] = threadInfo[i][src_index];
2488 if (src_index == pkgIdIndex) {
2489 pkgLevel = dst_index;
2490 }
2491 else if (src_index == coreIdIndex) {
2492 coreLevel = dst_index;
2493 }
2494 else if (src_index == threadIdIndex) {
2495 threadLevel = dst_index;
2496 }
2497 dst_index++;
2498 }
2499 (*address2os)[i] = AddrUnsPair(addr, os);
2500 }
2501
2502 if (__kmp_affinity_gran_levels < 0) {
2503 //
2504 // Set the granularity level based on what levels are modeled
2505 // in the machine topology map.
2506 //
2507 unsigned src_index;
2508 __kmp_affinity_gran_levels = 0;
2509 for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2510 if (! inMap[src_index]) {
2511 continue;
2512 }
2513 switch (src_index) {
2514 case threadIdIndex:
2515 if (__kmp_affinity_gran > affinity_gran_thread) {
2516 __kmp_affinity_gran_levels++;
2517 }
2518
2519 break;
2520 case coreIdIndex:
2521 if (__kmp_affinity_gran > affinity_gran_core) {
2522 __kmp_affinity_gran_levels++;
2523 }
2524 break;
2525
2526 case pkgIdIndex:
2527 if (__kmp_affinity_gran > affinity_gran_package) {
2528 __kmp_affinity_gran_levels++;
2529 }
2530 break;
2531 }
2532 }
2533 }
2534
2535 if (__kmp_affinity_verbose) {
2536 __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2537 coreLevel, threadLevel);
2538 }
2539
2540 __kmp_free(inMap);
2541 __kmp_free(lastId);
2542 __kmp_free(totals);
2543 __kmp_free(maxCt);
2544 __kmp_free(counts);
2545 CLEANUP_THREAD_INFO;
2546 return depth;
2547}
2548
2549
2550//
2551// Create and return a table of affinity masks, indexed by OS thread ID.
2552// This routine handles OR'ing together all the affinity masks of threads
2553// that are sufficiently close, if granularity > fine.
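// On return, *maxIndex holds the largest OS proc id encountered, and
// *numUnique holds the number of distinct masks formed.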
2554//
2555static kmp_affin_mask_t *
2556__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
2557 AddrUnsPair *address2os, unsigned numAddrs)
2558{
2559 //
2560 // First form a table of affinity masks in order of OS thread id.
2561 //
2562 unsigned depth;
2563 unsigned maxOsId;
2564 unsigned i;
2565
2566 KMP_ASSERT(numAddrs > 0);
2567 depth = address2os[0].first.depth;
2568
2569 maxOsId = 0;
2570 for (i = 0; i < numAddrs; i++) {
2571 unsigned osId = address2os[i].second;
2572 if (osId > maxOsId) {
2573 maxOsId = osId;
2574 }
2575 }
2576 kmp_affin_mask_t *osId2Mask = (kmp_affin_mask_t *)__kmp_allocate(
2577 (maxOsId + 1) * __kmp_affin_mask_size);
2578
2579 //
2580 // Sort the address2os table according to physical order. Doing so
2581 // will put all threads on the same core/package/node in consecutive
2582 // locations.
2583 //
2584 qsort(address2os, numAddrs, sizeof(*address2os),
2585 __kmp_affinity_cmp_Address_labels);
2586
2587 KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2588 if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
2589 KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
2590 }
2591 if (__kmp_affinity_gran_levels >= (int)depth) {
2592 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2593 && (__kmp_affinity_type != affinity_none))) {
2594 KMP_WARNING(AffThreadsMayMigrate);
2595 }
2596 }
2597
2598 //
2599 // Run through the table, forming the masks for all threads on each
2600 // core. Threads on the same core will have identical "Address"
2601 // objects, not considering the last level, which must be the thread
2602 // id. All threads on a core will appear consecutively.
2603 //
2604 unsigned unique = 0;
2605 unsigned j = 0; // index of 1st thread on core
2606 unsigned leader = 0;
2607 Address *leaderAddr = &(address2os[0].first);
2608 kmp_affin_mask_t *sum
2609 = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
2610 KMP_CPU_ZERO(sum);
2611 KMP_CPU_SET(address2os[0].second, sum);
2612 for (i = 1; i < numAddrs; i++) {
2613 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002614 // If this thread is sufficiently close to the leader (within the
Jim Cownie5e8470a2013-09-27 10:38:44 +00002615 // granularity setting), then set the bit for this os thread in the
2616 // affinity mask for this group, and go on to the next thread.
2617 //
2618 if (leaderAddr->isClose(address2os[i].first,
2619 __kmp_affinity_gran_levels)) {
2620 KMP_CPU_SET(address2os[i].second, sum);
2621 continue;
2622 }
2623
2624 //
2625 // For every thread in this group, copy the mask to the thread's
2626 // entry in the osId2Mask table. Mark the first address as a
2627 // leader.
2628 //
2629 for (; j < i; j++) {
2630 unsigned osId = address2os[j].second;
2631 KMP_DEBUG_ASSERT(osId <= maxOsId);
2632 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2633 KMP_CPU_COPY(mask, sum);
2634 address2os[j].first.leader = (j == leader);
2635 }
2636 unique++;
2637
2638 //
2639 // Start a new mask.
2640 //
2641 leader = i;
2642 leaderAddr = &(address2os[i].first);
2643 KMP_CPU_ZERO(sum);
2644 KMP_CPU_SET(address2os[i].second, sum);
2645 }
2646
2647 //
2648 // For every thread in last group, copy the mask to the thread's
2649 // entry in the osId2Mask table.
2650 //
2651 for (; j < i; j++) {
2652 unsigned osId = address2os[j].second;
2653 KMP_DEBUG_ASSERT(osId <= maxOsId);
2654 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2655 KMP_CPU_COPY(mask, sum);
2656 address2os[j].first.leader = (j == leader);
2657 }
2658 unique++;
2659
2660 *maxIndex = maxOsId;
2661 *numUnique = unique;
2662 return osId2Mask;
2663}
2664
2665
2666//
2667// Stuff for the affinity proclist parsers. It's easier to declare these vars
2668// as file-static than to try and pass them through the calling sequence of
2669// the recursive-descent OMP_PLACES parser.
2670//
2671static kmp_affin_mask_t *newMasks;
2672static int numNewMasks;
2673static int nextNewMask;
2674
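//
// ADD_MASK appends a copy of a mask to the growing newMasks vector, doubling
// the vector via realloc whenever it fills. ADD_MASK_OSID first validates the
// OS proc id against the osId2Mask table, and (if warnings are enabled) warns
// about ids that are not part of the machine model.
//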
2675#define ADD_MASK(_mask) \
2676 { \
2677 if (nextNewMask >= numNewMasks) { \
2678 numNewMasks *= 2; \
2679 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
2680 numNewMasks * __kmp_affin_mask_size); \
2681 } \
2682 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
2683 nextNewMask++; \
2684 }
2685
2686#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
2687 { \
2688 if (((_osId) > _maxOsId) || \
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002689 (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002690 if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
2691 && (__kmp_affinity_type != affinity_none))) { \
2692 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
2693 } \
2694 } \
2695 else { \
2696 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
2697 } \
2698 }
2699
2700
2701//
2702// Re-parse the proclist (for the explicit affinity type), and form the list
2703// of affinity newMasks indexed by gtid.
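//
// The proclist accepts single OS proc ids, ranges with an optional stride,
// and brace-enclosed sets that are OR'ed into a single mask, all separated
// by commas. An illustrative value: "3,0-2,8-15:2,{30,31}".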
2704//
2705static void
2706__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
2707 unsigned int *out_numMasks, const char *proclist,
2708 kmp_affin_mask_t *osId2Mask, int maxOsId)
2709{
2710 const char *scan = proclist;
2711 const char *next = proclist;
2712
2713 //
2714 // We use malloc() for the temporary mask vector,
2715 // so that we can use realloc() to extend it.
2716 //
2717 numNewMasks = 2;
2718 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
2719 * __kmp_affin_mask_size);
2720 nextNewMask = 0;
2721 kmp_affin_mask_t *sumMask = (kmp_affin_mask_t *)__kmp_allocate(
2722 __kmp_affin_mask_size);
2723 int setSize = 0;
2724
2725 for (;;) {
2726 int start, end, stride;
2727
2728 SKIP_WS(scan);
2729 next = scan;
2730 if (*next == '\0') {
2731 break;
2732 }
2733
2734 if (*next == '{') {
2735 int num;
2736 setSize = 0;
2737 next++; // skip '{'
2738 SKIP_WS(next);
2739 scan = next;
2740
2741 //
2742 // Read the first integer in the set.
2743 //
2744 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2745 "bad proclist");
2746 SKIP_DIGITS(next);
2747 num = __kmp_str_to_int(scan, *next);
2748 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2749
2750 //
2751 // Copy the mask for that osId to the sum (union) mask.
2752 //
2753 if ((num > maxOsId) ||
2754 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2755 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2756 && (__kmp_affinity_type != affinity_none))) {
2757 KMP_WARNING(AffIgnoreInvalidProcID, num);
2758 }
2759 KMP_CPU_ZERO(sumMask);
2760 }
2761 else {
2762 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2763 setSize = 1;
2764 }
2765
2766 for (;;) {
2767 //
2768 // Check for end of set.
2769 //
2770 SKIP_WS(next);
2771 if (*next == '}') {
2772 next++; // skip '}'
2773 break;
2774 }
2775
2776 //
2777 // Skip optional comma.
2778 //
2779 if (*next == ',') {
2780 next++;
2781 }
2782 SKIP_WS(next);
2783
2784 //
2785 // Read the next integer in the set.
2786 //
2787 scan = next;
2788 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2789 "bad explicit proc list");
2790
2791 SKIP_DIGITS(next);
2792 num = __kmp_str_to_int(scan, *next);
2793 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2794
2795 //
2796 // Add the mask for that osId to the sum mask.
2797 //
2798 if ((num > maxOsId) ||
2799 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2800 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2801 && (__kmp_affinity_type != affinity_none))) {
2802 KMP_WARNING(AffIgnoreInvalidProcID, num);
2803 }
2804 }
2805 else {
2806 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2807 setSize++;
2808 }
2809 }
2810 if (setSize > 0) {
2811 ADD_MASK(sumMask);
2812 }
2813
2814 SKIP_WS(next);
2815 if (*next == ',') {
2816 next++;
2817 }
2818 scan = next;
2819 continue;
2820 }
2821
2822 //
2823 // Read the first integer.
2824 //
2825 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2826 SKIP_DIGITS(next);
2827 start = __kmp_str_to_int(scan, *next);
2828 KMP_ASSERT2(start >= 0, "bad explicit proc list");
2829 SKIP_WS(next);
2830
2831 //
2832 // If this isn't a range, then add a mask to the list and go on.
2833 //
2834 if (*next != '-') {
2835 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2836
2837 //
2838 // Skip optional comma.
2839 //
2840 if (*next == ',') {
2841 next++;
2842 }
2843 scan = next;
2844 continue;
2845 }
2846
2847 //
2848 // This is a range. Skip over the '-' and read in the 2nd int.
2849 //
2850 next++; // skip '-'
2851 SKIP_WS(next);
2852 scan = next;
2853 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2854 SKIP_DIGITS(next);
2855 end = __kmp_str_to_int(scan, *next);
2856 KMP_ASSERT2(end >= 0, "bad explicit proc list");
2857
2858 //
2859 // Check for a stride parameter
2860 //
2861 stride = 1;
2862 SKIP_WS(next);
2863 if (*next == ':') {
2864 //
            // A stride is specified. Skip over the ':' and read the 3rd int.
2866 //
2867 int sign = +1;
2868 next++; // skip ':'
2869 SKIP_WS(next);
2870 scan = next;
2871 if (*next == '-') {
2872 sign = -1;
2873 next++;
2874 SKIP_WS(next);
2875 scan = next;
2876 }
2877 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2878 "bad explicit proc list");
2879 SKIP_DIGITS(next);
2880 stride = __kmp_str_to_int(scan, *next);
2881 KMP_ASSERT2(stride >= 0, "bad explicit proc list");
2882 stride *= sign;
2883 }
2884
2885 //
2886 // Do some range checks.
2887 //
2888 KMP_ASSERT2(stride != 0, "bad explicit proc list");
2889 if (stride > 0) {
2890 KMP_ASSERT2(start <= end, "bad explicit proc list");
2891 }
2892 else {
2893 KMP_ASSERT2(start >= end, "bad explicit proc list");
2894 }
2895 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
2896
2897 //
2898 // Add the mask for each OS proc # to the list.
2899 //
2900 if (stride > 0) {
2901 do {
2902 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2903 start += stride;
2904 } while (start <= end);
2905 }
2906 else {
2907 do {
2908 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2909 start += stride;
2910 } while (start >= end);
2911 }
2912
2913 //
2914 // Skip optional comma.
2915 //
2916 SKIP_WS(next);
2917 if (*next == ',') {
2918 next++;
2919 }
2920 scan = next;
2921 }
2922
2923 *out_numMasks = nextNewMask;
2924 if (nextNewMask == 0) {
2925 *out_masks = NULL;
2926 KMP_INTERNAL_FREE(newMasks);
2927 return;
2928 }
2929 *out_masks
2930 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
2931 memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
2932 __kmp_free(sumMask);
2933 KMP_INTERNAL_FREE(newMasks);
2934}
2935
2936
2937# if OMP_40_ENABLED
2938
2939/*-----------------------------------------------------------------------------
2940
2941Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
places. Again, here is the grammar:
2943
2944place_list := place
2945place_list := place , place_list
2946place := num
2947place := place : num
2948place := place : num : signed
2949place := { subplacelist }
2950place := ! place // (lowest priority)
2951subplace_list := subplace
2952subplace_list := subplace , subplace_list
2953subplace := num
2954subplace := num : num
2955subplace := num : num : signed
2956signed := num
2957signed := + signed
2958signed := - signed
2959
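For example (illustrative only), "{0,1,2,3},{4,5,6,7}" describes two places of
four OS procs each, and "{0,1}:4:2" replicates the place {0,1} four times with
a stride of 2, yielding {0,1},{2,3},{4,5},{6,7}.
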
2960-----------------------------------------------------------------------------*/
2961
2962static void
2963__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
2964 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
2965{
2966 const char *next;
2967
2968 for (;;) {
2969 int start, count, stride, i;
2970
2971 //
2972 // Read in the starting proc id
2973 //
2974 SKIP_WS(*scan);
2975 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
2976 "bad explicit places list");
2977 next = *scan;
2978 SKIP_DIGITS(next);
2979 start = __kmp_str_to_int(*scan, *next);
2980 KMP_ASSERT(start >= 0);
2981 *scan = next;
2982
2983 //
2984 // valid follow sets are ',' ':' and '}'
2985 //
2986 SKIP_WS(*scan);
2987 if (**scan == '}' || **scan == ',') {
2988 if ((start > maxOsId) ||
2989 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
2990 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2991 && (__kmp_affinity_type != affinity_none))) {
2992 KMP_WARNING(AffIgnoreInvalidProcID, start);
2993 }
2994 }
2995 else {
2996 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
2997 (*setSize)++;
2998 }
2999 if (**scan == '}') {
3000 break;
3001 }
3002 (*scan)++; // skip ','
3003 continue;
3004 }
3005 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3006 (*scan)++; // skip ':'
3007
3008 //
3009 // Read count parameter
3010 //
3011 SKIP_WS(*scan);
3012 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3013 "bad explicit places list");
3014 next = *scan;
3015 SKIP_DIGITS(next);
3016 count = __kmp_str_to_int(*scan, *next);
3017 KMP_ASSERT(count >= 0);
3018 *scan = next;
3019
3020 //
3021 // valid follow sets are ',' ':' and '}'
3022 //
3023 SKIP_WS(*scan);
3024 if (**scan == '}' || **scan == ',') {
3025 for (i = 0; i < count; i++) {
3026 if ((start > maxOsId) ||
3027 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3028 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3029 && (__kmp_affinity_type != affinity_none))) {
3030 KMP_WARNING(AffIgnoreInvalidProcID, start);
3031 }
3032 break; // don't proliferate warnings for large count
3033 }
3034 else {
3035 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3036 start++;
3037 (*setSize)++;
3038 }
3039 }
3040 if (**scan == '}') {
3041 break;
3042 }
3043 (*scan)++; // skip ','
3044 continue;
3045 }
3046 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3047 (*scan)++; // skip ':'
3048
3049 //
3050 // Read stride parameter
3051 //
3052 int sign = +1;
3053 for (;;) {
3054 SKIP_WS(*scan);
3055 if (**scan == '+') {
3056 (*scan)++; // skip '+'
3057 continue;
3058 }
3059 if (**scan == '-') {
3060 sign *= -1;
3061 (*scan)++; // skip '-'
3062 continue;
3063 }
3064 break;
3065 }
3066 SKIP_WS(*scan);
3067 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3068 "bad explicit places list");
3069 next = *scan;
3070 SKIP_DIGITS(next);
3071 stride = __kmp_str_to_int(*scan, *next);
3072 KMP_ASSERT(stride >= 0);
3073 *scan = next;
3074 stride *= sign;
3075
3076 //
3077 // valid follow sets are ',' and '}'
3078 //
3079 SKIP_WS(*scan);
3080 if (**scan == '}' || **scan == ',') {
3081 for (i = 0; i < count; i++) {
3082 if ((start > maxOsId) ||
3083 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3084 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3085 && (__kmp_affinity_type != affinity_none))) {
3086 KMP_WARNING(AffIgnoreInvalidProcID, start);
3087 }
3088 break; // don't proliferate warnings for large count
3089 }
3090 else {
3091 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3092 start += stride;
3093 (*setSize)++;
3094 }
3095 }
3096 if (**scan == '}') {
3097 break;
3098 }
3099 (*scan)++; // skip ','
3100 continue;
3101 }
3102
3103 KMP_ASSERT2(0, "bad explicit places list");
3104 }
3105}
3106
3107
3108static void
3109__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
3110 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3111{
3112 const char *next;
3113
3114 //
3115 // valid follow sets are '{' '!' and num
3116 //
3117 SKIP_WS(*scan);
3118 if (**scan == '{') {
3119 (*scan)++; // skip '{'
3120 __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
3121 setSize);
3122 KMP_ASSERT2(**scan == '}', "bad explicit places list");
3123 (*scan)++; // skip '}'
3124 }
3125 else if (**scan == '!') {
3126 __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
3127 KMP_CPU_COMPLEMENT(tempMask);
3128 (*scan)++; // skip '!'
3129 }
3130 else if ((**scan >= '0') && (**scan <= '9')) {
3131 next = *scan;
3132 SKIP_DIGITS(next);
3133 int num = __kmp_str_to_int(*scan, *next);
3134 KMP_ASSERT(num >= 0);
3135 if ((num > maxOsId) ||
3136 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3137 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3138 && (__kmp_affinity_type != affinity_none))) {
3139 KMP_WARNING(AffIgnoreInvalidProcID, num);
3140 }
3141 }
3142 else {
3143 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
3144 (*setSize)++;
3145 }
3146 *scan = next; // skip num
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003147 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003148 else {
3149 KMP_ASSERT2(0, "bad explicit places list");
3150 }
3151}
3152
3153
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003154//static void
3155void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003156__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
3157 unsigned int *out_numMasks, const char *placelist,
3158 kmp_affin_mask_t *osId2Mask, int maxOsId)
3159{
3160 const char *scan = placelist;
3161 const char *next = placelist;
3162
3163 numNewMasks = 2;
3164 newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
3165 * __kmp_affin_mask_size);
3166 nextNewMask = 0;
3167
3168 kmp_affin_mask_t *tempMask = (kmp_affin_mask_t *)__kmp_allocate(
3169 __kmp_affin_mask_size);
3170 KMP_CPU_ZERO(tempMask);
3171 int setSize = 0;
3172
3173 for (;;) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003174 __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
3175
3176 //
3177 // valid follow sets are ',' ':' and EOL
3178 //
3179 SKIP_WS(scan);
3180 if (*scan == '\0' || *scan == ',') {
3181 if (setSize > 0) {
3182 ADD_MASK(tempMask);
3183 }
3184 KMP_CPU_ZERO(tempMask);
3185 setSize = 0;
3186 if (*scan == '\0') {
3187 break;
3188 }
3189 scan++; // skip ','
3190 continue;
3191 }
3192
3193 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3194 scan++; // skip ':'
3195
3196 //
3197 // Read count parameter
3198 //
3199 SKIP_WS(scan);
3200 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3201 "bad explicit places list");
3202 next = scan;
3203 SKIP_DIGITS(next);
Jim Cownie181b4bb2013-12-23 17:28:57 +00003204 int count = __kmp_str_to_int(scan, *next);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003205 KMP_ASSERT(count >= 0);
3206 scan = next;
3207
3208 //
3209 // valid follow sets are ',' ':' and EOL
3210 //
3211 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003212 int stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003213 if (*scan == '\0' || *scan == ',') {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003214 stride = +1;
3215 }
3216 else {
3217 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3218 scan++; // skip ':'
Jim Cownie5e8470a2013-09-27 10:38:44 +00003219
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003220 //
3221 // Read stride parameter
3222 //
3223 int sign = +1;
3224 for (;;) {
3225 SKIP_WS(scan);
3226 if (*scan == '+') {
3227 scan++; // skip '+'
3228 continue;
3229 }
3230 if (*scan == '-') {
3231 sign *= -1;
3232 scan++; // skip '-'
3233 continue;
3234 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003235 break;
3236 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003237 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003238 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3239 "bad explicit places list");
3240 next = scan;
3241 SKIP_DIGITS(next);
3242 stride = __kmp_str_to_int(scan, *next);
3243 KMP_DEBUG_ASSERT(stride >= 0);
3244 scan = next;
3245 stride *= sign;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003246 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003247
3248 if (stride > 0) {
3249 int i;
3250 for (i = 0; i < count; i++) {
3251 int j;
3252 if (setSize == 0) {
3253 break;
3254 }
3255 ADD_MASK(tempMask);
3256 setSize = 0;
3257 for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003258 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3259 KMP_CPU_CLR(j, tempMask);
3260 }
3261 else if ((j > maxOsId) ||
3262 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
3263 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3264 && (__kmp_affinity_type != affinity_none))) {
3265 KMP_WARNING(AffIgnoreInvalidProcID, j);
3266 }
3267 KMP_CPU_CLR(j, tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003268 }
3269 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003270 KMP_CPU_SET(j, tempMask);
3271 setSize++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003272 }
3273 }
3274 for (; j >= 0; j--) {
3275 KMP_CPU_CLR(j, tempMask);
3276 }
3277 }
3278 }
3279 else {
3280 int i;
3281 for (i = 0; i < count; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003282 int j;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003283 if (setSize == 0) {
3284 break;
3285 }
3286 ADD_MASK(tempMask);
3287 setSize = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003288 for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003289 j++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003290 if (! KMP_CPU_ISSET(j - stride, tempMask)) {
3291 KMP_CPU_CLR(j, tempMask);
3292 }
3293 else if ((j > maxOsId) ||
3294 (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
3295 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3296 && (__kmp_affinity_type != affinity_none))) {
3297 KMP_WARNING(AffIgnoreInvalidProcID, j);
3298 }
3299 KMP_CPU_CLR(j, tempMask);
3300 }
3301 else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003302 KMP_CPU_SET(j, tempMask);
3303 setSize++;
3304 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003305 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003306 for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003307 KMP_CPU_CLR(j, tempMask);
3308 }
3309 }
3310 }
3311 KMP_CPU_ZERO(tempMask);
3312 setSize = 0;
3313
3314 //
3315 // valid follow sets are ',' and EOL
3316 //
3317 SKIP_WS(scan);
3318 if (*scan == '\0') {
3319 break;
3320 }
3321 if (*scan == ',') {
3322 scan++; // skip ','
3323 continue;
3324 }
3325
3326 KMP_ASSERT2(0, "bad explicit places list");
3327 }
3328
3329 *out_numMasks = nextNewMask;
3330 if (nextNewMask == 0) {
3331 *out_masks = NULL;
3332 KMP_INTERNAL_FREE(newMasks);
3333 return;
3334 }
3335 *out_masks
3336 = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
3337 memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
3338 __kmp_free(tempMask);
3339 KMP_INTERNAL_FREE(newMasks);
3340}
3341
3342# endif /* OMP_40_ENABLED */
3343
3344#undef ADD_MASK
3345#undef ADD_MASK_OSID
3346
3347
3348# if KMP_MIC
3349
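//
// Trim the topology map according to the __kmp_place_num_cores,
// __kmp_place_num_threads_per_core, and __kmp_place_core_offset settings:
// only the requested cores on each package, and the requested number of
// thread contexts on each of those cores, are kept. The global core,
// thread, and proc counts are updated to match.
//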
3350static void
3351__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
3352{
3353 if ( __kmp_place_num_cores == 0 ) {
3354 if ( __kmp_place_num_threads_per_core == 0 ) {
3355 return; // no cores limiting actions requested, exit
3356 }
3357 __kmp_place_num_cores = nCoresPerPkg; // use all available cores
3358 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003359 if ( !__kmp_affinity_uniform_topology() ) {
3360 KMP_WARNING( AffThrPlaceNonUniform );
3361 return; // don't support non-uniform topology
3362 }
3363 if ( depth != 3 ) {
3364 KMP_WARNING( AffThrPlaceNonThreeLevel );
3365 return; // don't support not-3-level topology
Jim Cownie5e8470a2013-09-27 10:38:44 +00003366 }
3367 if ( __kmp_place_num_threads_per_core == 0 ) {
3368 __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
3369 }
3370 if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
3371 KMP_WARNING( AffThrPlaceManyCores );
3372 return;
3373 }
3374
3375 AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
3376 nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
3377 int i, j, k, n_old = 0, n_new = 0;
3378 for ( i = 0; i < nPackages; ++i ) {
3379 for ( j = 0; j < nCoresPerPkg; ++j ) {
3380 if ( j < __kmp_place_core_offset || j >= __kmp_place_core_offset + __kmp_place_num_cores ) {
3381 n_old += __kmp_nThreadsPerCore; // skip not-requested core
3382 } else {
3383 for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) {
3384 if ( k < __kmp_place_num_threads_per_core ) {
3385 newAddr[n_new] = (*pAddr)[n_old]; // copy requested core' data to new location
3386 n_new++;
3387 }
3388 n_old++;
3389 }
3390 }
3391 }
3392 }
3393 nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
3394 __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
3395 __kmp_avail_proc = n_new; // correct avail_proc
3396 __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
3397
3398 __kmp_free( *pAddr );
3399 *pAddr = newAddr; // replace old topology with new one
3400}
3401
3402# endif /* KMP_MIC */
3403
3404
3405static AddrUnsPair *address2os = NULL;
3406static int * procarr = NULL;
3407static int __kmp_aff_depth = 0;
3408
3409static void
3410__kmp_aux_affinity_initialize(void)
3411{
3412 if (__kmp_affinity_masks != NULL) {
3413 KMP_ASSERT(fullMask != NULL);
3414 return;
3415 }
3416
3417 //
3418 // Create the "full" mask - this defines all of the processors that we
3419 // consider to be in the machine model. If respect is set, then it is
3420 // the initialization thread's affinity mask. Otherwise, it is all
3421 // processors that we know about on the machine.
3422 //
3423 if (fullMask == NULL) {
3424 fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
3425 }
3426 if (KMP_AFFINITY_CAPABLE()) {
3427 if (__kmp_affinity_respect_mask) {
3428 __kmp_get_system_affinity(fullMask, TRUE);
3429
3430 //
3431 // Count the number of available processors.
3432 //
3433 unsigned i;
3434 __kmp_avail_proc = 0;
3435 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
3436 if (! KMP_CPU_ISSET(i, fullMask)) {
3437 continue;
3438 }
3439 __kmp_avail_proc++;
3440 }
3441 if (__kmp_avail_proc > __kmp_xproc) {
3442 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3443 && (__kmp_affinity_type != affinity_none))) {
3444 KMP_WARNING(ErrorInitializeAffinity);
3445 }
3446 __kmp_affinity_type = affinity_none;
3447 __kmp_affin_mask_size = 0;
3448 return;
3449 }
3450 }
3451 else {
3452 __kmp_affinity_entire_machine_mask(fullMask);
3453 __kmp_avail_proc = __kmp_xproc;
3454 }
3455 }
3456
3457 int depth = -1;
3458 kmp_i18n_id_t msg_id = kmp_i18n_null;
3459
3460 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00003461 // For backward compatibility, setting KMP_CPUINFO_FILE =>
Jim Cownie5e8470a2013-09-27 10:38:44 +00003462 // KMP_TOPOLOGY_METHOD=cpuinfo
3463 //
3464 if ((__kmp_cpuinfo_file != NULL) &&
3465 (__kmp_affinity_top_method == affinity_top_method_all)) {
3466 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
3467 }
3468
3469 if (__kmp_affinity_top_method == affinity_top_method_all) {
3470 //
3471 // In the default code path, errors are not fatal - we just try using
3472 // another method. We only emit a warning message if affinity is on,
        // or the verbose flag is set, and the nowarnings flag was not set.
3474 //
3475 const char *file_name = NULL;
3476 int line = 0;
3477
3478# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3479
3480 if (__kmp_affinity_verbose) {
3481 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
3482 }
3483
3484 file_name = NULL;
3485 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3486 if (depth == 0) {
3487 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3488 KMP_ASSERT(address2os == NULL);
3489 return;
3490 }
3491
3492 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003493 if (__kmp_affinity_verbose) {
3494 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003495 KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
3496 KMP_I18N_STR(DecodingLegacyAPIC));
3497 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003498 else {
3499 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
3500 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003501 }
3502
3503 file_name = NULL;
3504 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3505 if (depth == 0) {
3506 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3507 KMP_ASSERT(address2os == NULL);
3508 return;
3509 }
3510 }
3511
3512# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3513
3514# if KMP_OS_LINUX
3515
3516 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003517 if (__kmp_affinity_verbose) {
3518 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003519 KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
3520 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003521 else {
3522 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
3523 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003524 }
3525
3526 FILE *f = fopen("/proc/cpuinfo", "r");
3527 if (f == NULL) {
3528 msg_id = kmp_i18n_str_CantOpenCpuinfo;
3529 }
3530 else {
3531 file_name = "/proc/cpuinfo";
3532 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3533 fclose(f);
3534 if (depth == 0) {
3535 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3536 KMP_ASSERT(address2os == NULL);
3537 return;
3538 }
3539 }
3540 }
3541
3542# endif /* KMP_OS_LINUX */
3543
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003544# if KMP_OS_WINDOWS && KMP_ARCH_X86_64
3545
3546 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
3547 if (__kmp_affinity_verbose) {
3548 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3549 }
3550
3551 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3552 KMP_ASSERT(depth != 0);
3553 }
3554
3555# endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */
3556
Jim Cownie5e8470a2013-09-27 10:38:44 +00003557 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003558 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003559 if (file_name == NULL) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003560 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003561 }
3562 else if (line == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003563 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003564 }
3565 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003566 KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003567 }
3568 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003569 // FIXME - print msg if msg_id = kmp_i18n_null ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00003570
3571 file_name = "";
3572 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3573 if (depth == 0) {
3574 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3575 KMP_ASSERT(address2os == NULL);
3576 return;
3577 }
3578 KMP_ASSERT(depth > 0);
3579 KMP_ASSERT(address2os != NULL);
3580 }
3581 }
3582
3583 //
    // If the user has specified that a particular topology discovery method
3585 // is to be used, then we abort if that method fails. The exception is
3586 // group affinity, which might have been implicitly set.
3587 //
3588
3589# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3590
3591 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
3592 if (__kmp_affinity_verbose) {
3593 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3594 KMP_I18N_STR(Decodingx2APIC));
3595 }
3596
3597 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3598 if (depth == 0) {
3599 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3600 KMP_ASSERT(address2os == NULL);
3601 return;
3602 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003603 if (depth < 0) {
3604 KMP_ASSERT(msg_id != kmp_i18n_null);
3605 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3606 }
3607 }
3608 else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
3609 if (__kmp_affinity_verbose) {
3610 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3611 KMP_I18N_STR(DecodingLegacyAPIC));
3612 }
3613
3614 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3615 if (depth == 0) {
3616 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3617 KMP_ASSERT(address2os == NULL);
3618 return;
3619 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003620 if (depth < 0) {
3621 KMP_ASSERT(msg_id != kmp_i18n_null);
3622 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3623 }
3624 }
3625
3626# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3627
3628 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
3629 const char *filename;
3630 if (__kmp_cpuinfo_file != NULL) {
3631 filename = __kmp_cpuinfo_file;
3632 }
3633 else {
3634 filename = "/proc/cpuinfo";
3635 }
3636
3637 if (__kmp_affinity_verbose) {
3638 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
3639 }
3640
3641 FILE *f = fopen(filename, "r");
3642 if (f == NULL) {
3643 int code = errno;
3644 if (__kmp_cpuinfo_file != NULL) {
3645 __kmp_msg(
3646 kmp_ms_fatal,
3647 KMP_MSG(CantOpenFileForReading, filename),
3648 KMP_ERR(code),
3649 KMP_HNT(NameComesFrom_CPUINFO_FILE),
3650 __kmp_msg_null
3651 );
3652 }
3653 else {
3654 __kmp_msg(
3655 kmp_ms_fatal,
3656 KMP_MSG(CantOpenFileForReading, filename),
3657 KMP_ERR(code),
3658 __kmp_msg_null
3659 );
3660 }
3661 }
3662 int line = 0;
3663 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3664 fclose(f);
3665 if (depth < 0) {
3666 KMP_ASSERT(msg_id != kmp_i18n_null);
3667 if (line > 0) {
3668 KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
3669 }
3670 else {
3671 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
3672 }
3673 }
3674 if (__kmp_affinity_type == affinity_none) {
3675 KMP_ASSERT(depth == 0);
3676 KMP_ASSERT(address2os == NULL);
3677 return;
3678 }
3679 }
3680
3681# if KMP_OS_WINDOWS && KMP_ARCH_X86_64
3682
3683 else if (__kmp_affinity_top_method == affinity_top_method_group) {
3684 if (__kmp_affinity_verbose) {
3685 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3686 }
3687
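        // Build the topology map from the Windows* OS processor groups. This
        // method is never expected to disable affinity, hence the depth != 0 assert.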
3688 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3689 KMP_ASSERT(depth != 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003690 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003691 KMP_ASSERT(msg_id != kmp_i18n_null);
3692 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003693 }
3694 }
3695
3696# endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */
3697
3698 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
3699 if (__kmp_affinity_verbose) {
3700 KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
3701 }
3702
3703 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3704 if (depth == 0) {
3705 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3706 KMP_ASSERT(address2os == NULL);
3707 return;
3708 }
3709 // should not fail
3710 KMP_ASSERT(depth > 0);
3711 KMP_ASSERT(address2os != NULL);
3712 }
3713
3714 if (address2os == NULL) {
3715 if (KMP_AFFINITY_CAPABLE()
3716 && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3717 && (__kmp_affinity_type != affinity_none)))) {
3718 KMP_WARNING(ErrorInitializeAffinity);
3719 }
3720 __kmp_affinity_type = affinity_none;
3721 __kmp_affin_mask_size = 0;
3722 return;
3723 }
3724
3725# if KMP_MIC
3726 __kmp_apply_thread_places(&address2os, depth);
3727# endif
3728
3729 //
3730 // Create the table of masks, indexed by thread Id.
3731 //
3732 unsigned maxIndex;
3733 unsigned numUnique;
3734 kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
3735 address2os, __kmp_avail_proc);
3736 if (__kmp_affinity_gran_levels == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003737 KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003738 }
3739
3740 //
3741 // Set the childNums vector in all Address objects. This must be done
3742 // before we can sort using __kmp_affinity_cmp_Address_child_num(),
3743 // which takes into account the setting of __kmp_affinity_compact.
3744 //
3745 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
3746
3747 switch (__kmp_affinity_type) {
3748
3749 case affinity_explicit:
3750 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
3751# if OMP_40_ENABLED
3752 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3753# endif
3754 {
3755 __kmp_affinity_process_proclist(&__kmp_affinity_masks,
3756 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3757 maxIndex);
3758 }
3759# if OMP_40_ENABLED
3760 else {
3761 __kmp_affinity_process_placelist(&__kmp_affinity_masks,
3762 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3763 maxIndex);
3764 }
3765# endif
3766 if (__kmp_affinity_num_masks == 0) {
3767 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3768 && (__kmp_affinity_type != affinity_none))) {
3769 KMP_WARNING(AffNoValidProcID);
3770 }
3771 __kmp_affinity_type = affinity_none;
3772 return;
3773 }
3774 break;
3775
3776 //
3777 // The other affinity types rely on sorting the Addresses according
3778 // to some permutation of the machine topology tree. Set
3779 // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
3780 // then jump to a common code fragment to do the sort and create
3781 // the array of affinity masks.
3782 //
3783
3784 case affinity_logical:
3785 __kmp_affinity_compact = 0;
3786 if (__kmp_affinity_offset) {
3787 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3788 % __kmp_avail_proc;
3789 }
3790 goto sortAddresses;
3791
3792 case affinity_physical:
3793 if (__kmp_nThreadsPerCore > 1) {
3794 __kmp_affinity_compact = 1;
3795 if (__kmp_affinity_compact >= depth) {
3796 __kmp_affinity_compact = 0;
3797 }
3798 } else {
3799 __kmp_affinity_compact = 0;
3800 }
3801 if (__kmp_affinity_offset) {
3802 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3803 % __kmp_avail_proc;
3804 }
3805 goto sortAddresses;
3806
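    // scatter is the mirror image of compact: inverting the compaction level
    // spreads consecutive threads across the topology rather than packing them.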
3807 case affinity_scatter:
3808 if (__kmp_affinity_compact >= depth) {
3809 __kmp_affinity_compact = 0;
3810 }
3811 else {
3812 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
3813 }
3814 goto sortAddresses;
3815
3816 case affinity_compact:
3817 if (__kmp_affinity_compact >= depth) {
3818 __kmp_affinity_compact = depth - 1;
3819 }
3820 goto sortAddresses;
3821
3822# if KMP_MIC
3823 case affinity_balanced:
3824 // Balanced works only for the case of a single package and uniform topology
3825 if( nPackages > 1 ) {
3826 if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
3827 KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
3828 }
3829 __kmp_affinity_type = affinity_none;
3830 return;
3831 } else if( __kmp_affinity_uniform_topology() ) {
3832 break;
3833 } else { // Non-uniform topology
3834
3835 // Save the depth for further usage
3836 __kmp_aff_depth = depth;
3837
3838 // Number of hyper threads per core in HT machine
3839 int nth_per_core = __kmp_nThreadsPerCore;
3840
3841 int core_level;
3842 if( nth_per_core > 1 ) {
3843 core_level = depth - 2;
3844 } else {
3845 core_level = depth - 1;
3846 }
3847 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
3848 int nproc = nth_per_core * ncores;
3849
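            // procarr[ core * nth_per_core + thread ] holds the OS proc id occupying
            // each (core, thread-context) slot; -1 marks slots with no processor.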
3850 procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
3851 for( int i = 0; i < nproc; i++ ) {
3852 procarr[ i ] = -1;
3853 }
3854
3855 for( int i = 0; i < __kmp_avail_proc; i++ ) {
3856 int proc = address2os[ i ].second;
3857 // If depth == 3 then level=0 - package, level=1 - core, level=2 - thread.
3858 // If there is only one thread per core then depth == 2: level 0 - package,
3859 // level 1 - core.
3860 int level = depth - 1;
3861
3862                // Default case (one thread context per core): the core id is the last label and the thread id is 0.
3863 int thread = 0;
3864 int core = address2os[ i ].first.labels[ level ];
3865                // If the thread level exists, that is, we have more than one thread context per core
3866 if( nth_per_core > 1 ) {
3867 thread = address2os[ i ].first.labels[ level ] % nth_per_core;
3868 core = address2os[ i ].first.labels[ level - 1 ];
3869 }
3870 procarr[ core * nth_per_core + thread ] = proc;
3871 }
3872
3873 break;
3874 }
3875# endif
3876
3877 sortAddresses:
3878 //
3879 // Allocate the gtid->affinity mask table.
3880 //
3881 if (__kmp_affinity_dups) {
3882 __kmp_affinity_num_masks = __kmp_avail_proc;
3883 }
3884 else {
3885 __kmp_affinity_num_masks = numUnique;
3886 }
3887
3888# if OMP_40_ENABLED
3889 if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
3890 && ( __kmp_affinity_num_places > 0 )
3891 && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
3892 __kmp_affinity_num_masks = __kmp_affinity_num_places;
3893 }
3894# endif
3895
3896 __kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
3897 __kmp_affinity_num_masks * __kmp_affin_mask_size);
3898
3899 //
3900 // Sort the address2os table according to the current setting of
3901 // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
3902 //
3903 qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
3904 __kmp_affinity_cmp_Address_child_num);
3905 {
3906 int i;
3907 unsigned j;
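            // Walk the sorted addresses and copy one mask per place; unless duplicates
            // were requested, only the leader of each granularity group contributes.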
3908 for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
3909 if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
3910 continue;
3911 }
3912 unsigned osId = address2os[i].second;
3913 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
3914 kmp_affin_mask_t *dest
3915 = KMP_CPU_INDEX(__kmp_affinity_masks, j);
3916 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
3917 KMP_CPU_COPY(dest, src);
3918 if (++j >= __kmp_affinity_num_masks) {
3919 break;
3920 }
3921 }
3922 KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
3923 }
3924 break;
3925
3926 default:
3927 KMP_ASSERT2(0, "Unexpected affinity setting");
3928 }
3929
3930 __kmp_free(osId2Mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003931 machine_hierarchy.init(address2os, __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003932}
3933
3934
3935void
3936__kmp_affinity_initialize(void)
3937{
3938 //
3939    // Much of the code above was written assuming that if a machine was not
3940 // affinity capable, then __kmp_affinity_type == affinity_none. We now
3941 // explicitly represent this as __kmp_affinity_type == affinity_disabled.
3942 //
3943 // There are too many checks for __kmp_affinity_type == affinity_none
3944 // in this code. Instead of trying to change them all, check if
3945 // __kmp_affinity_type == affinity_disabled, and if so, slam it with
3946 // affinity_none, call the real initialization routine, then restore
3947 // __kmp_affinity_type to affinity_disabled.
3948 //
3949 int disabled = (__kmp_affinity_type == affinity_disabled);
3950 if (! KMP_AFFINITY_CAPABLE()) {
3951 KMP_ASSERT(disabled);
3952 }
3953 if (disabled) {
3954 __kmp_affinity_type = affinity_none;
3955 }
3956 __kmp_aux_affinity_initialize();
3957 if (disabled) {
3958 __kmp_affinity_type = affinity_disabled;
3959 }
3960}
3961
3962
3963void
3964__kmp_affinity_uninitialize(void)
3965{
3966 if (__kmp_affinity_masks != NULL) {
3967 __kmp_free(__kmp_affinity_masks);
3968 __kmp_affinity_masks = NULL;
3969 }
3970 if (fullMask != NULL) {
3971 KMP_CPU_FREE(fullMask);
3972 fullMask = NULL;
3973 }
3974 __kmp_affinity_num_masks = 0;
3975# if OMP_40_ENABLED
3976 __kmp_affinity_num_places = 0;
3977# endif
3978 if (__kmp_affinity_proclist != NULL) {
3979 __kmp_free(__kmp_affinity_proclist);
3980 __kmp_affinity_proclist = NULL;
3981 }
3982 if( address2os != NULL ) {
3983 __kmp_free( address2os );
3984 address2os = NULL;
3985 }
3986 if( procarr != NULL ) {
3987 __kmp_free( procarr );
3988 procarr = NULL;
3989 }
3990}
3991
3992
3993void
3994__kmp_affinity_set_init_mask(int gtid, int isa_root)
3995{
3996 if (! KMP_AFFINITY_CAPABLE()) {
3997 return;
3998 }
3999
4000 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4001 if (th->th.th_affin_mask == NULL) {
4002 KMP_CPU_ALLOC(th->th.th_affin_mask);
4003 }
4004 else {
4005 KMP_CPU_ZERO(th->th.th_affin_mask);
4006 }
4007
4008 //
4009    // Copy the thread mask to the kmp_info_t structure.
4010 // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
4011 // that has all of the OS proc ids set, or if __kmp_affinity_respect_mask
4012 // is set, then the full mask is the same as the mask of the initialization
4013 // thread.
4014 //
4015 kmp_affin_mask_t *mask;
4016 int i;
4017
4018# if OMP_40_ENABLED
4019 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
4020# endif
4021 {
4022 if ((__kmp_affinity_type == affinity_none)
4023# if KMP_MIC
4024 || (__kmp_affinity_type == affinity_balanced)
4025# endif
4026 ) {
4027# if KMP_OS_WINDOWS && KMP_ARCH_X86_64
4028 if (__kmp_num_proc_groups > 1) {
4029 return;
4030 }
4031# endif
4032 KMP_ASSERT(fullMask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004033 i = KMP_PLACE_ALL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004034 mask = fullMask;
4035 }
4036 else {
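            // Affinity is enabled: assign masks round-robin by gtid, shifted by
            // the affinity offset setting.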
4037 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4038 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4039 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4040 }
4041 }
4042# if OMP_40_ENABLED
4043 else {
4044 if ((! isa_root)
4045 || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
4046# if KMP_OS_WINDOWS && KMP_ARCH_X86_64
4047 if (__kmp_num_proc_groups > 1) {
4048 return;
4049 }
4050# endif
4051 KMP_ASSERT(fullMask != NULL);
4052 i = KMP_PLACE_ALL;
4053 mask = fullMask;
4054 }
4055 else {
4056 //
4057 // int i = some hash function or just a counter that doesn't
4058 // always start at 0. Use gtid for now.
4059 //
4060 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4061 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4062 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4063 }
4064 }
4065# endif
4066
4067# if OMP_40_ENABLED
4068 th->th.th_current_place = i;
4069 if (isa_root) {
4070 th->th.th_new_place = i;
4071 th->th.th_first_place = 0;
4072 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4073 }
4074
4075 if (i == KMP_PLACE_ALL) {
4076 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
4077 gtid));
4078 }
4079 else {
4080 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
4081 gtid, i));
4082 }
4083# else
4084 if (i == -1) {
4085 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
4086 gtid));
4087 }
4088 else {
4089 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
4090 gtid, i));
4091 }
4092# endif /* OMP_40_ENABLED */
4093
4094 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4095
4096 if (__kmp_affinity_verbose) {
4097 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4098 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4099 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004100 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
4101 buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004102 }
4103
4104# if KMP_OS_WINDOWS
4105 //
4106 // On Windows* OS, the process affinity mask might have changed.
4107 // If the user didn't request affinity and this call fails,
4108 // just continue silently. See CQ171393.
4109 //
4110 if ( __kmp_affinity_type == affinity_none ) {
4111 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
4112 }
4113 else
4114# endif
4115 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4116}
4117
4118
4119# if OMP_40_ENABLED
4120
4121void
4122__kmp_affinity_set_place(int gtid)
4123{
4124 int retval;
4125
4126 if (! KMP_AFFINITY_CAPABLE()) {
4127 return;
4128 }
4129
4130 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4131
4132 KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
4133 gtid, th->th.th_new_place, th->th.th_current_place));
4134
4135 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00004136 // Check that the new place is within this thread's partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004137 //
4138 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004139 KMP_ASSERT(th->th.th_new_place >= 0);
4140 KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004141 if (th->th.th_first_place <= th->th.th_last_place) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004142 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004143 && (th->th.th_new_place <= th->th.th_last_place));
4144 }
4145 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004146 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004147 || (th->th.th_new_place >= th->th.th_last_place));
4148 }
4149
4150 //
4151    // Copy the thread mask to the kmp_info_t structure,
4152 // and set this thread's affinity.
4153 //
4154 kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
4155 th->th.th_new_place);
4156 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4157 th->th.th_current_place = th->th.th_new_place;
4158
4159 if (__kmp_affinity_verbose) {
4160 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4161 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4162 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004163 KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
4164 gtid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004165 }
4166 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4167}
4168
4169# endif /* OMP_40_ENABLED */
4170
4171
4172int
4173__kmp_aux_set_affinity(void **mask)
4174{
4175 int gtid;
4176 kmp_info_t *th;
4177 int retval;
4178
4179 if (! KMP_AFFINITY_CAPABLE()) {
4180 return -1;
4181 }
4182
4183 gtid = __kmp_entry_gtid();
4184 KA_TRACE(1000, ;{
4185 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4186 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4187 (kmp_affin_mask_t *)(*mask));
4188 __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
4189 gtid, buf);
4190 });
4191
4192 if (__kmp_env_consistency_check) {
4193 if ((mask == NULL) || (*mask == NULL)) {
4194 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4195 }
4196 else {
4197 unsigned proc;
4198 int num_procs = 0;
4199
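            // The user-supplied mask must be non-empty and a subset of the machine's
            // full mask (and, on Windows* OS, must not span processor groups).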
4200 for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
4201 if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
4202 continue;
4203 }
4204 num_procs++;
4205 if (! KMP_CPU_ISSET(proc, fullMask)) {
4206 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4207 break;
4208 }
4209 }
4210 if (num_procs == 0) {
4211 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4212 }
4213
4214# if KMP_OS_WINDOWS && KMP_ARCH_X86_64
4215 if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
4216 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4217 }
4218# endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */
4219
4220 }
4221 }
4222
4223 th = __kmp_threads[gtid];
4224 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4225 retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4226 if (retval == 0) {
4227 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
4228 }
4229
4230# if OMP_40_ENABLED
4231 th->th.th_current_place = KMP_PLACE_UNDEFINED;
4232 th->th.th_new_place = KMP_PLACE_UNDEFINED;
4233 th->th.th_first_place = 0;
4234 th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004235
4236 //
4237    // Turn off 4.0 affinity for the current thread at this parallel level.
4238 //
4239 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004240# endif
4241
4242 return retval;
4243}
4244
4245
4246int
4247__kmp_aux_get_affinity(void **mask)
4248{
4249 int gtid;
4250 int retval;
4251 kmp_info_t *th;
4252
4253 if (! KMP_AFFINITY_CAPABLE()) {
4254 return -1;
4255 }
4256
4257 gtid = __kmp_entry_gtid();
4258 th = __kmp_threads[gtid];
4259 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4260
4261 KA_TRACE(1000, ;{
4262 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4263 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4264 th->th.th_affin_mask);
4265 __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
4266 });
4267
4268 if (__kmp_env_consistency_check) {
4269 if ((mask == NULL) || (*mask == NULL)) {
4270 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
4271 }
4272 }
4273
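    // On most OSes, query the live mask from the OS; on Windows* OS, return the
    // runtime's stored copy of the thread's mask instead.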
4274# if !KMP_OS_WINDOWS
4275
4276 retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4277 KA_TRACE(1000, ;{
4278 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4279 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4280 (kmp_affin_mask_t *)(*mask));
4281 __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
4282 });
4283 return retval;
4284
4285# else
4286
4287 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
4288 return 0;
4289
4290# endif /* KMP_OS_WINDOWS */
4291
4292}
4293
Jim Cownie5e8470a2013-09-27 10:38:44 +00004294int
4295__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
4296{
4297 int retval;
4298
4299 if (! KMP_AFFINITY_CAPABLE()) {
4300 return -1;
4301 }
4302
4303 KA_TRACE(1000, ;{
4304 int gtid = __kmp_entry_gtid();
4305 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4306 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4307 (kmp_affin_mask_t *)(*mask));
4308 __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
4309 proc, gtid, buf);
4310 });
4311
4312 if (__kmp_env_consistency_check) {
4313 if ((mask == NULL) || (*mask == NULL)) {
4314 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
4315 }
4316 }
4317
4318 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4319 return -1;
4320 }
4321 if (! KMP_CPU_ISSET(proc, fullMask)) {
4322 return -2;
4323 }
4324
4325 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
4326 return 0;
4327}
4328
4329
4330int
4331__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
4332{
4333 int retval;
4334
4335 if (! KMP_AFFINITY_CAPABLE()) {
4336 return -1;
4337 }
4338
4339 KA_TRACE(1000, ;{
4340 int gtid = __kmp_entry_gtid();
4341 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4342 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4343 (kmp_affin_mask_t *)(*mask));
4344 __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
4345 proc, gtid, buf);
4346 });
4347
4348 if (__kmp_env_consistency_check) {
4349 if ((mask == NULL) || (*mask == NULL)) {
4350 KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
4351 }
4352 }
4353
4354 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4355 return -1;
4356 }
4357 if (! KMP_CPU_ISSET(proc, fullMask)) {
4358 return -2;
4359 }
4360
4361 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
4362 return 0;
4363}
4364
4365
4366int
4367__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
4368{
4369 int retval;
4370
4371 if (! KMP_AFFINITY_CAPABLE()) {
4372 return -1;
4373 }
4374
4375 KA_TRACE(1000, ;{
4376 int gtid = __kmp_entry_gtid();
4377 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4378 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4379 (kmp_affin_mask_t *)(*mask));
4380 __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
4381 proc, gtid, buf);
4382 });
4383
4384 if (__kmp_env_consistency_check) {
4385 if ((mask == NULL) || (*mask == NULL)) {
4386 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
4387 }
4388 }
4389
4390 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4391 return 0;
4392 }
4393 if (! KMP_CPU_ISSET(proc, fullMask)) {
4394 return 0;
4395 }
4396
4397 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
4398}
4399
4400# if KMP_MIC
4401
4402// Dynamic affinity settings - Affinity balanced
4403void __kmp_balanced_affinity( int tid, int nthreads )
4404{
4405 if( __kmp_affinity_uniform_topology() ) {
4406 int coreID;
4407 int threadID;
4408 // Number of hyper threads per core in HT machine
4409 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
4410 // Number of cores
4411 int ncores = __kmp_ncores;
4412 // How many threads will be bound to each core
4413 int chunk = nthreads / ncores;
4414        // How many cores will have an additional thread bound to them ("big" cores)
4415 int big_cores = nthreads % ncores;
4416 // Number of threads on the big cores
4417 int big_nth = ( chunk + 1 ) * big_cores;
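        // Worked example (illustrative numbers): 6 threads on 4 cores gives chunk = 1
        // and big_cores = 2, so tids 0-1 share core 0, tids 2-3 share core 1, and
        // tids 4-5 get cores 2 and 3 to themselves.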
4418 if( tid < big_nth ) {
4419 coreID = tid / (chunk + 1 );
4420 threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
4421 } else { //tid >= big_nth
4422 coreID = ( tid - big_cores ) / chunk;
4423 threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
4424 }
4425
4426 KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
4427 "Illegal set affinity operation when not capable");
4428
4429 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
4430 KMP_CPU_ZERO(mask);
4431
4432 // Granularity == thread
4433 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4434 int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
4435 KMP_CPU_SET( osID, mask);
4436 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4437 for( int i = 0; i < __kmp_nth_per_core; i++ ) {
4438 int osID;
4439 osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
4440 KMP_CPU_SET( osID, mask);
4441 }
4442 }
4443 if (__kmp_affinity_verbose) {
4444 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4445 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004446 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4447 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004448 }
4449 __kmp_set_system_affinity( mask, TRUE );
4450 } else { // Non-uniform topology
4451
4452 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
4453 KMP_CPU_ZERO(mask);
4454
4455 // Number of hyper threads per core in HT machine
4456 int nth_per_core = __kmp_nThreadsPerCore;
4457 int core_level;
4458 if( nth_per_core > 1 ) {
4459 core_level = __kmp_aff_depth - 2;
4460 } else {
4461 core_level = __kmp_aff_depth - 1;
4462 }
4463
4464        // Number of cores - maximum value; it does not count trailing cores with 0 processors
4465 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
4466
4467        // For better performance, handle the special case nthreads == __kmp_avail_proc separately
4468 if( nthreads == __kmp_avail_proc ) {
4469 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4470 int osID = address2os[ tid ].second;
4471 KMP_CPU_SET( osID, mask);
4472 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4473 int coreID = address2os[ tid ].first.labels[ core_level ];
4474                // Count the osIDs found for the current core; there can be at most nth_per_core of them;
4475                // since address2os is sorted, we can break once cnt == nth_per_core
4476 int cnt = 0;
4477 for( int i = 0; i < __kmp_avail_proc; i++ ) {
4478 int osID = address2os[ i ].second;
4479 int core = address2os[ i ].first.labels[ core_level ];
4480 if( core == coreID ) {
4481 KMP_CPU_SET( osID, mask);
4482 cnt++;
4483 if( cnt == nth_per_core ) {
4484 break;
4485 }
4486 }
4487 }
4488 }
4489 } else if( nthreads <= __kmp_ncores ) {
4490
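            // No more than one thread per core: walk the cores that have at least one
            // usable proc and bind this thread to the tid-th such core.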
4491 int core = 0;
4492 for( int i = 0; i < ncores; i++ ) {
4493 // Check if this core from procarr[] is in the mask
4494 int in_mask = 0;
4495 for( int j = 0; j < nth_per_core; j++ ) {
4496 if( procarr[ i * nth_per_core + j ] != - 1 ) {
4497 in_mask = 1;
4498 break;
4499 }
4500 }
4501 if( in_mask ) {
4502 if( tid == core ) {
4503 for( int j = 0; j < nth_per_core; j++ ) {
4504 int osID = procarr[ i * nth_per_core + j ];
4505 if( osID != -1 ) {
4506 KMP_CPU_SET( osID, mask );
4507 // For granularity=thread it is enough to set the first available osID for this core
4508 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4509 break;
4510 }
4511 }
4512 }
4513 break;
4514 } else {
4515 core++;
4516 }
4517 }
4518 }
4519
4520 } else { // nthreads > __kmp_ncores
4521
4522 // Array to save the number of processors at each core
4523 int nproc_at_core[ ncores ];
4524 // Array to save the number of cores with "x" available processors;
4525 int ncores_with_x_procs[ nth_per_core + 1 ];
4526            // Array to save the number of cores with at least "x" procs (x from 0 up to nth_per_core)
4527 int ncores_with_x_to_max_procs[ nth_per_core + 1 ];
4528
4529 for( int i = 0; i <= nth_per_core; i++ ) {
4530 ncores_with_x_procs[ i ] = 0;
4531 ncores_with_x_to_max_procs[ i ] = 0;
4532 }
4533
4534 for( int i = 0; i < ncores; i++ ) {
4535 int cnt = 0;
4536 for( int j = 0; j < nth_per_core; j++ ) {
4537 if( procarr[ i * nth_per_core + j ] != -1 ) {
4538 cnt++;
4539 }
4540 }
4541 nproc_at_core[ i ] = cnt;
4542 ncores_with_x_procs[ cnt ]++;
4543 }
4544
4545 for( int i = 0; i <= nth_per_core; i++ ) {
4546 for( int j = i; j <= nth_per_core; j++ ) {
4547 ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
4548 }
4549 }
4550
4551 // Max number of processors
4552 int nproc = nth_per_core * ncores;
4553            // An array to keep the number of threads assigned to each context
4554 int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
4555 for( int i = 0; i < nproc; i++ ) {
4556 newarr[ i ] = 0;
4557 }
4558
4559 int nth = nthreads;
4560 int flag = 0;
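            // Spread the threads over the usable contexts: the first sweep gives each
            // context one thread; once flag is set, later sweeps stack the remaining
            // threads onto already-used contexts, cycling over the cores.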
4561 while( nth > 0 ) {
4562 for( int j = 1; j <= nth_per_core; j++ ) {
4563 int cnt = ncores_with_x_to_max_procs[ j ];
4564 for( int i = 0; i < ncores; i++ ) {
4565 // Skip the core with 0 processors
4566 if( nproc_at_core[ i ] == 0 ) {
4567 continue;
4568 }
4569 for( int k = 0; k < nth_per_core; k++ ) {
4570 if( procarr[ i * nth_per_core + k ] != -1 ) {
4571 if( newarr[ i * nth_per_core + k ] == 0 ) {
4572 newarr[ i * nth_per_core + k ] = 1;
4573 cnt--;
4574 nth--;
4575 break;
4576 } else {
4577 if( flag != 0 ) {
4578 newarr[ i * nth_per_core + k ] ++;
4579 cnt--;
4580 nth--;
4581 break;
4582 }
4583 }
4584 }
4585 }
4586 if( cnt == 0 || nth == 0 ) {
4587 break;
4588 }
4589 }
4590 if( nth == 0 ) {
4591 break;
4592 }
4593 }
4594 flag = 1;
4595 }
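            // newarr[ i ] now holds how many threads land on context i; bind this
            // thread to the context whose cumulative count first exceeds tid.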
4596 int sum = 0;
4597 for( int i = 0; i < nproc; i++ ) {
4598 sum += newarr[ i ];
4599 if( sum > tid ) {
4600 // Granularity == thread
4601 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4602 int osID = procarr[ i ];
4603 KMP_CPU_SET( osID, mask);
4604 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4605 int coreID = i / nth_per_core;
4606 for( int ii = 0; ii < nth_per_core; ii++ ) {
4607 int osID = procarr[ coreID * nth_per_core + ii ];
4608 if( osID != -1 ) {
4609 KMP_CPU_SET( osID, mask);
4610 }
4611 }
4612 }
4613 break;
4614 }
4615 }
4616 __kmp_free( newarr );
4617 }
4618
4619 if (__kmp_affinity_verbose) {
4620 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4621 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004622 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4623 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004624 }
4625 __kmp_set_system_affinity( mask, TRUE );
4626 }
4627}
4628
4629# endif /* KMP_MIC */
4630
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004631#else
4632 // affinity not supported
4633
4634kmp_uint32 mac_skipPerLevel[7];
4635kmp_uint32 mac_depth;
4636kmp_uint8 mac_leaf_kids;
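// Fallback used when affinity is not supported: construct a synthetic hierarchy
// over nproc leaves (branching factor of roughly 4) so that callers such as the
// hierarchical barrier still get a machine tree to walk.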
4637void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
4638 static int first = 1;
4639 if (first) {
4640 const kmp_uint32 maxLevels = 7;
4641 kmp_uint32 numPerLevel[maxLevels];
4642
4643 for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
4644 numPerLevel[i] = 1;
4645 mac_skipPerLevel[i] = 1;
4646 }
4647
4648 mac_depth = 2;
4649 numPerLevel[0] = nproc;
4650
4651 kmp_uint32 branch = 4;
4652 if (numPerLevel[0] == 1) branch = nproc/4;
4653 if (branch<4) branch=4;
4654 for (kmp_uint32 d=0; d<mac_depth-1; ++d) { // optimize hierarchy width
4655 while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
4656 if (numPerLevel[d] & 1) numPerLevel[d]++;
4657 numPerLevel[d] = numPerLevel[d] >> 1;
4658 if (numPerLevel[d+1] == 1) mac_depth++;
4659 numPerLevel[d+1] = numPerLevel[d+1] << 1;
4660 }
4661 if(numPerLevel[0] == 1) {
4662 branch = branch >> 1;
4663 if (branch<4) branch = 4;
4664 }
4665 }
4666
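        // skip_per_level[i] is the number of leaves spanned by one node at level i
        // (the running product of the fan-outs of the levels below it).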
4667 for (kmp_uint32 i=1; i<mac_depth; ++i)
4668 mac_skipPerLevel[i] = numPerLevel[i-1] * mac_skipPerLevel[i-1];
4669 mac_leaf_kids = (kmp_uint8)numPerLevel[0]-1;
4670 first=0;
4671 }
4672 thr_bar->depth = mac_depth;
4673 thr_bar->base_leaf_kids = mac_leaf_kids;
4674 thr_bar->skip_per_level = mac_skipPerLevel;
4675}
4676
Alp Toker763b9392014-02-28 09:42:41 +00004677#endif // KMP_AFFINITY_SUPPORTED