Blame - openmp/runtime/src/kmp_affinity.cpp - toolchain/llvm-project

blob: 644251da4b58ae62a09ed64f5ffdb3e7c9a9dba4 [file] [log] [blame]

Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1	/*
				2	* kmp_affinity.cpp -- affinity management
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	3	* $Revision: 42810 $
				4	* $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	5	*/
				6
				7
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// The LLVM Compiler Infrastructure
				11	//
				12	// This file is dual licensed under the MIT and the University of Illinois Open
				13	// Source Licenses. See LICENSE.txt for details.
				14	//
				15	//===----------------------------------------------------------------------===//
				16
				17
				18	#include "kmp.h"
				19	#include "kmp_i18n.h"
				20	#include "kmp_io.h"
				21	#include "kmp_str.h"
				22
				23
				24	#if KMP_OS_WINDOWS \|\| KMP_OS_LINUX
				25
				26	//
				27	// Print the affinity mask to the character array in a pretty format.
				28	//
				29	char *
				30	__kmp_affinity_print_mask(char buf, int buf_len, kmp_affin_mask_t mask)
				31	{
				32	KMP_ASSERT(buf_len >= 40);
				33	char *scan = buf;
				34	char *end = buf + buf_len - 1;
				35
				36	//
				37	// Find first element / check for empty set.
				38	//
				39	size_t i;
				40	for (i = 0; i < KMP_CPU_SETSIZE; i++) {
				41	if (KMP_CPU_ISSET(i, mask)) {
				42	break;
				43	}
				44	}
				45	if (i == KMP_CPU_SETSIZE) {
				46	sprintf(scan, "{<empty>}");
				47	while (*scan != '\0') scan++;
				48	KMP_ASSERT(scan <= end);
				49	return buf;
				50	}
				51
				52	sprintf(scan, "{%ld", i);
				53	while (*scan != '\0') scan++;
				54	i++;
				55	for (; i < KMP_CPU_SETSIZE; i++) {
				56	if (! KMP_CPU_ISSET(i, mask)) {
				57	continue;
				58	}
				59
				60	//
				61	// Check for buffer overflow. A string of the form ",<n>" will have
				62	// at most 10 characters, plus we want to leave room to print ",...}"
				63	// if the set is too large to print for a total of 15 characters.
				64	// We already left room for '\0' in setting end.
				65	//
				66	if (end - scan < 15) {
				67	break;
				68	}
				69	sprintf(scan, ",%-ld", i);
				70	while (*scan != '\0') scan++;
				71	}
				72	if (i < KMP_CPU_SETSIZE) {
				73	sprintf(scan, ",...");
				74	while (*scan != '\0') scan++;
				75	}
				76	sprintf(scan, "}");
				77	while (*scan != '\0') scan++;
				78	KMP_ASSERT(scan <= end);
				79	return buf;
				80	}
				81
				82
				83	void
				84	__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
				85	{
				86	KMP_CPU_ZERO(mask);
				87
				88	# if KMP_OS_WINDOWS && KMP_ARCH_X86_64
				89
				90	if (__kmp_num_proc_groups > 1) {
				91	int group;
				92	struct GROUP_AFFINITY ga;
				93	KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
				94	for (group = 0; group < __kmp_num_proc_groups; group++) {
				95	int i;
				96	int num = __kmp_GetActiveProcessorCount(group);
				97	for (i = 0; i < num; i++) {
				98	KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
				99	}
				100	}
				101	}
				102	else
				103
				104	# endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */
				105
				106	{
				107	int proc;
				108	for (proc = 0; proc < __kmp_xproc; proc++) {
				109	KMP_CPU_SET(proc, mask);
				110	}
				111	}
				112	}
				113
				114
				115	//
				116	// In Linux* OS debug & cover (-O0) builds, we need to avoid inline member
				117	// functions.
				118	//
				119	// The icc codegen emits sections with extremely long names, of the form
				120	// ".gnu.linkonce.<mangled_name>". There seems to have been a linker bug
				121	// introduced between GNU ld version 2.14.90.0.4 and 2.15.92.0.2 involving
				122	// some sort of memory corruption or table overflow that is triggered by
				123	// these long strings. I checked the latest version of the linker -
				124	// GNU ld (Linux* OS/GNU Binutils) 2.18.50.0.7.20080422 - and the bug is not
				125	// fixed.
				126	//
				127	// Unfortunately, my attempts to reproduce it in a smaller example have
				128	// failed - I'm not sure what the prospects are of getting it fixed
				129	// properly - but we need a reproducer smaller than all of libiomp.
				130	//
				131	// Work around the problem by avoiding inline constructors in such builds.
				132	// We do this for all platforms, not just Linux* OS - non-inline functions are
				133	// more debuggable and provide better coverage info than inline functions.
				134	// Use inline functions in shipping libs, for performance.
				135	//
				136
				137	# if !defined(KMP_DEBUG) && !defined(COVER)
				138
				139	class Address {
				140	public:
				141	static const unsigned maxDepth = 32;
				142	unsigned labels[maxDepth];
				143	unsigned childNums[maxDepth];
				144	unsigned depth;
				145	unsigned leader;
				146	Address(unsigned _depth)
				147	: depth(_depth), leader(FALSE) {
				148	}
				149	Address &operator=(const Address &b) {
				150	depth = b.depth;
				151	for (unsigned i = 0; i < depth; i++) {
				152	labels[i] = b.labels[i];
				153	childNums[i] = b.childNums[i];
				154	}
				155	leader = FALSE;
				156	return *this;
				157	}
				158	bool operator==(const Address &b) const {
				159	if (depth != b.depth)
				160	return false;
				161	for (unsigned i = 0; i < depth; i++)
				162	if(labels[i] != b.labels[i])
				163	return false;
				164	return true;
				165	}
				166	bool isClose(const Address &b, int level) const {
				167	if (depth != b.depth)
				168	return false;
				169	if ((unsigned)level >= depth)
				170	return true;
				171	for (unsigned i = 0; i < (depth - level); i++)
				172	if(labels[i] != b.labels[i])
				173	return false;
				174	return true;
				175	}
				176	bool operator!=(const Address &b) const {
				177	return !operator==(b);
				178	}
				179	};
				180
				181	class AddrUnsPair {
				182	public:
				183	Address first;
				184	unsigned second;
				185	AddrUnsPair(Address _first, unsigned _second)
				186	: first(_first), second(_second) {
				187	}
				188	AddrUnsPair &operator=(const AddrUnsPair &b)
				189	{
				190	first = b.first;
				191	second = b.second;
				192	return *this;
				193	}
				194	};
				195
				196	# else
				197
				198	class Address {
				199	public:
				200	static const unsigned maxDepth = 32;
				201	unsigned labels[maxDepth];
				202	unsigned childNums[maxDepth];
				203	unsigned depth;
				204	unsigned leader;
				205	Address(unsigned _depth);
				206	Address &operator=(const Address &b);
				207	bool operator==(const Address &b) const;
				208	bool isClose(const Address &b, int level) const;
				209	bool operator!=(const Address &b) const;
				210	};
				211
				212	Address::Address(unsigned _depth)
				213	{
				214	depth = _depth;
				215	leader = FALSE;
				216	}
				217
				218	Address &Address::operator=(const Address &b) {
				219	depth = b.depth;
				220	for (unsigned i = 0; i < depth; i++) {
				221	labels[i] = b.labels[i];
				222	childNums[i] = b.childNums[i];
				223	}
				224	leader = FALSE;
				225	return *this;
				226	}
				227
				228	bool Address::operator==(const Address &b) const {
				229	if (depth != b.depth)
				230	return false;
				231	for (unsigned i = 0; i < depth; i++)
				232	if(labels[i] != b.labels[i])
				233	return false;
				234	return true;
				235	}
				236
				237	bool Address::isClose(const Address &b, int level) const {
				238	if (depth != b.depth)
				239	return false;
				240	if ((unsigned)level >= depth)
				241	return true;
				242	for (unsigned i = 0; i < (depth - level); i++)
				243	if(labels[i] != b.labels[i])
				244	return false;
				245	return true;
				246	}
				247
				248	bool Address::operator!=(const Address &b) const {
				249	return !operator==(b);
				250	}
				251
				252	class AddrUnsPair {
				253	public:
				254	Address first;
				255	unsigned second;
				256	AddrUnsPair(Address _first, unsigned _second);
				257	AddrUnsPair &operator=(const AddrUnsPair &b);
				258	};
				259
				260	AddrUnsPair::AddrUnsPair(Address _first, unsigned _second)
				261	: first(_first), second(_second)
				262	{
				263	}
				264
				265	AddrUnsPair &AddrUnsPair::operator=(const AddrUnsPair &b)
				266	{
				267	first = b.first;
				268	second = b.second;
				269	return *this;
				270	}
				271
				272	# endif /* !defined(KMP_DEBUG) && !defined(COVER) */
				273
				274
				275	static int
				276	__kmp_affinity_cmp_Address_labels(const void a, const void b)
				277	{
				278	const Address aa = (const Address )&(((AddrUnsPair *)a)
				279	->first);
				280	const Address bb = (const Address )&(((AddrUnsPair *)b)
				281	->first);
				282	unsigned depth = aa->depth;
				283	unsigned i;
				284	KMP_DEBUG_ASSERT(depth == bb->depth);
				285	for (i = 0; i < depth; i++) {
				286	if (aa->labels[i] < bb->labels[i]) return -1;
				287	if (aa->labels[i] > bb->labels[i]) return 1;
				288	}
				289	return 0;
				290	}
				291
				292
				293	static int
				294	__kmp_affinity_cmp_Address_child_num(const void a, const void b)
				295	{
				296	const Address aa = (const Address )&(((AddrUnsPair *)a)
				297	->first);
				298	const Address bb = (const Address )&(((AddrUnsPair *)b)
				299	->first);
				300	unsigned depth = aa->depth;
				301	unsigned i;
				302	KMP_DEBUG_ASSERT(depth == bb->depth);
				303	KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
				304	KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
				305	for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
				306	int j = depth - i - 1;
				307	if (aa->childNums[j] < bb->childNums[j]) return -1;
				308	if (aa->childNums[j] > bb->childNums[j]) return 1;
				309	}
				310	for (; i < depth; i++) {
				311	int j = i - __kmp_affinity_compact;
				312	if (aa->childNums[j] < bb->childNums[j]) return -1;
				313	if (aa->childNums[j] > bb->childNums[j]) return 1;
				314	}
				315	return 0;
				316	}
				317
				318
				319	//
				320	// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
				321	// called to renumber the labels from [0..n] and place them into the child_num
				322	// vector of the address object. This is done in case the labels used for
				323	// the children at one node of the heirarchy differ from those used for
				324	// another node at the same level. Example: suppose the machine has 2 nodes
				325	// with 2 packages each. The first node contains packages 601 and 602, and
				326	// second node contains packages 603 and 604. If we try to sort the table
				327	// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
				328	// because we are paying attention to the labels themselves, not the ordinal
				329	// child numbers. By using the child numbers in the sort, the result is
				330	// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
				331	//
				332	static void
				333	__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
				334	int numAddrs)
				335	{
				336	KMP_DEBUG_ASSERT(numAddrs > 0);
				337	int depth = address2os->first.depth;
				338	unsigned counts = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				339	unsigned lastLabel = (unsigned )__kmp_allocate(depth
				340	* sizeof(unsigned));
				341	int labCt;
				342	for (labCt = 0; labCt < depth; labCt++) {
				343	address2os[0].first.childNums[labCt] = counts[labCt] = 0;
				344	lastLabel[labCt] = address2os[0].first.labels[labCt];
				345	}
				346	int i;
				347	for (i = 1; i < numAddrs; i++) {
				348	for (labCt = 0; labCt < depth; labCt++) {
				349	if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
				350	int labCt2;
				351	for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
				352	counts[labCt2] = 0;
				353	lastLabel[labCt2] = address2os[i].first.labels[labCt2];
				354	}
				355	counts[labCt]++;
				356	lastLabel[labCt] = address2os[i].first.labels[labCt];
				357	break;
				358	}
				359	}
				360	for (labCt = 0; labCt < depth; labCt++) {
				361	address2os[i].first.childNums[labCt] = counts[labCt];
				362	}
				363	for (; labCt < (int)Address::maxDepth; labCt++) {
				364	address2os[i].first.childNums[labCt] = 0;
				365	}
				366	}
				367	}
				368
				369
				370	//
				371	// All of the __kmp_affinity_create_*_map() routines should set
				372	// __kmp_affinity_masks to a vector of affinity mask objects of length
				373	// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
				374	// return the number of levels in the machine topology tree (zero if
				375	// __kmp_affinity_type == affinity_none).
				376	//
				377	// All of the __kmp_affinity_create__map() routines should set fullMask
				378	// to the affinity mask for the initialization thread. They need to save and
				379	// restore the mask, and it could be needed later, so saving it is just an
				380	// optimization to avoid calling kmp_get_system_affinity() again.
				381	//
				382	static kmp_affin_mask_t *fullMask = NULL;
				383
				384	kmp_affin_mask_t *
				385	__kmp_affinity_get_fullMask() { return fullMask; }
				386
				387
// Topology summary counters.  They are assigned by the
// __kmp_affinity_create_*_map() routines in this file;
// __kmp_nThreadsPerCore has external linkage, the other two are file-local.
static int nCoresPerPkg, nPackages;
int __kmp_nThreadsPerCore;
				390
				391	//
				392	// __kmp_affinity_uniform_topology() doesn't work when called from
				393	// places which support arbitrarily many levels in the machine topology
				394	// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
				395	// __kmp_affinity_create_x2apicid_map().
				396	//
				397	inline static bool
				398	__kmp_affinity_uniform_topology()
				399	{
				400	return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
				401	}
				402
				403
				404	//
				405	// Print out the detailed machine topology map, i.e. the physical locations
				406	// of each OS proc.
				407	//
				408	static void
				409	__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
				410	int pkgLevel, int coreLevel, int threadLevel)
				411	{
				412	int proc;
				413
				414	KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
				415	for (proc = 0; proc < len; proc++) {
				416	int level;
				417	kmp_str_buf_t buf;
				418	__kmp_str_buf_init(&buf);
				419	for (level = 0; level < depth; level++) {
				420	if (level == threadLevel) {
				421	__kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
				422	}
				423	else if (level == coreLevel) {
				424	__kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
				425	}
				426	else if (level == pkgLevel) {
				427	__kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
				428	}
				429	else if (level > pkgLevel) {
				430	__kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
				431	level - pkgLevel - 1);
				432	}
				433	else {
				434	__kmp_str_buf_print(&buf, "L%d ", level);
				435	}
				436	__kmp_str_buf_print(&buf, "%d ",
				437	address2os[proc].first.labels[level]);
				438	}
				439	KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
				440	buf.str);
				441	__kmp_str_buf_free(&buf);
				442	}
				443	}
				444
				445
				446	//
				447	// If we don't know how to retrieve the machine's processor topology, or
				448	// encounter an error in doing so, this routine is called to form a "flat"
				449	// mapping of os thread id's <-> processor id's.
				450	//
				451	static int
				452	__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
				453	kmp_i18n_id_t *const msg_id)
				454	{
				455	*address2os = NULL;
				456	*msg_id = kmp_i18n_null;
				457
				458	//
				459	// Even if __kmp_affinity_type == affinity_none, this routine might still
				460	// called to set __kmp_ht_enabled, & __kmp_ncores, as well as
				461	// __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
				462	//
				463	if (! KMP_AFFINITY_CAPABLE()) {
				464	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				465	__kmp_ncores = nPackages = __kmp_xproc;
				466	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
				467	__kmp_ht_enabled = FALSE;
				468	if (__kmp_affinity_verbose) {
				469	KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
				470	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				471	KMP_INFORM(Uniform, "KMP_AFFINITY");
				472	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				473	__kmp_nThreadsPerCore, __kmp_ncores);
				474	}
				475	return 0;
				476	}
				477
				478	//
				479	// When affinity is off, this routine will still be called to set
				480	// __kmp_ht_enabled, & __kmp_ncores, as well as __kmp_nThreadsPerCore,
				481	// nCoresPerPkg, & nPackages. Make sure all these vars are set
				482	// correctly, and return now if affinity is not enabled.
				483	//
				484	__kmp_ncores = nPackages = __kmp_avail_proc;
				485	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
				486	__kmp_ht_enabled = FALSE;
				487	if (__kmp_affinity_verbose) {
				488	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				489	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
				490
				491	KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
				492	if (__kmp_affinity_respect_mask) {
				493	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				494	} else {
				495	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				496	}
				497	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				498	KMP_INFORM(Uniform, "KMP_AFFINITY");
				499	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				500	__kmp_nThreadsPerCore, __kmp_ncores);
				501	}
				502	if (__kmp_affinity_type == affinity_none) {
				503	return 0;
				504	}
				505
				506	//
				507	// Contruct the data structure to be returned.
				508	//
				509	address2os = (AddrUnsPair)
				510	__kmp_allocate(sizeof(*address2os) __kmp_avail_proc);
				511	int avail_ct = 0;
				512	unsigned int i;
				513	for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
				514	//
				515	// Skip this proc if it is not included in the machine model.
				516	//
				517	if (! KMP_CPU_ISSET(i, fullMask)) {
				518	continue;
				519	}
				520
				521	Address addr(1);
				522	addr.labels[0] = i;
				523	(*address2os)[avail_ct++] = AddrUnsPair(addr,i);
				524	}
				525	if (__kmp_affinity_verbose) {
				526	KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
				527	}
				528
				529	if (__kmp_affinity_gran_levels < 0) {
				530	//
				531	// Only the package level is modeled in the machine topology map,
				532	// so the #levels of granularity is either 0 or 1.
				533	//
				534	if (__kmp_affinity_gran > affinity_gran_package) {
				535	__kmp_affinity_gran_levels = 1;
				536	}
				537	else {
				538	__kmp_affinity_gran_levels = 0;
				539	}
				540	}
				541	return 1;
				542	}
				543
				544
				545	# if KMP_OS_WINDOWS && KMP_ARCH_X86_64
				546
				547	//
				548	// If multiple Windows* OS processor groups exist, we can create a 2-level
				549	// topology map with the groups at level 0 and the individual procs at
				550	// level 1.
				551	//
				552	// This facilitates letting the threads float among all procs in a group,
				553	// if granularity=group (the default when there are multiple groups).
				554	//
				555	static int
				556	__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
				557	kmp_i18n_id_t *const msg_id)
				558	{
				559	*address2os = NULL;
				560	*msg_id = kmp_i18n_null;
				561
				562	//
				563	// If we don't have multiple processor groups, return now.
				564	// The flat mapping will be used.
				565	//
				566	if ((! KMP_AFFINITY_CAPABLE()) \|\| (__kmp_get_proc_group(fullMask) >= 0)) {
				567	// FIXME set *msg_id
				568	return -1;
				569	}
				570
				571	//
				572	// Contruct the data structure to be returned.
				573	//
				574	address2os = (AddrUnsPair)
				575	__kmp_allocate(sizeof(*address2os) __kmp_avail_proc);
				576	int avail_ct = 0;
				577	int i;
				578	for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
				579	//
				580	// Skip this proc if it is not included in the machine model.
				581	//
				582	if (! KMP_CPU_ISSET(i, fullMask)) {
				583	continue;
				584	}
				585
				586	Address addr(2);
				587	addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
				588	addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
				589	(*address2os)[avail_ct++] = AddrUnsPair(addr,i);
				590
				591	if (__kmp_affinity_verbose) {
				592	KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
				593	addr.labels[1]);
				594	}
				595	}
				596
				597	if (__kmp_affinity_gran_levels < 0) {
				598	if (__kmp_affinity_gran == affinity_gran_group) {
				599	__kmp_affinity_gran_levels = 1;
				600	}
				601	else if ((__kmp_affinity_gran == affinity_gran_fine)
				602	\|\| (__kmp_affinity_gran == affinity_gran_thread)) {
				603	__kmp_affinity_gran_levels = 0;
				604	}
				605	else {
				606	const char *gran_str = NULL;
				607	if (__kmp_affinity_gran == affinity_gran_core) {
				608	gran_str = "core";
				609	}
				610	else if (__kmp_affinity_gran == affinity_gran_package) {
				611	gran_str = "package";
				612	}
				613	else if (__kmp_affinity_gran == affinity_gran_node) {
				614	gran_str = "node";
				615	}
				616	else {
				617	KMP_ASSERT(0);
				618	}
				619
				620	// Warning: can't use affinity granularity \"gran\" with group topology method, using "thread"
				621	__kmp_affinity_gran_levels = 0;
				622	}
				623	}
				624	return 2;
				625	}
				626
				627	# endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */
				628
				629
				630	# if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
				631
				632	static int
				633	__kmp_cpuid_mask_width(int count) {
				634	int r = 0;
				635
				636	while((1<<r) < count)
				637	++r;
				638	return r;
				639	}
				640
				641
//
// Per-OS-proc record used while building the legacy APIC id topology map:
// the raw values read through cpuid, plus the ids inferred from them.
//
class apicThreadInfo {
public:
    unsigned osId;              // param to __kmp_affinity_bind_thread
    unsigned apicId;            // from cpuid after binding
    unsigned maxCoresPerPkg;    // from cpuid after binding
    unsigned maxThreadsPerPkg;  // from cpuid after binding
    unsigned pkgId;             // inferred from above values
    unsigned coreId;            // inferred from above values
    unsigned threadId;          // inferred from above values
};
				652
				653
				654	static int
				655	__kmp_affinity_cmp_apicThreadInfo_os_id(const void a, const void b)
				656	{
				657	const apicThreadInfo aa = (const apicThreadInfo )a;
				658	const apicThreadInfo bb = (const apicThreadInfo )b;
				659	if (aa->osId < bb->osId) return -1;
				660	if (aa->osId > bb->osId) return 1;
				661	return 0;
				662	}
				663
				664
				665	static int
				666	__kmp_affinity_cmp_apicThreadInfo_phys_id(const void a, const void b)
				667	{
				668	const apicThreadInfo aa = (const apicThreadInfo )a;
				669	const apicThreadInfo bb = (const apicThreadInfo )b;
				670	if (aa->pkgId < bb->pkgId) return -1;
				671	if (aa->pkgId > bb->pkgId) return 1;
				672	if (aa->coreId < bb->coreId) return -1;
				673	if (aa->coreId > bb->coreId) return 1;
				674	if (aa->threadId < bb->threadId) return -1;
				675	if (aa->threadId > bb->threadId) return 1;
				676	return 0;
				677	}
				678
				679
				680	//
				681	// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
				682	// an algorithm which cycles through the available os threads, setting
				683	// the current thread's affinity mask to that thread, and then retrieves
				684	// the Apic Id for each thread context using the cpuid instruction.
				685	//
				686	static int
				687	__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
				688	kmp_i18n_id_t *const msg_id)
				689	{
				690	int rc;
				691	*address2os = NULL;
				692	*msg_id = kmp_i18n_null;
				693
				694	# if KMP_MIC
				695	{
				696	// The code below will use cpuid(4).
				697	// Check if cpuid(4) is supported.
				698	// FIXME? - this really doesn't need to be specific to MIC.
				699	kmp_cpuid buf;
				700	__kmp_x86_cpuid(0, 0, &buf);
				701	if (buf.eax < 4) {
				702	*msg_id = kmp_i18n_str_NoLeaf4Support;
				703	return -1;
				704	}
				705	}
				706	# endif // KMP_MIC
				707
				708	//
				709	// Even if __kmp_affinity_type == affinity_none, this routine is still
				710	// called to set __kmp_ht_enabled, & __kmp_ncores, as well as
				711	// __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
				712	//
				713	// The algorithm used starts by setting the affinity to each available
				714	// thread and retreiving info from the cpuid instruction, so if we are not
				715	// capable of calling __kmp_affinity_get_map()/__kmp_affinity_get_map(),
				716	// then we need to do something else.
				717	//
				718	if (! KMP_AFFINITY_CAPABLE()) {
				719	//
				720	// Hack to try and infer the machine topology using only the data
				721	// available from cpuid on the current thread, and __kmp_xproc.
				722	//
				723	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				724
				725	//
				726	// Get an upper bound on the number of threads per package using
				727	// cpuid(1).
				728	//
				729	// On some OS/chps combinations where HT is supported by the chip
				730	// but is disabled, this value will be 2 on a single core chip.
				731	// Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
				732	//
				733	kmp_cpuid buf;
				734	__kmp_x86_cpuid(1, 0, &buf);
				735	int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
				736	if (maxThreadsPerPkg == 0) {
				737	maxThreadsPerPkg = 1;
				738	}
				739
				740	//
				741	// The num cores per pkg comes from cpuid(4).
				742	// 1 must be added to the encoded value.
				743	//
				744	// The author of cpu_count.cpp treated this only an upper bound
				745	// on the number of cores, but I haven't seen any cases where it
				746	// was greater than the actual number of cores, so we will treat
				747	// it as exact in this block of code.
				748	//
				749	// First, we need to check if cpuid(4) is supported on this chip.
				750	// To see if cpuid(n) is supported, issue cpuid(0) and check if eax
				751	// has the value n or greater.
				752	//
				753	__kmp_x86_cpuid(0, 0, &buf);
				754	if (buf.eax >= 4) {
				755	__kmp_x86_cpuid(4, 0, &buf);
				756	nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
				757	}
				758	else {
				759	nCoresPerPkg = 1;
				760	}
				761
				762	//
				763	// There is no way to reliably tell if HT is enabled without issuing
				764	// the cpuid instruction from every thread, can correlating the cpuid
				765	// info, so if the machine is not affinity capable, we assume that HT
				766	// is off. We have seen quite a few machines where maxThreadsPerPkg
				767	// is 2, yet the machine does not support HT.
				768	//
				769	// - Older OSes are usually found on machines with older chips, which
				770	// do not support HT.
				771	//
				772	// - The performance penalty for mistakenly identifying a machine as
				773	// HT when it isn't (which results in blocktime being incorrecly set
				774	// to 0) is greater than the penalty when for mistakenly identifying
				775	// a machine as being 1 thread/core when it is really HT enabled
				776	// (which results in blocktime being incorrectly set to a positive
				777	// value).
				778	//
				779	__kmp_ncores = __kmp_xproc;
				780	nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
				781	__kmp_nThreadsPerCore = 1;
				782	__kmp_ht_enabled = FALSE;
				783	if (__kmp_affinity_verbose) {
				784	KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
				785	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				786	if (__kmp_affinity_uniform_topology()) {
				787	KMP_INFORM(Uniform, "KMP_AFFINITY");
				788	} else {
				789	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				790	}
				791	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				792	__kmp_nThreadsPerCore, __kmp_ncores);
				793	}
				794	return 0;
				795	}
				796
				797	//
				798	//
				799	// From here on, we can assume that it is safe to call
				800	// __kmp_get_system_affinity() and __kmp_set_system_affinity(),
				801	// even if __kmp_affinity_type = affinity_none.
				802	//
				803
				804	//
				805	// Save the affinity mask for the current thread.
				806	//
				807	kmp_affin_mask_t *oldMask;
				808	KMP_CPU_ALLOC(oldMask);
				809	KMP_ASSERT(oldMask != NULL);
				810	__kmp_get_system_affinity(oldMask, TRUE);
				811
				812	//
				813	// Run through each of the available contexts, binding the current thread
				814	// to it, and obtaining the pertinent information using the cpuid instr.
				815	//
				816	// The relevant information is:
				817	//
				818	// Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
				819	// has a uniqie Apic Id, which is of the form pkg# : core# : thread#.
				820	//
				821	// Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The
				822	// value of this field determines the width of the core# + thread#
				823	// fields in the Apic Id. It is also an upper bound on the number
				824	// of threads per package, but it has been verified that situations
				825	// happen were it is not exact. In particular, on certain OS/chip
				826	// combinations where Intel(R) Hyper-Threading Technology is supported
				827	// by the chip but has
				828	// been disabled, the value of this field will be 2 (for a single core
				829	// chip). On other OS/chip combinations supporting
				830	// Intel(R) Hyper-Threading Technology, the value of
				831	// this field will be 1 when Intel(R) Hyper-Threading Technology is
				832	// disabled and 2 when it is enabled.
				833	//
				834	// Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The
				835	// value of this field (+1) determines the width of the core# field in
				836	// the Apic Id. The comments in "cpucount.cpp" say that this value is
				837	// an upper bound, but the IA-32 architecture manual says that it is
				838	// exactly the number of cores per package, and I haven't seen any
				839	// case where it wasn't.
				840	//
				841	// From this information, deduce the package Id, core Id, and thread Id,
				842	// and set the corresponding fields in the apicThreadInfo struct.
				843	//
				844	unsigned i;
				845	apicThreadInfo threadInfo = (apicThreadInfo )__kmp_allocate(
				846	__kmp_avail_proc * sizeof(apicThreadInfo));
				847	unsigned nApics = 0;
				848	for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
				849	//
				850	// Skip this proc if it is not included in the machine model.
				851	//
				852	if (! KMP_CPU_ISSET(i, fullMask)) {
				853	continue;
				854	}
				855	KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
				856
				857	__kmp_affinity_bind_thread(i);
				858	threadInfo[nApics].osId = i;
				859
				860	//
				861	// The apic id and max threads per pkg come from cpuid(1).
				862	//
				863	kmp_cpuid buf;
				864	__kmp_x86_cpuid(1, 0, &buf);
				865	if (! (buf.edx >> 9) & 1) {
				866	__kmp_set_system_affinity(oldMask, TRUE);
				867	__kmp_free(threadInfo);
				868	KMP_CPU_FREE(oldMask);
				869	*msg_id = kmp_i18n_str_ApicNotPresent;
				870	return -1;
				871	}
				872	threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
				873	threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
				874	if (threadInfo[nApics].maxThreadsPerPkg == 0) {
				875	threadInfo[nApics].maxThreadsPerPkg = 1;
				876	}
				877
				878	//
				879	// Max cores per pkg comes from cpuid(4).
				880	// 1 must be added to the encoded value.
				881	//
				882	// First, we need to check if cpuid(4) is supported on this chip.
				883	// To see if cpuid(n) is supported, issue cpuid(0) and check if eax
				884	// has the value n or greater.
				885	//
				886	__kmp_x86_cpuid(0, 0, &buf);
				887	if (buf.eax >= 4) {
				888	__kmp_x86_cpuid(4, 0, &buf);
				889	threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
				890	}
				891	else {
				892	threadInfo[nApics].maxCoresPerPkg = 1;
				893	}
				894
				895	//
				896	// Infer the pkgId / coreId / threadId using only the info
				897	// obtained locally.
				898	//
				899	int widthCT = __kmp_cpuid_mask_width(
				900	threadInfo[nApics].maxThreadsPerPkg);
				901	threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
				902
				903	int widthC = __kmp_cpuid_mask_width(
				904	threadInfo[nApics].maxCoresPerPkg);
				905	int widthT = widthCT - widthC;
				906	if (widthT < 0) {
				907	//
				908	// I've never seen this one happen, but I suppose it could, if
				909	// the cpuid instruction on a chip was really screwed up.
				910	// Make sure to restore the affinity mask before the tail call.
				911	//
				912	__kmp_set_system_affinity(oldMask, TRUE);
				913	__kmp_free(threadInfo);
				914	KMP_CPU_FREE(oldMask);
				915	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				916	return -1;
				917	}
				918
				919	int maskC = (1 << widthC) - 1;
				920	threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
				921	&maskC;
				922
				923	int maskT = (1 << widthT) - 1;
				924	threadInfo[nApics].threadId = threadInfo[nApics].apicId &maskT;
				925
				926	nApics++;
				927	}
				928
				929	//
				930	// We've collected all the info we need.
				931	// Restore the old affinity mask for this thread.
				932	//
				933	__kmp_set_system_affinity(oldMask, TRUE);
				934
				935	//
				936	// If there's only one thread context to bind to, form an Address object
				937	// with depth 1 and return immediately (or, if affinity is off, set
				938	// address2os to NULL and return).
				939	//
				940	// If it is configured to omit the package level when there is only a
				941	// single package, the logic at the end of this routine won't work if
				942	// there is only a single thread - it would try to form an Address
				943	// object with depth 0.
				944	//
				945	KMP_ASSERT(nApics > 0);
				946	if (nApics == 1) {
				947	__kmp_ncores = nPackages = 1;
				948	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
				949	__kmp_ht_enabled = FALSE;
				950	if (__kmp_affinity_verbose) {
				951	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				952	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				953
				954	KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
				955	if (__kmp_affinity_respect_mask) {
				956	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				957	} else {
				958	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				959	}
				960	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				961	KMP_INFORM(Uniform, "KMP_AFFINITY");
				962	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				963	__kmp_nThreadsPerCore, __kmp_ncores);
				964	}
				965
				966	if (__kmp_affinity_type == affinity_none) {
				967	__kmp_free(threadInfo);
				968	KMP_CPU_FREE(oldMask);
				969	return 0;
				970	}
				971
				972	address2os = (AddrUnsPair)__kmp_allocate(sizeof(AddrUnsPair));
				973	Address addr(1);
				974	addr.labels[0] = threadInfo[0].pkgId;
				975	(*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
				976
				977	if (__kmp_affinity_gran_levels < 0) {
				978	__kmp_affinity_gran_levels = 0;
				979	}
				980
				981	if (__kmp_affinity_verbose) {
				982	__kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
				983	}
				984
				985	__kmp_free(threadInfo);
				986	KMP_CPU_FREE(oldMask);
				987	return 1;
				988	}
				989
				990	//
				991	// Sort the threadInfo table by physical Id.
				992	//
				993	qsort(threadInfo, nApics, sizeof(*threadInfo),
				994	__kmp_affinity_cmp_apicThreadInfo_phys_id);
				995
				996	//
				997	// The table is now sorted by pkgId / coreId / threadId, but we really
				998	// don't know the radix of any of the fields. pkgId's may be sparsely
				999	// assigned among the chips on a system. Although coreId's are usually
				1000	// assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
				1001	// [0..threadsPerCore-1], we don't want to make any such assumptions.
				1002	//
				1003	// For that matter, we don't know what coresPerPkg and threadsPerCore
				1004	// (or the total # packages) are at this point - we want to determine
				1005	// that now. We only have an upper bound on the first two figures.
				1006	//
				1007	// We also perform a consistency check at this point: the values returned
				1008	// by the cpuid instruction for any thread bound to a given package had
				1009	// better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
				1010	//
				1011	nPackages = 1;
				1012	nCoresPerPkg = 1;
				1013	__kmp_nThreadsPerCore = 1;
				1014	unsigned nCores = 1;
				1015
				1016	unsigned pkgCt = 1; // to determine radii
				1017	unsigned lastPkgId = threadInfo[0].pkgId;
				1018	unsigned coreCt = 1;
				1019	unsigned lastCoreId = threadInfo[0].coreId;
				1020	unsigned threadCt = 1;
				1021	unsigned lastThreadId = threadInfo[0].threadId;
				1022
				1023	// intra-pkg consist checks
				1024	unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
				1025	unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
				1026
				1027	for (i = 1; i < nApics; i++) {
				1028	if (threadInfo[i].pkgId != lastPkgId) {
				1029	nCores++;
				1030	pkgCt++;
				1031	lastPkgId = threadInfo[i].pkgId;
				1032	if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
				1033	coreCt = 1;
				1034	lastCoreId = threadInfo[i].coreId;
				1035	if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
				1036	threadCt = 1;
				1037	lastThreadId = threadInfo[i].threadId;
				1038
				1039	//
				1040	// This is a different package, so go on to the next iteration
				1041	// without doing any consistency checks. Reset the consistency
				1042	// check vars, though.
				1043	//
				1044	prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
				1045	prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
				1046	continue;
				1047	}
				1048
				1049	if (threadInfo[i].coreId != lastCoreId) {
				1050	nCores++;
				1051	coreCt++;
				1052	lastCoreId = threadInfo[i].coreId;
				1053	if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
				1054	threadCt = 1;
				1055	lastThreadId = threadInfo[i].threadId;
				1056	}
				1057	else if (threadInfo[i].threadId != lastThreadId) {
				1058	threadCt++;
				1059	lastThreadId = threadInfo[i].threadId;
				1060	}
				1061	else {
				1062	__kmp_free(threadInfo);
				1063	KMP_CPU_FREE(oldMask);
				1064	*msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
				1065	return -1;
				1066	}
				1067
				1068	//
				1069	// Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
				1070	// fields agree between all the threads bound to a given package.
				1071	//
				1072	if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
				1073	\|\| (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
				1074	__kmp_free(threadInfo);
				1075	KMP_CPU_FREE(oldMask);
				1076	*msg_id = kmp_i18n_str_InconsistentCpuidInfo;
				1077	return -1;
				1078	}
				1079	}
				1080	nPackages = pkgCt;
				1081	if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
				1082	if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
				1083
				1084	//
				1085	// When affinity is off, this routine will still be called to set
				1086	// __kmp_ht_enabled, & __kmp_ncores, as well as __kmp_nThreadsPerCore,
				1087	// nCoresPerPkg, & nPackages. Make sure all these vars are set
				1088	// correctly, and return now if affinity is not enabled.
				1089	//
				1090	__kmp_ht_enabled = (__kmp_nThreadsPerCore > 1);
				1091	__kmp_ncores = nCores;
				1092	if (__kmp_affinity_verbose) {
				1093	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				1094	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1095
				1096	KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
				1097	if (__kmp_affinity_respect_mask) {
				1098	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				1099	} else {
				1100	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				1101	}
				1102	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1103	if (__kmp_affinity_uniform_topology()) {
				1104	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1105	} else {
				1106	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				1107	}
				1108	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1109	__kmp_nThreadsPerCore, __kmp_ncores);
				1110
				1111	}
				1112
				1113	if (__kmp_affinity_type == affinity_none) {
				1114	__kmp_free(threadInfo);
				1115	KMP_CPU_FREE(oldMask);
				1116	return 0;
				1117	}
				1118
				1119	//
				1120	// Now that we've determined the number of packages, the number of cores
				1121	// per package, and the number of threads per core, we can construct the
				1122	// data structure that is to be returned.
				1123	//
				1124	int pkgLevel = 0;
				1125	int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
				1126	int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
				1127	unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
				1128
				1129	KMP_ASSERT(depth > 0);
				1130	address2os = (AddrUnsPair)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
				1131
				1132	for (i = 0; i < nApics; ++i) {
				1133	Address addr(depth);
				1134	unsigned os = threadInfo[i].osId;
				1135	int d = 0;
				1136
				1137	if (pkgLevel >= 0) {
				1138	addr.labels[d++] = threadInfo[i].pkgId;
				1139	}
				1140	if (coreLevel >= 0) {
				1141	addr.labels[d++] = threadInfo[i].coreId;
				1142	}
				1143	if (threadLevel >= 0) {
				1144	addr.labels[d++] = threadInfo[i].threadId;
				1145	}
				1146	(*address2os)[i] = AddrUnsPair(addr, os);
				1147	}
				1148
				1149	if (__kmp_affinity_gran_levels < 0) {
				1150	//
				1151	// Set the granularity level based on what levels are modeled
				1152	// in the machine topology map.
				1153	//
				1154	__kmp_affinity_gran_levels = 0;
				1155	if ((threadLevel >= 0)
				1156	&& (__kmp_affinity_gran > affinity_gran_thread)) {
				1157	__kmp_affinity_gran_levels++;
				1158	}
				1159	if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
				1160	__kmp_affinity_gran_levels++;
				1161	}
				1162	if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
				1163	__kmp_affinity_gran_levels++;
				1164	}
				1165	}
				1166
				1167	if (__kmp_affinity_verbose) {
				1168	__kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
				1169	coreLevel, threadLevel);
				1170	}
				1171
				1172	__kmp_free(threadInfo);
				1173	KMP_CPU_FREE(oldMask);
				1174	return depth;
				1175	}
				1176
				1177
				1178	//
				1179	// Intel(R) microarchitecture code name Nehalem, Dunnington and later
				1180	// architectures support a newer interface for specifying the x2APIC Ids,
				1181	// based on cpuid leaf 11.
				1182	//
				1183	static int
				1184	__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
				1185	kmp_i18n_id_t *const msg_id)
				1186	{
				1187	kmp_cpuid buf;
				1188
				1189	*address2os = NULL;
				1190	*msg_id = kmp_i18n_null;
				1191
				1192	//
				1193	// Check to see if cpuid leaf 11 is supported.
				1194	//
				1195	__kmp_x86_cpuid(0, 0, &buf);
				1196	if (buf.eax < 11) {
				1197	*msg_id = kmp_i18n_str_NoLeaf11Support;
				1198	return -1;
				1199	}
				1200	__kmp_x86_cpuid(11, 0, &buf);
				1201	if (buf.ebx == 0) {
				1202	*msg_id = kmp_i18n_str_NoLeaf11Support;
				1203	return -1;
				1204	}
				1205
				1206	//
				1207	// Find the number of levels in the machine topology. While we're at it,
				1208	// get the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will
				1209	// try to get more accurate values later by explicitly counting them,
				1210	// but get reasonable defaults now, in case we return early.
				1211	//
				1212	int level;
				1213	int threadLevel = -1;
				1214	int coreLevel = -1;
				1215	int pkgLevel = -1;
				1216	__kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
				1217
				1218	for (level = 0;; level++) {
				1219	if (level > 31) {
				1220	//
				1221	// FIXME: Hack for DPD200163180
				1222	//
				1223	// If level is big then something went wrong -> exiting
				1224	//
				1225	// There could actually be 32 valid levels in the machine topology,
				1226	// but so far, the only machine we have seen which does not exit
				1227	// this loop before iteration 32 has fubar x2APIC settings.
				1228	//
				1229	// For now, just reject this case based upon loop trip count.
				1230	//
				1231	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1232	return -1;
				1233	}
				1234	__kmp_x86_cpuid(11, level, &buf);
				1235	if (buf.ebx == 0) {
				1236	if (pkgLevel < 0) {
				1237	//
				1238	// Will infer nPackages from __kmp_xproc
				1239	//
				1240	pkgLevel = level;
				1241	level++;
				1242	}
				1243	break;
				1244	}
				1245	int kind = (buf.ecx >> 8) & 0xff;
				1246	if (kind == 1) {
				1247	//
				1248	// SMT level
				1249	//
				1250	threadLevel = level;
				1251	coreLevel = -1;
				1252	pkgLevel = -1;
				1253	__kmp_nThreadsPerCore = buf.ebx & 0xff;
				1254	if (__kmp_nThreadsPerCore == 0) {
				1255	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1256	return -1;
				1257	}
				1258	}
				1259	else if (kind == 2) {
				1260	//
				1261	// core level
				1262	//
				1263	coreLevel = level;
				1264	pkgLevel = -1;
				1265	nCoresPerPkg = buf.ebx & 0xff;
				1266	if (nCoresPerPkg == 0) {
				1267	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1268	return -1;
				1269	}
				1270	}
				1271	else {
				1272	if (level <= 0) {
				1273	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1274	return -1;
				1275	}
				1276	if (pkgLevel >= 0) {
				1277	continue;
				1278	}
				1279	pkgLevel = level;
				1280	nPackages = buf.ebx & 0xff;
				1281	if (nPackages == 0) {
				1282	*msg_id = kmp_i18n_str_InvalidCpuidInfo;
				1283	return -1;
				1284	}
				1285	}
				1286	}
				1287	int depth = level;
				1288
				1289	//
				1290	// In the above loop, "level" was counted from the finest level (usually
				1291	// thread) to the coarsest. The caller expects that we will place the
				1292	// labels in (*address2os)[].first.labels[] in the inverse order, so
				1293	// we need to invert the vars saying which level means what.
				1294	//
				1295	if (threadLevel >= 0) {
				1296	threadLevel = depth - threadLevel - 1;
				1297	}
				1298	if (coreLevel >= 0) {
				1299	coreLevel = depth - coreLevel - 1;
				1300	}
				1301	KMP_DEBUG_ASSERT(pkgLevel >= 0);
				1302	pkgLevel = depth - pkgLevel - 1;
				1303
				1304	//
				1305	// The algorithm used starts by setting the affinity to each available
				1306	// thread and retrieving info from the cpuid instruction, so if we are not
				1307	// capable of calling __kmp_affinity_get_map()/__kmp_affinity_get_map(),
				1308	// then we need to do something else - use the defaults that we calculated
				1309	// from issuing cpuid without binding to each proc.
				1310	//
				1311	if (! KMP_AFFINITY_CAPABLE())
				1312	{
				1313	//
				1314	// Hack to try and infer the machine topology using only the data
				1315	// available from cpuid on the current thread, and __kmp_xproc.
				1316	//
				1317	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				1318
				1319	__kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
				1320	nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
				1321	__kmp_ht_enabled = (__kmp_nThreadsPerCore > 1);
				1322	if (__kmp_affinity_verbose) {
				1323	KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
				1324	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1325	if (__kmp_affinity_uniform_topology()) {
				1326	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1327	} else {
				1328	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				1329	}
				1330	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1331	__kmp_nThreadsPerCore, __kmp_ncores);
				1332	}
				1333	return 0;
				1334	}
				1335
				1336	//
				1337	//
				1338	// From here on, we can assume that it is safe to call
				1339	// __kmp_get_system_affinity() and __kmp_set_system_affinity(),
				1340	// even if __kmp_affinity_type = affinity_none.
				1341	//
				1342
				1343	//
				1344	// Save the affinity mask for the current thread.
				1345	//
				1346	kmp_affin_mask_t *oldMask;
				1347	KMP_CPU_ALLOC(oldMask);
				1348	__kmp_get_system_affinity(oldMask, TRUE);
				1349
				1350	//
				1351	// Allocate the data structure to be returned.
				1352	//
				1353	AddrUnsPair retval = (AddrUnsPair )
				1354	__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
				1355
				1356	//
				1357	// Run through each of the available contexts, binding the current thread
				1358	// to it, and obtaining the pertinent information using the cpuid instr.
				1359	//
				1360	unsigned int proc;
				1361	int nApics = 0;
				1362	for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
				1363	//
				1364	// Skip this proc if it is not included in the machine model.
				1365	//
				1366	if (! KMP_CPU_ISSET(proc, fullMask)) {
				1367	continue;
				1368	}
				1369	KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
				1370
				1371	__kmp_affinity_bind_thread(proc);
				1372
				1373	//
				1374	// Extrach the labels for each level in the machine topology map
				1375	// from the Apic ID.
				1376	//
				1377	Address addr(depth);
				1378	int prev_shift = 0;
				1379
				1380	for (level = 0; level < depth; level++) {
				1381	__kmp_x86_cpuid(11, level, &buf);
				1382	unsigned apicId = buf.edx;
				1383	if (buf.ebx == 0) {
				1384	if (level != depth - 1) {
				1385	KMP_CPU_FREE(oldMask);
				1386	*msg_id = kmp_i18n_str_InconsistentCpuidInfo;
				1387	return -1;
				1388	}
				1389	addr.labels[depth - level - 1] = apicId >> prev_shift;
				1390	level++;
				1391	break;
				1392	}
				1393	int shift = buf.eax & 0x1f;
				1394	int mask = (1 << shift) - 1;
				1395	addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
				1396	prev_shift = shift;
				1397	}
				1398	if (level != depth) {
				1399	KMP_CPU_FREE(oldMask);
				1400	*msg_id = kmp_i18n_str_InconsistentCpuidInfo;
				1401	return -1;
				1402	}
				1403
				1404	retval[nApics] = AddrUnsPair(addr, proc);
				1405	nApics++;
				1406	}
				1407
				1408	//
				1409	// We've collected all the info we need.
				1410	// Restore the old affinity mask for this thread.
				1411	//
				1412	__kmp_set_system_affinity(oldMask, TRUE);
				1413
				1414	//
				1415	// If there's only one thread context to bind to, return now.
				1416	//
				1417	KMP_ASSERT(nApics > 0);
				1418	if (nApics == 1) {
				1419	__kmp_ncores = nPackages = 1;
				1420	__kmp_nThreadsPerCore = nCoresPerPkg = 1;
				1421	__kmp_ht_enabled = FALSE;
				1422	if (__kmp_affinity_verbose) {
				1423	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				1424	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1425
				1426	KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
				1427	if (__kmp_affinity_respect_mask) {
				1428	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				1429	} else {
				1430	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				1431	}
				1432	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1433	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1434	KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
				1435	__kmp_nThreadsPerCore, __kmp_ncores);
				1436	}
				1437
				1438	if (__kmp_affinity_type == affinity_none) {
				1439	__kmp_free(retval);
				1440	KMP_CPU_FREE(oldMask);
				1441	return 0;
				1442	}
				1443
				1444	//
				1445	// Form an Address object which only includes the package level.
				1446	//
				1447	Address addr(1);
				1448	addr.labels[0] = retval[0].first.labels[pkgLevel];
				1449	retval[0].first = addr;
				1450
				1451	if (__kmp_affinity_gran_levels < 0) {
				1452	__kmp_affinity_gran_levels = 0;
				1453	}
				1454
				1455	if (__kmp_affinity_verbose) {
				1456	__kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
				1457	}
				1458
				1459	*address2os = retval;
				1460	KMP_CPU_FREE(oldMask);
				1461	return 1;
				1462	}
				1463
				1464	//
				1465	// Sort the table by physical Id.
				1466	//
				1467	qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
				1468
				1469	//
				1470	// Find the radix at each of the levels.
				1471	//
				1472	unsigned totals = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1473	unsigned counts = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1474	unsigned maxCt = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1475	unsigned last = (unsigned )__kmp_allocate(depth * sizeof(unsigned));
				1476	for (level = 0; level < depth; level++) {
				1477	totals[level] = 1;
				1478	maxCt[level] = 1;
				1479	counts[level] = 1;
				1480	last[level] = retval[0].first.labels[level];
				1481	}
				1482
				1483	//
				1484	// From here on, the iteration variable "level" runs from the finest
				1485	// level to the coarsest, i.e. we iterate forward through
				1486	// (*address2os)[].first.labels[] - in the previous loops, we iterated
				1487	// backwards.
				1488	//
				1489	for (proc = 1; (int)proc < nApics; proc++) {
				1490	int level;
				1491	for (level = 0; level < depth; level++) {
				1492	if (retval[proc].first.labels[level] != last[level]) {
				1493	int j;
				1494	for (j = level + 1; j < depth; j++) {
				1495	totals[j]++;
				1496	counts[j] = 1;
				1497	// The line below causes printing incorrect topology information
				1498	// in case the max value for some level (maxCt[level]) is encountered earlier than
				1499	// some less value while going through the array.
				1500	// For example, let pkg0 has 4 cores and pkg1 has 2 cores. Then maxCt[1] == 2
				1501	// whereas it must be 4.
				1502	// TODO!!! Check if it can be commented safely
				1503	//maxCt[j] = 1;
				1504	last[j] = retval[proc].first.labels[j];
				1505	}
				1506	totals[level]++;
				1507	counts[level]++;
				1508	if (counts[level] > maxCt[level]) {
				1509	maxCt[level] = counts[level];
				1510	}
				1511	last[level] = retval[proc].first.labels[level];
				1512	break;
				1513	}
				1514	else if (level == depth - 1) {
				1515	__kmp_free(last);
				1516	__kmp_free(maxCt);
				1517	__kmp_free(counts);
				1518	__kmp_free(totals);
				1519	__kmp_free(retval);
				1520	KMP_CPU_FREE(oldMask);
				1521	*msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
				1522	return -1;
				1523	}
				1524	}
				1525	}
				1526
				1527	//
				1528	// When affinity is off, this routine will still be called to set
				1529	// __kmp_ht_enabled, & __kmp_ncores, as well as __kmp_nThreadsPerCore,
				1530	// nCoresPerPkg, & nPackages. Make sure all these vars are set
				1531	// correctly, and return if affinity is not enabled.
				1532	//
				1533	if (threadLevel >= 0) {
				1534	__kmp_nThreadsPerCore = maxCt[threadLevel];
				1535	}
				1536	else {
				1537	__kmp_nThreadsPerCore = 1;
				1538	}
				1539	__kmp_ht_enabled = (__kmp_nThreadsPerCore > 1);
				1540
				1541	nPackages = totals[pkgLevel];
				1542
				1543	if (coreLevel >= 0) {
				1544	__kmp_ncores = totals[coreLevel];
				1545	nCoresPerPkg = maxCt[coreLevel];
				1546	}
				1547	else {
				1548	__kmp_ncores = nPackages;
				1549	nCoresPerPkg = 1;
				1550	}
				1551
				1552	//
				1553	// Check to see if the machine topology is uniform
				1554	//
				1555	unsigned prod = maxCt[0];
				1556	for (level = 1; level < depth; level++) {
				1557	prod *= maxCt[level];
				1558	}
				1559	bool uniform = (prod == totals[level - 1]);
				1560
				1561	//
				1562	// Print the machine topology summary.
				1563	//
				1564	if (__kmp_affinity_verbose) {
				1565	char mask[KMP_AFFIN_MASK_PRINT_LEN];
				1566	__kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
				1567
				1568	KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
				1569	if (__kmp_affinity_respect_mask) {
				1570	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
				1571	} else {
				1572	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
				1573	}
				1574	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				1575	if (uniform) {
				1576	KMP_INFORM(Uniform, "KMP_AFFINITY");
				1577	} else {
				1578	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				1579	}
				1580
				1581	kmp_str_buf_t buf;
				1582	__kmp_str_buf_init(&buf);
				1583
				1584	__kmp_str_buf_print(&buf, "%d", totals[0]);
				1585	for (level = 1; level <= pkgLevel; level++) {
				1586	__kmp_str_buf_print(&buf, " x %d", maxCt[level]);
				1587	}
				1588	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
				1589	__kmp_nThreadsPerCore, __kmp_ncores);
				1590
				1591	__kmp_str_buf_free(&buf);
				1592	}
				1593
				1594	if (__kmp_affinity_type == affinity_none) {
				1595	__kmp_free(last);
				1596	__kmp_free(maxCt);
				1597	__kmp_free(counts);
				1598	__kmp_free(totals);
				1599	__kmp_free(retval);
				1600	KMP_CPU_FREE(oldMask);
				1601	return 0;
				1602	}
				1603
				1604	//
				1605	// Find any levels with radiix 1, and remove them from the map
				1606	// (except for the package level).
				1607	//
				1608	int new_depth = 0;
				1609	for (level = 0; level < depth; level++) {
				1610	if ((maxCt[level] == 1) && (level != pkgLevel)) {
				1611	continue;
				1612	}
				1613	new_depth++;
				1614	}
				1615
				1616	//
				1617	// If we are removing any levels, allocate a new vector to return,
				1618	// and copy the relevant information to it.
				1619	//
				1620	if (new_depth != depth) {
				1621	AddrUnsPair new_retval = (AddrUnsPair )__kmp_allocate(
				1622	sizeof(AddrUnsPair) * nApics);
				1623	for (proc = 0; (int)proc < nApics; proc++) {
				1624	Address addr(new_depth);
				1625	new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
				1626	}
				1627	int new_level = 0;
				1628	for (level = 0; level < depth; level++) {
				1629	if ((maxCt[level] == 1) && (level != pkgLevel)) {
				1630	if (level == threadLevel) {
				1631	threadLevel = -1;
				1632	}
				1633	else if ((threadLevel >= 0) && (level < threadLevel)) {
				1634	threadLevel--;
				1635	}
				1636	if (level == coreLevel) {
				1637	coreLevel = -1;
				1638	}
				1639	else if ((coreLevel >= 0) && (level < coreLevel)) {
				1640	coreLevel--;
				1641	}
				1642	if (level < pkgLevel) {
				1643	pkgLevel--;
				1644	}
				1645	continue;
				1646	}
				1647	for (proc = 0; (int)proc < nApics; proc++) {
				1648	new_retval[proc].first.labels[new_level]
				1649	= retval[proc].first.labels[level];
				1650	}
				1651	new_level++;
				1652	}
				1653
				1654	__kmp_free(retval);
				1655	retval = new_retval;
				1656	depth = new_depth;
				1657	}
				1658
				1659	if (__kmp_affinity_gran_levels < 0) {
				1660	//
				1661	// Set the granularity level based on what levels are modeled
				1662	// in the machine topology map.
				1663	//
				1664	__kmp_affinity_gran_levels = 0;
				1665	if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
				1666	__kmp_affinity_gran_levels++;
				1667	}
				1668	if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
				1669	__kmp_affinity_gran_levels++;
				1670	}
				1671	if (__kmp_affinity_gran > affinity_gran_package) {
				1672	__kmp_affinity_gran_levels++;
				1673	}
				1674	}
				1675
				1676	if (__kmp_affinity_verbose) {
				1677	__kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
				1678	coreLevel, threadLevel);
				1679	}
				1680
				1681	__kmp_free(last);
				1682	__kmp_free(maxCt);
				1683	__kmp_free(counts);
				1684	__kmp_free(totals);
				1685	KMP_CPU_FREE(oldMask);
				1686	*address2os = retval;
				1687	return depth;
				1688	}
				1689
				1690
				1691	# endif /* KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 */
				1692
				1693
				//
				// Layout of one processor record parsed from the cpuinfo file: a record
				// is an array of unsigned values indexed by the constants below. Node
				// levels, when present, occupy nodeIdIndex and the slots after it.
				//
				#define osIdIndex       0
				#define threadIdIndex   1
				#define coreIdIndex     2
				#define pkgIdIndex      3
				#define nodeIdIndex     4

				typedef unsigned *ProcCpuInfo;

				//
				// Highest index in use in a record. Starts at the package slot; the
				// cpuinfo parser raises it when it meets node_<n> fields.
				//
				static unsigned maxIndex = pkgIdIndex;


				//
				// qsort()-style comparator ordering records by OS proc id.
				//
				// Here a and b are read as the records themselves (pointers to the first
				// unsigned of a record). Returns -1, 0 or 1.
				// NOTE(review): unlike the phys_id comparator below, there is no extra
				// dereference - confirm that callers really pass records and not
				// pointers to record pointers.
				//
				static int
				__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
				{
				    const unsigned *aa = (const unsigned *)a;
				    const unsigned *bb = (const unsigned *)b;
				    if (aa[osIdIndex] < bb[osIdIndex]) return -1;
				    if (aa[osIdIndex] > bb[osIdIndex]) return 1;
				    return 0;
				}


				//
				// qsort()-style comparator ordering records by physical location.
				//
				// a and b point to array elements that are themselves pointers to
				// records. Fields are compared from index maxIndex down to osIdIndex, so
				// the highest-indexed field is the most significant key and the OS proc
				// id is the final tie breaker. Returns -1, 0 or 1.
				//
				static int
				__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
				{
				    unsigned i;
				    const unsigned *aa = *((const unsigned **)a);
				    const unsigned *bb = *((const unsigned **)b);
				    // i is unsigned, so the loop ends with an explicit break at index
				    // osIdIndex instead of testing i >= 0.
				    for (i = maxIndex; ; i--) {
				        if (aa[i] < bb[i]) return -1;
				        if (aa[i] > bb[i]) return 1;
				        if (i == osIdIndex) break;
				    }
				    return 0;
				}
				1728
				1729
				1730	//
				1731	// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
				1732	// affinity map.
				1733	//
				1734	static int
				1735	__kmp_affinity_create_cpuinfo_map(AddrUnsPair *address2os, int line,
				1736	kmp_i18n_id_t const msg_id, FILE f)
				1737	{
				1738	*address2os = NULL;
				1739	*msg_id = kmp_i18n_null;
				1740
				1741	//
				1742	// Scan the file, and count the number of "processor" (osId) fields,
				1743	// and find the highest value of <n> for a node_<n> field.
				1744	//
				1745	char buf[256];
				1746	unsigned num_records = 0;
				1747	while (! feof(f)) {
				1748	buf[sizeof(buf) - 1] = 1;
				1749	if (! fgets(buf, sizeof(buf), f)) {
				1750	//
				1751	// Read errors presumably because of EOF
				1752	//
				1753	break;
				1754	}
				1755
				1756	char s1[] = "processor";
				1757	if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
				1758	num_records++;
				1759	continue;
				1760	}
				1761
				1762	//
				1763	// FIXME - this will match "node_<n> <garbage>"
				1764	//
				1765	unsigned level;
				1766	if (sscanf(buf, "node_%d id", &level) == 1) {
				1767	if (nodeIdIndex + level >= maxIndex) {
				1768	maxIndex = nodeIdIndex + level;
				1769	}
				1770	continue;
				1771	}
				1772	}
				1773
				1774	//
				1775	// Check for empty file / no valid processor records, or too many.
				1776	// The number of records can't exceed the number of valid bits in the
				1777	// affinity mask.
				1778	//
				1779	if (num_records == 0) {
				1780	*line = 0;
				1781	*msg_id = kmp_i18n_str_NoProcRecords;
				1782	return -1;
				1783	}
				1784	if (num_records > (unsigned)__kmp_xproc) {
				1785	*line = 0;
				1786	*msg_id = kmp_i18n_str_TooManyProcRecords;
				1787	return -1;
				1788	}
				1789
				1790	//
				1791	// Set the file pointer back to the beginning, so that we can scan the
				1792	// file again, this time performing a full parse of the data.
				1793	// Allocate a vector of ProcCpuInfo object, where we will place the data.
				1794	// Adding an extra element at the end allows us to remove a lot of extra
				1795	// checks for termination conditions.
				1796	//
				1797	if (fseek(f, 0, SEEK_SET) != 0) {
				1798	*line = 0;
				1799	*msg_id = kmp_i18n_str_CantRewindCpuinfo;
				1800	return -1;
				1801	}
				1802
				1803	//
				1804	// Allocate the array of records to store the proc info in. The dummy
				1805	// element at the end makes the logic in filling them out easier to code.
				1806	//
				1807	unsigned threadInfo = (unsigned )__kmp_allocate((num_records + 1)
				1808	* sizeof(unsigned *));
				1809	unsigned i;
				1810	for (i = 0; i <= num_records; i++) {
				1811	threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
				1812	* sizeof(unsigned));
				1813	}
				1814
				1815	#define CLEANUP_THREAD_INFO \
				1816	for (i = 0; i <= num_records; i++) { \
				1817	__kmp_free(threadInfo[i]); \
				1818	} \
				1819	__kmp_free(threadInfo);
				1820
				1821	//
				1822	// A value of UINT_MAX means that we didn't find the field
				1823	//
				1824	unsigned __index;
				1825
				1826	#define INIT_PROC_INFO(p) \
				1827	for (__index = 0; __index <= maxIndex; __index++) { \
				1828	(p)[__index] = UINT_MAX; \
				1829	}
				1830
				1831	for (i = 0; i <= num_records; i++) {
				1832	INIT_PROC_INFO(threadInfo[i]);
				1833	}
				1834
				1835	unsigned num_avail = 0;
				1836	*line = 0;
				1837	while (! feof(f)) {
				1838	//
				1839	// Create an inner scoping level, so that all the goto targets at the
				1840	// end of the loop appear in an outer scoping level. This avoids
				1841	// warnings about jumping past an initialization to a target in the
				1842	// same block.
				1843	//
				1844	{
				1845	buf[sizeof(buf) - 1] = 1;
				1846	bool long_line = false;
				1847	if (! fgets(buf, sizeof(buf), f)) {
				1848	//
				1849	// Read errors presumably because of EOF
				1850	//
				1851	// If there is valid data in threadInfo[num_avail], then fake
				1852	// a blank line to ensure that the last address gets parsed.
				1853	//
				1854	bool valid = false;
				1855	for (i = 0; i <= maxIndex; i++) {
				1856	if (threadInfo[num_avail][i] != UINT_MAX) {
				1857	valid = true;
				1858	}
				1859	}
				1860	if (! valid) {
				1861	break;
				1862	}
				1863	buf[0] = 0;
				1864	} else if (!buf[sizeof(buf) - 1]) {
				1865	//
				1866	// The line is longer than the buffer. Set a flag and don't
				1867	// emit an error if we were going to ignore the line, anyway.
				1868	//
				1869	long_line = true;
				1870
				1871	#define CHECK_LINE \
				1872	if (long_line) { \
				1873	CLEANUP_THREAD_INFO; \
				1874	*msg_id = kmp_i18n_str_LongLineCpuinfo; \
				1875	return -1; \
				1876	}
				1877	}
				1878	(*line)++;
				1879
				1880	char s1[] = "processor";
				1881	if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
				1882	CHECK_LINE;
				1883	char *p = strchr(buf + sizeof(s1) - 1, ':');
				1884	unsigned val;
				1885	if ((p == NULL) \|\| (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
				1886	if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
				1887	threadInfo[num_avail][osIdIndex] = val;
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	1888	#if KMP_OS_LINUX && USE_SYSFS_INFO
				1889	char path[256];
				1890	snprintf(path, sizeof(path),
				1891	"/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
				1892	threadInfo[num_avail][osIdIndex]);
				1893	__kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
				1894
				1895	snprintf(path, sizeof(path),
				1896	"/sys/devices/system/cpu/cpu%u/topology/core_id",
				1897	threadInfo[num_avail][osIdIndex]);
				1898	__kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1899	continue;
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	1900	#else
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1901	}
				1902	char s2[] = "physical id";
				1903	if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
				1904	CHECK_LINE;
				1905	char *p = strchr(buf + sizeof(s2) - 1, ':');
				1906	unsigned val;
				1907	if ((p == NULL) \|\| (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
				1908	if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
				1909	threadInfo[num_avail][pkgIdIndex] = val;
				1910	continue;
				1911	}
				1912	char s3[] = "core id";
				1913	if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
				1914	CHECK_LINE;
				1915	char *p = strchr(buf + sizeof(s3) - 1, ':');
				1916	unsigned val;
				1917	if ((p == NULL) \|\| (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
				1918	if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
				1919	threadInfo[num_avail][coreIdIndex] = val;
				1920	continue;
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	1921	#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	1922	}
				1923	char s4[] = "thread id";
				1924	if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
				1925	CHECK_LINE;
				1926	char *p = strchr(buf + sizeof(s4) - 1, ':');
				1927	unsigned val;
				1928	if ((p == NULL) \|\| (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
				1929	if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
				1930	threadInfo[num_avail][threadIdIndex] = val;
				1931	continue;
				1932	}
				1933	unsigned level;
				1934	if (sscanf(buf, "node_%d id", &level) == 1) {
				1935	CHECK_LINE;
				1936	char *p = strchr(buf + sizeof(s4) - 1, ':');
				1937	unsigned val;
				1938	if ((p == NULL) \|\| (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
				1939	KMP_ASSERT(nodeIdIndex + level <= maxIndex);
				1940	if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
				1941	threadInfo[num_avail][nodeIdIndex + level] = val;
				1942	continue;
				1943	}
				1944
				1945	//
				1946	// We didn't recognize the leading token on the line.
				1947	// There are lots of leading tokens that we don't recognize -
				1948	// if the line isn't empty, go on to the next line.
				1949	//
				1950	if ((buf != 0) && (buf != '\n')) {
				1951	//
				1952	// If the line is longer than the buffer, read characters
				1953	// until we find a newline.
				1954	//
				1955	if (long_line) {
				1956	int ch;
				1957	while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
				1958	}
				1959	continue;
				1960	}
				1961
				1962	//
				1963	// A newline has signalled the end of the processor record.
				1964	// Check that there aren't too many procs specified.
				1965	//
				1966	if (num_avail == __kmp_xproc) {
				1967	CLEANUP_THREAD_INFO;
				1968	*msg_id = kmp_i18n_str_TooManyEntries;
				1969	return -1;
				1970	}
				1971
				1972	//
				1973	// Check for missing fields. The osId field must be there, and we
				1974	// currently require that the physical id field is specified, also.
				1975	//
				1976	if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
				1977	CLEANUP_THREAD_INFO;
				1978	*msg_id = kmp_i18n_str_MissingProcField;
				1979	return -1;
				1980	}
				1981	if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
				1982	CLEANUP_THREAD_INFO;
				1983	*msg_id = kmp_i18n_str_MissingPhysicalIDField;
				1984	return -1;
				1985	}
				1986
				1987	//
				1988	// Skip this proc if it is not included in the machine model.
				1989	//
				1990	if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
				1991	INIT_PROC_INFO(threadInfo[num_avail]);
				1992	continue;
				1993	}
				1994
				1995	//
				1996	// We have a successful parse of this proc's info.
				1997	// Increment the counter, and prepare for the next proc.
				1998	//
				1999	num_avail++;
				2000	KMP_ASSERT(num_avail <= num_records);
				2001	INIT_PROC_INFO(threadInfo[num_avail]);
				2002	}
				2003	continue;
				2004
				2005	no_val:
				2006	CLEANUP_THREAD_INFO;
				2007	*msg_id = kmp_i18n_str_MissingValCpuinfo;
				2008	return -1;
				2009
				2010	dup_field:
				2011	CLEANUP_THREAD_INFO;
				2012	*msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
				2013	return -1;
				2014	}
				2015	*line = 0;
				2016
				2017	# if KMP_MIC && REDUCE_TEAM_SIZE
				2018	unsigned teamSize = 0;
				2019	# endif // KMP_MIC && REDUCE_TEAM_SIZE
				2020
				2021	// check for num_records == __kmp_xproc ???
				2022
				2023	//
				2024	// If there's only one thread context to bind to, form an Address object
				2025	// with depth 1 and return immediately (or, if affinity is off, set
				2026	// address2os to NULL and return).
				2027	//
				2028	// If it is configured to omit the package level when there is only a
				2029	// single package, the logic at the end of this routine won't work if
				2030	// there is only a single thread - it would try to form an Address
				2031	// object with depth 0.
				2032	//
				2033	KMP_ASSERT(num_avail > 0);
				2034	KMP_ASSERT(num_avail <= num_records);
				2035	if (num_avail == 1) {
				2036	__kmp_ncores = 1;
				2037	__kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
				2038	__kmp_ht_enabled = FALSE;
				2039	if (__kmp_affinity_verbose) {
				2040	if (! KMP_AFFINITY_CAPABLE()) {
				2041	KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
				2042	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				2043	KMP_INFORM(Uniform, "KMP_AFFINITY");
				2044	}
				2045	else {
				2046	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				2047	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				2048	fullMask);
				2049	KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
				2050	if (__kmp_affinity_respect_mask) {
				2051	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				2052	} else {
				2053	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				2054	}
				2055	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				2056	KMP_INFORM(Uniform, "KMP_AFFINITY");
				2057	}
				2058	int index;
				2059	kmp_str_buf_t buf;
				2060	__kmp_str_buf_init(&buf);
				2061	__kmp_str_buf_print(&buf, "1");
				2062	for (index = maxIndex - 1; index > pkgIdIndex; index--) {
				2063	__kmp_str_buf_print(&buf, " x 1");
				2064	}
				2065	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
				2066	__kmp_str_buf_free(&buf);
				2067	}
				2068
				2069	if (__kmp_affinity_type == affinity_none) {
				2070	CLEANUP_THREAD_INFO;
				2071	return 0;
				2072	}
				2073
				2074	address2os = (AddrUnsPair)__kmp_allocate(sizeof(AddrUnsPair));
				2075	Address addr(1);
				2076	addr.labels[0] = threadInfo[0][pkgIdIndex];
				2077	(*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
				2078
				2079	if (__kmp_affinity_gran_levels < 0) {
				2080	__kmp_affinity_gran_levels = 0;
				2081	}
				2082
				2083	if (__kmp_affinity_verbose) {
				2084	__kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
				2085	}
				2086
				2087	CLEANUP_THREAD_INFO;
				2088	return 1;
				2089	}
				2090
				2091	//
				2092	// Sort the threadInfo table by physical Id.
				2093	//
				2094	qsort(threadInfo, num_avail, sizeof(*threadInfo),
				2095	__kmp_affinity_cmp_ProcCpuInfo_phys_id);
				2096
				2097	//
				2098	// The table is now sorted by pkgId / coreId / threadId, but we really
				2099	// don't know the radix of any of the fields. pkgId's may be sparsely
				2100	// assigned among the chips on a system. Although coreId's are usually
				2101	// assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
				2102	// [0..threadsPerCore-1], we don't want to make any such assumptions.
				2103	//
				2104	// For that matter, we don't know what coresPerPkg and threadsPerCore
				2105	// (or the total # packages) are at this point - we want to determine
				2106	// that now. We only have an upper bound on the first two figures.
				2107	//
				2108	unsigned counts = (unsigned )__kmp_allocate((maxIndex + 1)
				2109	* sizeof(unsigned));
				2110	unsigned maxCt = (unsigned )__kmp_allocate((maxIndex + 1)
				2111	* sizeof(unsigned));
				2112	unsigned totals = (unsigned )__kmp_allocate((maxIndex + 1)
				2113	* sizeof(unsigned));
				2114	unsigned lastId = (unsigned )__kmp_allocate((maxIndex + 1)
				2115	* sizeof(unsigned));
				2116
				2117	bool assign_thread_ids = false;
				2118	unsigned threadIdCt;
				2119	unsigned index;
				2120
				2121	restart_radix_check:
				2122	threadIdCt = 0;
				2123
				2124	//
				2125	// Initialize the counter arrays with data from threadInfo[0].
				2126	//
				2127	if (assign_thread_ids) {
				2128	if (threadInfo[0][threadIdIndex] == UINT_MAX) {
				2129	threadInfo[0][threadIdIndex] = threadIdCt++;
				2130	}
				2131	else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
				2132	threadIdCt = threadInfo[0][threadIdIndex] + 1;
				2133	}
				2134	}
				2135	for (index = 0; index <= maxIndex; index++) {
				2136	counts[index] = 1;
				2137	maxCt[index] = 1;
				2138	totals[index] = 1;
				2139	lastId[index] = threadInfo[0][index];;
				2140	}
				2141
				2142	//
				2143	// Run through the rest of the OS procs.
				2144	//
				2145	for (i = 1; i < num_avail; i++) {
				2146	//
				2147	// Find the most significant index whose id differs
				2148	// from the id for the previous OS proc.
				2149	//
				2150	for (index = maxIndex; index >= threadIdIndex; index--) {
				2151	if (assign_thread_ids && (index == threadIdIndex)) {
				2152	//
				2153	// Auto-assign the thread id field if it wasn't specified.
				2154	//
				2155	if (threadInfo[i][threadIdIndex] == UINT_MAX) {
				2156	threadInfo[i][threadIdIndex] = threadIdCt++;
				2157	}
				2158
				2159	//
				2160	// Apparently the thread id field was specified for some
				2161	// entries and not others. Start the thread id counter
				2162	// off at the next higher thread id.
				2163	//
				2164	else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
				2165	threadIdCt = threadInfo[i][threadIdIndex] + 1;
				2166	}
				2167	}
				2168	if (threadInfo[i][index] != lastId[index]) {
				2169	//
				2170	// Run through all indices which are less significant,
				2171	// and reset the counts to 1.
				2172	//
				2173	// At all levels up to and including index, we need to
				2174	// increment the totals and record the last id.
				2175	//
				2176	unsigned index2;
				2177	for (index2 = threadIdIndex; index2 < index; index2++) {
				2178	totals[index2]++;
				2179	if (counts[index2] > maxCt[index2]) {
				2180	maxCt[index2] = counts[index2];
				2181	}
				2182	counts[index2] = 1;
				2183	lastId[index2] = threadInfo[i][index2];
				2184	}
				2185	counts[index]++;
				2186	totals[index]++;
				2187	lastId[index] = threadInfo[i][index];
				2188
				2189	if (assign_thread_ids && (index > threadIdIndex)) {
				2190
				2191	# if KMP_MIC && REDUCE_TEAM_SIZE
				2192	//
				2193	// The default team size is the total #threads in the machine
				2194	// minus 1 thread for every core that has 3 or more threads.
				2195	//
				2196	teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
				2197	# endif // KMP_MIC && REDUCE_TEAM_SIZE
				2198
				2199	//
				2200	// Restart the thread counter, as we are on a new core.
				2201	//
				2202	threadIdCt = 0;
				2203
				2204	//
				2205	// Auto-assign the thread id field if it wasn't specified.
				2206	//
				2207	if (threadInfo[i][threadIdIndex] == UINT_MAX) {
				2208	threadInfo[i][threadIdIndex] = threadIdCt++;
				2209	}
				2210
				2211	//
				2212	// Apparently the thread id field was specified for some
				2213	// entries and not others. Start the thread id counter
				2214	// off at the next higher thread id.
				2215	//
				2216	else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
				2217	threadIdCt = threadInfo[i][threadIdIndex] + 1;
				2218	}
				2219	}
				2220	break;
				2221	}
				2222	}
				2223	if (index < threadIdIndex) {
				2224	//
				2225	// If thread ids were specified, it is an error if they are not
				2226	// unique. Also, check that we haven't already restarted the
				2227	// loop (to be safe - shouldn't need to).
				2228	//
				2229	if ((threadInfo[i][threadIdIndex] != UINT_MAX)
				2230	\|\| assign_thread_ids) {
				2231	__kmp_free(lastId);
				2232	__kmp_free(totals);
				2233	__kmp_free(maxCt);
				2234	__kmp_free(counts);
				2235	CLEANUP_THREAD_INFO;
				2236	*msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
				2237	return -1;
				2238	}
				2239
				2240	//
				2241	// If the thread ids were not specified and we see
				2242	// entries that are duplicates, start the loop over and
				2243	// assign the thread ids manually.
				2244	//
				2245	assign_thread_ids = true;
				2246	goto restart_radix_check;
				2247	}
				2248	}
				2249
				2250	# if KMP_MIC && REDUCE_TEAM_SIZE
				2251	//
				2252	// The default team size is the total #threads in the machine
				2253	// minus 1 thread for every core that has 3 or more threads.
				2254	//
				2255	teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
				2256	# endif // KMP_MIC && REDUCE_TEAM_SIZE
				2257
				2258	for (index = threadIdIndex; index <= maxIndex; index++) {
				2259	if (counts[index] > maxCt[index]) {
				2260	maxCt[index] = counts[index];
				2261	}
				2262	}
				2263
				2264	__kmp_nThreadsPerCore = maxCt[threadIdIndex];
				2265	nCoresPerPkg = maxCt[coreIdIndex];
				2266	nPackages = totals[pkgIdIndex];
				2267
				2268	//
				2269	// Check to see if the machine topology is uniform
				2270	//
				2271	unsigned prod = totals[maxIndex];
				2272	for (index = threadIdIndex; index < maxIndex; index++) {
				2273	prod *= maxCt[index];
				2274	}
				2275	bool uniform = (prod == totals[threadIdIndex]);
				2276
				2277	//
				2278	// When affinity is off, this routine will still be called to set
				2279	// __kmp_ht_enabled, & __kmp_ncores, as well as __kmp_nThreadsPerCore,
				2280	// nCoresPerPkg, & nPackages. Make sure all these vars are set
				2281	// correctly, and return now if affinity is not enabled.
				2282	//
				2283	__kmp_ht_enabled = (maxCt[threadIdIndex] > 1); // threads per core > 1
				2284	__kmp_ncores = totals[coreIdIndex];
				2285
				2286	if (__kmp_affinity_verbose) {
				2287	if (! KMP_AFFINITY_CAPABLE()) {
				2288	KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
				2289	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				2290	if (uniform) {
				2291	KMP_INFORM(Uniform, "KMP_AFFINITY");
				2292	} else {
				2293	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				2294	}
				2295	}
				2296	else {
				2297	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				2298	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
				2299	KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
				2300	if (__kmp_affinity_respect_mask) {
				2301	KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
				2302	} else {
				2303	KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
				2304	}
				2305	KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
				2306	if (uniform) {
				2307	KMP_INFORM(Uniform, "KMP_AFFINITY");
				2308	} else {
				2309	KMP_INFORM(NonUniform, "KMP_AFFINITY");
				2310	}
				2311	}
				2312	kmp_str_buf_t buf;
				2313	__kmp_str_buf_init(&buf);
				2314
				2315	__kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
				2316	for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
				2317	__kmp_str_buf_print(&buf, " x %d", maxCt[index]);
				2318	}
				2319	KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
				2320	maxCt[threadIdIndex], __kmp_ncores);
				2321
				2322	__kmp_str_buf_free(&buf);
				2323	}
				2324
				2325	# if KMP_MIC && REDUCE_TEAM_SIZE
				2326	//
				2327	// Set the default team size.
				2328	//
				2329	if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
				2330	__kmp_dflt_team_nth = teamSize;
				2331	KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
				2332	__kmp_dflt_team_nth));
				2333	}
				2334	# endif // KMP_MIC && REDUCE_TEAM_SIZE
				2335
				2336	if (__kmp_affinity_type == affinity_none) {
				2337	__kmp_free(lastId);
				2338	__kmp_free(totals);
				2339	__kmp_free(maxCt);
				2340	__kmp_free(counts);
				2341	CLEANUP_THREAD_INFO;
				2342	return 0;
				2343	}
				2344
				2345	//
				2346	// Count the number of levels which have more nodes at that level than
				2347	// at the parent's level (with there being an implicit root node of
				2348	// the top level). This is equivalent to saying that there is at least
				2349	// one node at this level which has a sibling. These levels are in the
				2350	// map, and the package level is always in the map.
				2351	//
				2352	bool inMap = (bool )__kmp_allocate((maxIndex + 1) * sizeof(bool));
				2353	int level = 0;
				2354	for (index = threadIdIndex; index < maxIndex; index++) {
				2355	KMP_ASSERT(totals[index] >= totals[index + 1]);
				2356	inMap[index] = (totals[index] > totals[index + 1]);
				2357	}
				2358	inMap[maxIndex] = (totals[maxIndex] > 1);
				2359	inMap[pkgIdIndex] = true;
				2360
				2361	int depth = 0;
				2362	for (index = threadIdIndex; index <= maxIndex; index++) {
				2363	if (inMap[index]) {
				2364	depth++;
				2365	}
				2366	}
				2367	KMP_ASSERT(depth > 0);
				2368
				2369	//
				2370	// Construct the data structure that is to be returned.
				2371	//
				2372	address2os = (AddrUnsPair)
				2373	__kmp_allocate(sizeof(AddrUnsPair) * num_avail);
				2374	int pkgLevel = -1;
				2375	int coreLevel = -1;
				2376	int threadLevel = -1;
				2377
				2378	for (i = 0; i < num_avail; ++i) {
				2379	Address addr(depth);
				2380	unsigned os = threadInfo[i][osIdIndex];
				2381	int src_index;
				2382	int dst_index = 0;
				2383
				2384	for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
				2385	if (! inMap[src_index]) {
				2386	continue;
				2387	}
				2388	addr.labels[dst_index] = threadInfo[i][src_index];
				2389	if (src_index == pkgIdIndex) {
				2390	pkgLevel = dst_index;
				2391	}
				2392	else if (src_index == coreIdIndex) {
				2393	coreLevel = dst_index;
				2394	}
				2395	else if (src_index == threadIdIndex) {
				2396	threadLevel = dst_index;
				2397	}
				2398	dst_index++;
				2399	}
				2400	(*address2os)[i] = AddrUnsPair(addr, os);
				2401	}
				2402
				2403	if (__kmp_affinity_gran_levels < 0) {
				2404	//
				2405	// Set the granularity level based on what levels are modeled
				2406	// in the machine topology map.
				2407	//
				2408	unsigned src_index;
				2409	__kmp_affinity_gran_levels = 0;
				2410	for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
				2411	if (! inMap[src_index]) {
				2412	continue;
				2413	}
				2414	switch (src_index) {
				2415	case threadIdIndex:
				2416	if (__kmp_affinity_gran > affinity_gran_thread) {
				2417	__kmp_affinity_gran_levels++;
				2418	}
				2419
				2420	break;
				2421	case coreIdIndex:
				2422	if (__kmp_affinity_gran > affinity_gran_core) {
				2423	__kmp_affinity_gran_levels++;
				2424	}
				2425	break;
				2426
				2427	case pkgIdIndex:
				2428	if (__kmp_affinity_gran > affinity_gran_package) {
				2429	__kmp_affinity_gran_levels++;
				2430	}
				2431	break;
				2432	}
				2433	}
				2434	}
				2435
				2436	if (__kmp_affinity_verbose) {
				2437	__kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
				2438	coreLevel, threadLevel);
				2439	}
				2440
				2441	__kmp_free(inMap);
				2442	__kmp_free(lastId);
				2443	__kmp_free(totals);
				2444	__kmp_free(maxCt);
				2445	__kmp_free(counts);
				2446	CLEANUP_THREAD_INFO;
				2447	return depth;
				2448	}
				2449
				2450
				2451	//
				2452	// Create and return a table of affinity masks, indexed by OS thread ID.
				2453	// This routine handles OR'ing together all the affinity masks of threads
				2454	// that are sufficiently close, if granularity > fine.
				2455	//
				2456	static kmp_affin_mask_t *
				2457	__kmp_create_masks(unsigned maxIndex, unsigned numUnique,
				2458	AddrUnsPair *address2os, unsigned numAddrs)
				2459	{
				2460	//
				2461	// First form a table of affinity masks in order of OS thread id.
				2462	//
				2463	unsigned depth;
				2464	unsigned maxOsId;
				2465	unsigned i;
				2466
				2467	KMP_ASSERT(numAddrs > 0);
				2468	depth = address2os[0].first.depth;
				2469
				2470	maxOsId = 0;
				2471	for (i = 0; i < numAddrs; i++) {
				2472	unsigned osId = address2os[i].second;
				2473	if (osId > maxOsId) {
				2474	maxOsId = osId;
				2475	}
				2476	}
				2477	kmp_affin_mask_t osId2Mask = (kmp_affin_mask_t )__kmp_allocate(
				2478	(maxOsId + 1) * __kmp_affin_mask_size);
				2479
				2480	//
				2481	// Sort the address2os table according to physical order. Doing so
				2482	// will put all threads on the same core/package/node in consecutive
				2483	// locations.
				2484	//
				2485	qsort(address2os, numAddrs, sizeof(*address2os),
				2486	__kmp_affinity_cmp_Address_labels);
				2487
				2488	KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
				2489	if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
				2490	KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
				2491	}
				2492	if (__kmp_affinity_gran_levels >= (int)depth) {
				2493	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				2494	&& (__kmp_affinity_type != affinity_none))) {
				2495	KMP_WARNING(AffThreadsMayMigrate);
				2496	}
				2497	}
				2498
				2499	//
				2500	// Run through the table, forming the masks for all threads on each
				2501	// core. Threads on the same core will have identical "Address"
				2502	// objects, not considering the last level, which must be the thread
				2503	// id. All threads on a core will appear consecutively.
				2504	//
				2505	unsigned unique = 0;
				2506	unsigned j = 0; // index of 1st thread on core
				2507	unsigned leader = 0;
				2508	Address *leaderAddr = &(address2os[0].first);
				2509	kmp_affin_mask_t *sum
				2510	= (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
				2511	KMP_CPU_ZERO(sum);
				2512	KMP_CPU_SET(address2os[0].second, sum);
				2513	for (i = 1; i < numAddrs; i++) {
				2514	//
				2515	// If this thread is sufficiently close to the leader (withing the
				2516	// granularity setting), then set the bit for this os thread in the
				2517	// affinity mask for this group, and go on to the next thread.
				2518	//
				2519	if (leaderAddr->isClose(address2os[i].first,
				2520	__kmp_affinity_gran_levels)) {
				2521	KMP_CPU_SET(address2os[i].second, sum);
				2522	continue;
				2523	}
				2524
				2525	//
				2526	// For every thread in this group, copy the mask to the thread's
				2527	// entry in the osId2Mask table. Mark the first address as a
				2528	// leader.
				2529	//
				2530	for (; j < i; j++) {
				2531	unsigned osId = address2os[j].second;
				2532	KMP_DEBUG_ASSERT(osId <= maxOsId);
				2533	kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
				2534	KMP_CPU_COPY(mask, sum);
				2535	address2os[j].first.leader = (j == leader);
				2536	}
				2537	unique++;
				2538
				2539	//
				2540	// Start a new mask.
				2541	//
				2542	leader = i;
				2543	leaderAddr = &(address2os[i].first);
				2544	KMP_CPU_ZERO(sum);
				2545	KMP_CPU_SET(address2os[i].second, sum);
				2546	}
				2547
				2548	//
				2549	// For every thread in last group, copy the mask to the thread's
				2550	// entry in the osId2Mask table.
				2551	//
				2552	for (; j < i; j++) {
				2553	unsigned osId = address2os[j].second;
				2554	KMP_DEBUG_ASSERT(osId <= maxOsId);
				2555	kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
				2556	KMP_CPU_COPY(mask, sum);
				2557	address2os[j].first.leader = (j == leader);
				2558	}
				2559	unique++;
				2560
				2561	*maxIndex = maxOsId;
				2562	*numUnique = unique;
				2563	return osId2Mask;
				2564	}
				2565
				2566
				2567	//
				2568	// Stuff for the affinity proclist parsers. It's easier to declare these vars
				2569	// as file-static than to try and pass them through the calling sequence of
				2570	// the recursive-descent OMP_PLACES parser.
				2571	//
				2572	static kmp_affin_mask_t *newMasks;
				2573	static int numNewMasks;
				2574	static int nextNewMask;
				2575
				//
				// Append a copy of the mask pointed to by _mask to the newMasks vector.
				//
				// When the vector is full (nextNewMask >= numNewMasks) its capacity is
				// doubled through KMP_INTERNAL_REALLOC; a capacity of zero grows to 2 so
				// that the doubling can never get stuck. The reallocation result is
				// asserted to be non-NULL before it is written through.
				//
				// Growing may move the vector, so pointers into newMasks obtained before
				// this macro runs must not be used afterwards.
				//
				// The expansion is a brace-enclosed block, not a single expression.
				//
				#define ADD_MASK(_mask) \
				    {                                                                   \
				        if (nextNewMask >= numNewMasks) {                               \
				            numNewMasks = (numNewMasks > 0) ? (numNewMasks * 2) : 2;    \
				            newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
				              numNewMasks * __kmp_affin_mask_size);                     \
				            KMP_ASSERT(newMasks != NULL);                               \
				        }                                                               \
				        KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask));    \
				        nextNewMask++;                                                  \
				    }
				2586
				//
				// Append the mask stored for OS proc id _osId in the table _osId2Mask to
				// the newMasks vector (via ADD_MASK).
				//
				// The id is rejected when it is greater than _maxOsId, or when the bit
				// for _osId is not set in its own table entry. A rejected id is skipped;
				// an AffIgnoreInvalidProcID warning is issued if verbose mode is on, or
				// if warnings are enabled and the affinity type is not affinity_none.
				//
				// _osId is evaluated several times, so pass an expression without side
				// effects. The expansion is a brace-enclosed block.
				//
				#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
				    {                                                                   \
				        if (((_osId) > (_maxOsId)) ||                                   \
				          (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX(_osId2Mask, (_osId))))) { \
				            if (__kmp_affinity_verbose || (__kmp_affinity_warnings      \
				              && (__kmp_affinity_type != affinity_none))) {             \
				                KMP_WARNING(AffIgnoreInvalidProcID, _osId);             \
				            }                                                           \
				        }                                                               \
				        else {                                                          \
				            ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId)));               \
				        }                                                               \
				    }
				2600
				2601
				2602	//
				2603	// Re-parse the proclist (for the explicit affinity type), and form the list
				2604	// of affinity newMasks indexed by gtid.
				2605	//
				2606	static void
				2607	__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
				2608	unsigned int out_numMasks, const char proclist,
				2609	kmp_affin_mask_t *osId2Mask, int maxOsId)
				2610	{
				2611	const char *scan = proclist;
				2612	const char *next = proclist;
				2613
				2614	//
				2615	// We use malloc() for the temporary mask vector,
				2616	// so that we can use realloc() to extend it.
				2617	//
				2618	numNewMasks = 2;
				2619	newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
				2620	* __kmp_affin_mask_size);
				2621	nextNewMask = 0;
				2622	kmp_affin_mask_t sumMask = (kmp_affin_mask_t )__kmp_allocate(
				2623	__kmp_affin_mask_size);
				2624	int setSize = 0;
				2625
				2626	for (;;) {
				2627	int start, end, stride;
				2628
				2629	SKIP_WS(scan);
				2630	next = scan;
				2631	if (*next == '\0') {
				2632	break;
				2633	}
				2634
				2635	if (*next == '{') {
				2636	int num;
				2637	setSize = 0;
				2638	next++; // skip '{'
				2639	SKIP_WS(next);
				2640	scan = next;
				2641
				2642	//
				2643	// Read the first integer in the set.
				2644	//
				2645	KMP_ASSERT2((next >= '0') && (next <= '9'),
				2646	"bad proclist");
				2647	SKIP_DIGITS(next);
				2648	num = __kmp_str_to_int(scan, *next);
				2649	KMP_ASSERT2(num >= 0, "bad explicit proc list");
				2650
				2651	//
				2652	// Copy the mask for that osId to the sum (union) mask.
				2653	//
				2654	if ((num > maxOsId) \|\|
				2655	(! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
				2656	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				2657	&& (__kmp_affinity_type != affinity_none))) {
				2658	KMP_WARNING(AffIgnoreInvalidProcID, num);
				2659	}
				2660	KMP_CPU_ZERO(sumMask);
				2661	}
				2662	else {
				2663	KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
				2664	setSize = 1;
				2665	}
				2666
				2667	for (;;) {
				2668	//
				2669	// Check for end of set.
				2670	//
				2671	SKIP_WS(next);
				2672	if (*next == '}') {
				2673	next++; // skip '}'
				2674	break;
				2675	}
				2676
				2677	//
				2678	// Skip optional comma.
				2679	//
				2680	if (*next == ',') {
				2681	next++;
				2682	}
				2683	SKIP_WS(next);
				2684
				2685	//
				2686	// Read the next integer in the set.
				2687	//
				2688	scan = next;
				2689	KMP_ASSERT2((next >= '0') && (next <= '9'),
				2690	"bad explicit proc list");
				2691
				2692	SKIP_DIGITS(next);
				2693	num = __kmp_str_to_int(scan, *next);
				2694	KMP_ASSERT2(num >= 0, "bad explicit proc list");
				2695
				2696	//
				2697	// Add the mask for that osId to the sum mask.
				2698	//
				2699	if ((num > maxOsId) \|\|
				2700	(! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
				2701	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				2702	&& (__kmp_affinity_type != affinity_none))) {
				2703	KMP_WARNING(AffIgnoreInvalidProcID, num);
				2704	}
				2705	}
				2706	else {
				2707	KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
				2708	setSize++;
				2709	}
				2710	}
				2711	if (setSize > 0) {
				2712	ADD_MASK(sumMask);
				2713	}
				2714
				2715	SKIP_WS(next);
				2716	if (*next == ',') {
				2717	next++;
				2718	}
				2719	scan = next;
				2720	continue;
				2721	}
				2722
				2723	//
				2724	// Read the first integer.
				2725	//
				2726	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list");
				2727	SKIP_DIGITS(next);
				2728	start = __kmp_str_to_int(scan, *next);
				2729	KMP_ASSERT2(start >= 0, "bad explicit proc list");
				2730	SKIP_WS(next);
				2731
				2732	//
				2733	// If this isn't a range, then add a mask to the list and go on.
				2734	//
				2735	if (*next != '-') {
				2736	ADD_MASK_OSID(start, osId2Mask, maxOsId);
				2737
				2738	//
				2739	// Skip optional comma.
				2740	//
				2741	if (*next == ',') {
				2742	next++;
				2743	}
				2744	scan = next;
				2745	continue;
				2746	}
				2747
				2748	//
				2749	// This is a range. Skip over the '-' and read in the 2nd int.
				2750	//
				2751	next++; // skip '-'
				2752	SKIP_WS(next);
				2753	scan = next;
				2754	KMP_ASSERT2((next >= '0') && (next <= '9'), "bad explicit proc list");
				2755	SKIP_DIGITS(next);
				2756	end = __kmp_str_to_int(scan, *next);
				2757	KMP_ASSERT2(end >= 0, "bad explicit proc list");
				2758
				2759	//
				2760	// Check for a stride parameter
				2761	//
				2762	stride = 1;
				2763	SKIP_WS(next);
				2764	if (*next == ':') {
				2765	//
				2766	// A stride is specified. Skip over the ':" and read the 3rd int.
				2767	//
				2768	int sign = +1;
				2769	next++; // skip ':'
				2770	SKIP_WS(next);
				2771	scan = next;
				2772	if (*next == '-') {
				2773	sign = -1;
				2774	next++;
				2775	SKIP_WS(next);
				2776	scan = next;
				2777	}
				2778	KMP_ASSERT2((next >= '0') && (next <= '9'),
				2779	"bad explicit proc list");
				2780	SKIP_DIGITS(next);
				2781	stride = __kmp_str_to_int(scan, *next);
				2782	KMP_ASSERT2(stride >= 0, "bad explicit proc list");
				2783	stride *= sign;
				2784	}
				2785
				2786	//
				2787	// Do some range checks.
				2788	//
				2789	KMP_ASSERT2(stride != 0, "bad explicit proc list");
				2790	if (stride > 0) {
				2791	KMP_ASSERT2(start <= end, "bad explicit proc list");
				2792	}
				2793	else {
				2794	KMP_ASSERT2(start >= end, "bad explicit proc list");
				2795	}
				2796	KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
				2797
				2798	//
				2799	// Add the mask for each OS proc # to the list.
				2800	//
				2801	if (stride > 0) {
				2802	do {
				2803	ADD_MASK_OSID(start, osId2Mask, maxOsId);
				2804	start += stride;
				2805	} while (start <= end);
				2806	}
				2807	else {
				2808	do {
				2809	ADD_MASK_OSID(start, osId2Mask, maxOsId);
				2810	start += stride;
				2811	} while (start >= end);
				2812	}
				2813
				2814	//
				2815	// Skip optional comma.
				2816	//
				2817	SKIP_WS(next);
				2818	if (*next == ',') {
				2819	next++;
				2820	}
				2821	scan = next;
				2822	}
				2823
				2824	*out_numMasks = nextNewMask;
				2825	if (nextNewMask == 0) {
				2826	*out_masks = NULL;
				2827	KMP_INTERNAL_FREE(newMasks);
				2828	return;
				2829	}
				2830	*out_masks
				2831	= (kmp_affin_mask_t )__kmp_allocate(nextNewMask __kmp_affin_mask_size);
				2832	memcpy(out_masks, newMasks, nextNewMask __kmp_affin_mask_size);
				2833	__kmp_free(sumMask);
				2834	KMP_INTERNAL_FREE(newMasks);
				2835	}
				2836
				2837
				2838	# if OMP_40_ENABLED
				2839
/*-----------------------------------------------------------------------------

Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
places.  Again, here is the grammar:

place_list := place
place_list := place , place_list
place := num
place := place : num
place := place : num : signed
place := { subplace_list }
place := ! place                  // (lowest priority)
subplace_list := subplace
subplace_list := subplace , subplace_list
subplace := num
subplace := num : num
subplace := num : num : signed
signed := num
signed := + signed
signed := - signed

-----------------------------------------------------------------------------*/
				2862
				2863	static void
				2864	__kmp_process_subplace_list(const char *scan, kmp_affin_mask_t osId2Mask,
				2865	int maxOsId, kmp_affin_mask_t tempMask, int setSize)
				2866	{
				2867	const char *next;
				2868
				2869	for (;;) {
				2870	int start, count, stride, i;
				2871
				2872	//
				2873	// Read in the starting proc id
				2874	//
				2875	SKIP_WS(*scan);
				2876	KMP_ASSERT2((scan >= '0') && (scan <= '9'),
				2877	"bad explicit places list");
				2878	next = *scan;
				2879	SKIP_DIGITS(next);
				2880	start = __kmp_str_to_int(scan, next);
				2881	KMP_ASSERT(start >= 0);
				2882	*scan = next;
				2883
				2884	//
				2885	// valid follow sets are ',' ':' and '}'
				2886	//
				2887	SKIP_WS(*scan);
				2888	if (scan == '}' \|\| scan == ',') {
				2889	if ((start > maxOsId) \|\|
				2890	(! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
				2891	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				2892	&& (__kmp_affinity_type != affinity_none))) {
				2893	KMP_WARNING(AffIgnoreInvalidProcID, start);
				2894	}
				2895	}
				2896	else {
				2897	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
				2898	(*setSize)++;
				2899	}
				2900	if (**scan == '}') {
				2901	break;
				2902	}
				2903	(*scan)++; // skip ','
				2904	continue;
				2905	}
				2906	KMP_ASSERT2(**scan == ':', "bad explicit places list");
				2907	(*scan)++; // skip ':'
				2908
				2909	//
				2910	// Read count parameter
				2911	//
				2912	SKIP_WS(*scan);
				2913	KMP_ASSERT2((scan >= '0') && (scan <= '9'),
				2914	"bad explicit places list");
				2915	next = *scan;
				2916	SKIP_DIGITS(next);
				2917	count = __kmp_str_to_int(scan, next);
				2918	KMP_ASSERT(count >= 0);
				2919	*scan = next;
				2920
				2921	//
				2922	// valid follow sets are ',' ':' and '}'
				2923	//
				2924	SKIP_WS(*scan);
				2925	if (scan == '}' \|\| scan == ',') {
				2926	for (i = 0; i < count; i++) {
				2927	if ((start > maxOsId) \|\|
				2928	(! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
				2929	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				2930	&& (__kmp_affinity_type != affinity_none))) {
				2931	KMP_WARNING(AffIgnoreInvalidProcID, start);
				2932	}
				2933	break; // don't proliferate warnings for large count
				2934	}
				2935	else {
				2936	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
				2937	start++;
				2938	(*setSize)++;
				2939	}
				2940	}
				2941	if (**scan == '}') {
				2942	break;
				2943	}
				2944	(*scan)++; // skip ','
				2945	continue;
				2946	}
				2947	KMP_ASSERT2(**scan == ':', "bad explicit places list");
				2948	(*scan)++; // skip ':'
				2949
				2950	//
				2951	// Read stride parameter
				2952	//
				2953	int sign = +1;
				2954	for (;;) {
				2955	SKIP_WS(*scan);
				2956	if (**scan == '+') {
				2957	(*scan)++; // skip '+'
				2958	continue;
				2959	}
				2960	if (**scan == '-') {
				2961	sign *= -1;
				2962	(*scan)++; // skip '-'
				2963	continue;
				2964	}
				2965	break;
				2966	}
				2967	SKIP_WS(*scan);
				2968	KMP_ASSERT2((scan >= '0') && (scan <= '9'),
				2969	"bad explicit places list");
				2970	next = *scan;
				2971	SKIP_DIGITS(next);
				2972	stride = __kmp_str_to_int(scan, next);
				2973	KMP_ASSERT(stride >= 0);
				2974	*scan = next;
				2975	stride *= sign;
				2976
				2977	//
				2978	// valid follow sets are ',' and '}'
				2979	//
				2980	SKIP_WS(*scan);
				2981	if (scan == '}' \|\| scan == ',') {
				2982	for (i = 0; i < count; i++) {
				2983	if ((start > maxOsId) \|\|
				2984	(! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
				2985	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				2986	&& (__kmp_affinity_type != affinity_none))) {
				2987	KMP_WARNING(AffIgnoreInvalidProcID, start);
				2988	}
				2989	break; // don't proliferate warnings for large count
				2990	}
				2991	else {
				2992	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
				2993	start += stride;
				2994	(*setSize)++;
				2995	}
				2996	}
				2997	if (**scan == '}') {
				2998	break;
				2999	}
				3000	(*scan)++; // skip ','
				3001	continue;
				3002	}
				3003
				3004	KMP_ASSERT2(0, "bad explicit places list");
				3005	}
				3006	}
				3007
				3008
				3009	static void
				3010	__kmp_process_place(const char *scan, kmp_affin_mask_t osId2Mask,
				3011	int maxOsId, kmp_affin_mask_t tempMask, int setSize)
				3012	{
				3013	const char *next;
				3014
				3015	//
				3016	// valid follow sets are '{' '!' and num
				3017	//
				3018	SKIP_WS(*scan);
				3019	if (**scan == '{') {
				3020	(*scan)++; // skip '{'
				3021	__kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
				3022	setSize);
				3023	KMP_ASSERT2(**scan == '}', "bad explicit places list");
				3024	(*scan)++; // skip '}'
				3025	}
				3026	else if (**scan == '!') {
				3027	__kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
				3028	KMP_CPU_COMPLEMENT(tempMask);
				3029	(*scan)++; // skip '!'
				3030	}
				3031	else if ((scan >= '0') && (scan <= '9')) {
				3032	next = *scan;
				3033	SKIP_DIGITS(next);
				3034	int num = __kmp_str_to_int(scan, next);
				3035	KMP_ASSERT(num >= 0);
				3036	if ((num > maxOsId) \|\|
				3037	(! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
				3038	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				3039	&& (__kmp_affinity_type != affinity_none))) {
				3040	KMP_WARNING(AffIgnoreInvalidProcID, num);
				3041	}
				3042	}
				3043	else {
				3044	KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
				3045	(*setSize)++;
				3046	}
				3047	*scan = next; // skip num
				3048	}
				3049	else {
				3050	KMP_ASSERT2(0, "bad explicit places list");
				3051	}
				3052	}
				3053
				3054
				3055	static void
				3056	__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
				3057	unsigned int out_numMasks, const char placelist,
				3058	kmp_affin_mask_t *osId2Mask, int maxOsId)
				3059	{
				3060	const char *scan = placelist;
				3061	const char *next = placelist;
				3062
				3063	numNewMasks = 2;
				3064	newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
				3065	* __kmp_affin_mask_size);
				3066	nextNewMask = 0;
				3067
				3068	kmp_affin_mask_t tempMask = (kmp_affin_mask_t )__kmp_allocate(
				3069	__kmp_affin_mask_size);
				3070	KMP_CPU_ZERO(tempMask);
				3071	int setSize = 0;
				3072
				3073	for (;;) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3074	__kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
				3075
				3076	//
				3077	// valid follow sets are ',' ':' and EOL
				3078	//
				3079	SKIP_WS(scan);
				3080	if (scan == '\0' \|\| scan == ',') {
				3081	if (setSize > 0) {
				3082	ADD_MASK(tempMask);
				3083	}
				3084	KMP_CPU_ZERO(tempMask);
				3085	setSize = 0;
				3086	if (*scan == '\0') {
				3087	break;
				3088	}
				3089	scan++; // skip ','
				3090	continue;
				3091	}
				3092
				3093	KMP_ASSERT2(*scan == ':', "bad explicit places list");
				3094	scan++; // skip ':'
				3095
				3096	//
				3097	// Read count parameter
				3098	//
				3099	SKIP_WS(scan);
				3100	KMP_ASSERT2((scan >= '0') && (scan <= '9'),
				3101	"bad explicit places list");
				3102	next = scan;
				3103	SKIP_DIGITS(next);
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	3104	int count = __kmp_str_to_int(scan, *next);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3105	KMP_ASSERT(count >= 0);
				3106	scan = next;
				3107
				3108	//
				3109	// valid follow sets are ',' ':' and EOL
				3110	//
				3111	SKIP_WS(scan);
				3112	if (scan == '\0' \|\| scan == ',') {
				3113	int i;
				3114	for (i = 0; i < count; i++) {
				3115	int j;
				3116	if (setSize == 0) {
				3117	break;
				3118	}
				3119	ADD_MASK(tempMask);
				3120	setSize = 0;
				3121	for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j > 0; j--) {
				3122	//
				3123	// Use a temp var in case macro is changed to evaluate
				3124	// args multiple times.
				3125	//
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	3126	if (KMP_CPU_ISSET(j - 1, tempMask)) {
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3127	KMP_CPU_SET(j, tempMask);
				3128	setSize++;
				3129	}
				3130	else {
				3131	KMP_CPU_CLR(j, tempMask);
				3132	}
				3133	}
				3134	for (; j >= 0; j--) {
				3135	KMP_CPU_CLR(j, tempMask);
				3136	}
				3137	}
				3138	KMP_CPU_ZERO(tempMask);
				3139	setSize = 0;
				3140
				3141	if (*scan == '\0') {
				3142	break;
				3143	}
				3144	scan++; // skip ','
				3145	continue;
				3146	}
				3147
				3148	KMP_ASSERT2(*scan == ':', "bad explicit places list");
				3149	scan++; // skip ':'
				3150
				3151	//
				3152	// Read stride parameter
				3153	//
				3154	int sign = +1;
				3155	for (;;) {
				3156	SKIP_WS(scan);
				3157	if (*scan == '+') {
				3158	scan++; // skip '+'
				3159	continue;
				3160	}
				3161	if (*scan == '-') {
				3162	sign *= -1;
				3163	scan++; // skip '-'
				3164	continue;
				3165	}
				3166	break;
				3167	}
				3168	SKIP_WS(scan);
				3169	KMP_ASSERT2((scan >= '0') && (scan <= '9'),
				3170	"bad explicit places list");
				3171	next = scan;
				3172	SKIP_DIGITS(next);
Jim Cownie	181b4bb	2013-12-23 17:28:57 +0000	[diff] [blame]	3173	int stride = __kmp_str_to_int(scan, *next);
Jim Cownie	5e8470a	2013-09-27 10:38:44 +0000	[diff] [blame]	3174	KMP_DEBUG_ASSERT(stride >= 0);
				3175	scan = next;
				3176	stride *= sign;
				3177
				3178	if (stride > 0) {
				3179	int i;
				3180	for (i = 0; i < count; i++) {
				3181	int j;
				3182	if (setSize == 0) {
				3183	break;
				3184	}
				3185	ADD_MASK(tempMask);
				3186	setSize = 0;
				3187	for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
				3188	if (KMP_CPU_ISSET(j - stride, tempMask)) {
				3189	KMP_CPU_SET(j, tempMask);
				3190	setSize++;
				3191	}
				3192	else {
				3193	KMP_CPU_CLR(j, tempMask);
				3194	}
				3195	}
				3196	for (; j >= 0; j--) {
				3197	KMP_CPU_CLR(j, tempMask);
				3198	}
				3199	}
				3200	}
				3201	else {
				3202	int i;
				3203	for (i = 0; i < count; i++) {
				3204	unsigned j;
				3205	if (setSize == 0) {
				3206	break;
				3207	}
				3208	ADD_MASK(tempMask);
				3209	setSize = 0;
				3210	for (j = 0; j < (__kmp_affin_mask_size * CHAR_BIT) + stride;
				3211	j++) {
				3212	if (KMP_CPU_ISSET(j - stride, tempMask)) {
				3213	KMP_CPU_SET(j, tempMask);
				3214	setSize++;
				3215	}
				3216	else {
				3217	KMP_CPU_CLR(j, tempMask);
				3218	}
				3219	}
				3220	for (; j < __kmp_affin_mask_size * CHAR_BIT; j++) {
				3221	KMP_CPU_CLR(j, tempMask);
				3222	}
				3223	}
				3224	}
				3225	KMP_CPU_ZERO(tempMask);
				3226	setSize = 0;
				3227
				3228	//
				3229	// valid follow sets are ',' and EOL
				3230	//
				3231	SKIP_WS(scan);
				3232	if (*scan == '\0') {
				3233	break;
				3234	}
				3235	if (*scan == ',') {
				3236	scan++; // skip ','
				3237	continue;
				3238	}
				3239
				3240	KMP_ASSERT2(0, "bad explicit places list");
				3241	}
				3242
				3243	*out_numMasks = nextNewMask;
				3244	if (nextNewMask == 0) {
				3245	*out_masks = NULL;
				3246	KMP_INTERNAL_FREE(newMasks);
				3247	return;
				3248	}
				3249	*out_masks
				3250	= (kmp_affin_mask_t )__kmp_allocate(nextNewMask __kmp_affin_mask_size);
				3251	memcpy(out_masks, newMasks, nextNewMask __kmp_affin_mask_size);
				3252	__kmp_free(tempMask);
				3253	KMP_INTERNAL_FREE(newMasks);
				3254	}
				3255
				3256	# endif /* OMP_40_ENABLED */
				3257
				3258	#undef ADD_MASK
				3259	#undef ADD_MASK_OSID
				3260
				3261
				3262	# if KMP_MIC
				3263
				3264	static void
				3265	__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
				3266	{
				3267	if ( __kmp_place_num_cores == 0 ) {
				3268	if ( __kmp_place_num_threads_per_core == 0 ) {
				3269	return; // no cores limiting actions requested, exit
				3270	}
				3271	__kmp_place_num_cores = nCoresPerPkg; // use all available cores
				3272	}
				3273	if ( !__kmp_affinity_uniform_topology() \|\| depth != 3 ) {
				3274	KMP_WARNING( AffThrPlaceUnsupported );
				3275	return; // don't support non-uniform topology or not-3-level architecture
				3276	}
				3277	if ( __kmp_place_num_threads_per_core == 0 ) {
				3278	__kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
				3279	}
				3280	if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
				3281	KMP_WARNING( AffThrPlaceManyCores );
				3282	return;
				3283	}
				3284
				3285	AddrUnsPair newAddr = (AddrUnsPair )__kmp_allocate( sizeof(AddrUnsPair) *
				3286	nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
				3287	int i, j, k, n_old = 0, n_new = 0;
				3288	for ( i = 0; i < nPackages; ++i ) {
				3289	for ( j = 0; j < nCoresPerPkg; ++j ) {
				3290	if ( j < __kmp_place_core_offset \|\| j >= __kmp_place_core_offset + __kmp_place_num_cores ) {
				3291	n_old += __kmp_nThreadsPerCore; // skip not-requested core
				3292	} else {
				3293	for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) {
				3294	if ( k < __kmp_place_num_threads_per_core ) {
				3295	newAddr[n_new] = (*pAddr)[n_old]; // copy requested core' data to new location
				3296	n_new++;
				3297	}
				3298	n_old++;
				3299	}
				3300	}
				3301	}
				3302	}
				3303	nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
				3304	__kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
				3305	__kmp_avail_proc = n_new; // correct avail_proc
				3306	__kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
				3307
				3308	__kmp_free( *pAddr );
				3309	*pAddr = newAddr; // replace old topology with new one
				3310	}
				3311
				3312	# endif /* KMP_MIC */
				3313
				3314
				3315	static AddrUnsPair *address2os = NULL;
				3316	static int * procarr = NULL;
				3317	static int __kmp_aff_depth = 0;
				3318
				3319	static void
				3320	__kmp_aux_affinity_initialize(void)
				3321	{
				3322	if (__kmp_affinity_masks != NULL) {
				3323	KMP_ASSERT(fullMask != NULL);
				3324	return;
				3325	}
				3326
				3327	//
				3328	// Create the "full" mask - this defines all of the processors that we
				3329	// consider to be in the machine model. If respect is set, then it is
				3330	// the initialization thread's affinity mask. Otherwise, it is all
				3331	// processors that we know about on the machine.
				3332	//
				3333	if (fullMask == NULL) {
				3334	fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
				3335	}
				3336	if (KMP_AFFINITY_CAPABLE()) {
				3337	if (__kmp_affinity_respect_mask) {
				3338	__kmp_get_system_affinity(fullMask, TRUE);
				3339
				3340	//
				3341	// Count the number of available processors.
				3342	//
				3343	unsigned i;
				3344	__kmp_avail_proc = 0;
				3345	for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
				3346	if (! KMP_CPU_ISSET(i, fullMask)) {
				3347	continue;
				3348	}
				3349	__kmp_avail_proc++;
				3350	}
				3351	if (__kmp_avail_proc > __kmp_xproc) {
				3352	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				3353	&& (__kmp_affinity_type != affinity_none))) {
				3354	KMP_WARNING(ErrorInitializeAffinity);
				3355	}
				3356	__kmp_affinity_type = affinity_none;
				3357	__kmp_affin_mask_size = 0;
				3358	return;
				3359	}
				3360	}
				3361	else {
				3362	__kmp_affinity_entire_machine_mask(fullMask);
				3363	__kmp_avail_proc = __kmp_xproc;
				3364	}
				3365	}
				3366
				3367	int depth = -1;
				3368	kmp_i18n_id_t msg_id = kmp_i18n_null;
				3369
				3370	//
				3371	// For backward compatiblity, setting KMP_CPUINFO_FILE =>
				3372	// KMP_TOPOLOGY_METHOD=cpuinfo
				3373	//
				3374	if ((__kmp_cpuinfo_file != NULL) &&
				3375	(__kmp_affinity_top_method == affinity_top_method_all)) {
				3376	__kmp_affinity_top_method = affinity_top_method_cpuinfo;
				3377	}
				3378
				3379	if (__kmp_affinity_top_method == affinity_top_method_all) {
				3380	//
				3381	// In the default code path, errors are not fatal - we just try using
				3382	// another method. We only emit a warning message if affinity is on,
				3383	// or the verbose flag is set, an the nowarnings flag was not set.
				3384	//
				3385	const char *file_name = NULL;
				3386	int line = 0;
				3387
				3388	# if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
				3389
				3390	if (__kmp_affinity_verbose) {
				3391	KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
				3392	}
				3393
				3394	file_name = NULL;
				3395	depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
				3396	if (depth == 0) {
				3397	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				3398	KMP_ASSERT(address2os == NULL);
				3399	return;
				3400	}
				3401
				3402	if (depth < 0) {
				3403	if ((msg_id != kmp_i18n_null)
				3404	&& (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				3405	&& (__kmp_affinity_type != affinity_none)))) {
				3406	# if KMP_MIC
				3407	if (__kmp_affinity_verbose) {
				3408	KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
				3409	KMP_I18N_STR(DecodingLegacyAPIC));
				3410	}
				3411	# else
				3412	KMP_WARNING(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
				3413	KMP_I18N_STR(DecodingLegacyAPIC));
				3414	# endif
				3415	}
				3416
				3417	file_name = NULL;
				3418	depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
				3419	if (depth == 0) {
				3420	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				3421	KMP_ASSERT(address2os == NULL);
				3422	return;
				3423	}
				3424	}
				3425
				3426	# endif /* KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 */
				3427
				3428	# if KMP_OS_LINUX
				3429
				3430	if (depth < 0) {
				3431	if ((msg_id != kmp_i18n_null)
				3432	&& (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				3433	&& (__kmp_affinity_type != affinity_none)))) {
				3434	# if KMP_MIC
				3435	if (__kmp_affinity_verbose) {
				3436	KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
				3437	}
				3438	# else
				3439	KMP_WARNING(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
				3440	# endif
				3441	}
				3442	else if (__kmp_affinity_verbose) {
				3443	KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
				3444	}
				3445
				3446	FILE *f = fopen("/proc/cpuinfo", "r");
				3447	if (f == NULL) {
				3448	msg_id = kmp_i18n_str_CantOpenCpuinfo;
				3449	}
				3450	else {
				3451	file_name = "/proc/cpuinfo";
				3452	depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
				3453	fclose(f);
				3454	if (depth == 0) {
				3455	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				3456	KMP_ASSERT(address2os == NULL);
				3457	return;
				3458	}
				3459	}
				3460	}
				3461
				3462	# endif /* KMP_OS_LINUX */
				3463
				3464	if (depth < 0) {
				3465	if (msg_id != kmp_i18n_null
				3466	&& (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				3467	&& (__kmp_affinity_type != affinity_none)))) {
				3468	if (file_name == NULL) {
				3469	KMP_WARNING(UsingFlatOS, __kmp_i18n_catgets(msg_id));
				3470	}
				3471	else if (line == 0) {
				3472	KMP_WARNING(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
				3473	}
				3474	else {
				3475	KMP_WARNING(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
				3476	}
				3477	}
				3478
				3479	file_name = "";
				3480	depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
				3481	if (depth == 0) {
				3482	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				3483	KMP_ASSERT(address2os == NULL);
				3484	return;
				3485	}
				3486	KMP_ASSERT(depth > 0);
				3487	KMP_ASSERT(address2os != NULL);
				3488	}
				3489	}
				3490
				3491	//
				3492	// If the user has specified that a paricular topology discovery method
				3493	// is to be used, then we abort if that method fails. The exception is
				3494	// group affinity, which might have been implicitly set.
				3495	//
				3496
				3497	# if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
				3498
				3499	else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
				3500	if (__kmp_affinity_verbose) {
				3501	KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
				3502	KMP_I18N_STR(Decodingx2APIC));
				3503	}
				3504
				3505	depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
				3506	if (depth == 0) {
				3507	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				3508	KMP_ASSERT(address2os == NULL);
				3509	return;
				3510	}
				3511
				3512	if (depth < 0) {
				3513	KMP_ASSERT(msg_id != kmp_i18n_null);
				3514	KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
				3515	}
				3516	}
				3517	else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
				3518	if (__kmp_affinity_verbose) {
				3519	KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
				3520	KMP_I18N_STR(DecodingLegacyAPIC));
				3521	}
				3522
				3523	depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
				3524	if (depth == 0) {
				3525	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				3526	KMP_ASSERT(address2os == NULL);
				3527	return;
				3528	}
				3529
				3530	if (depth < 0) {
				3531	KMP_ASSERT(msg_id != kmp_i18n_null);
				3532	KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
				3533	}
				3534	}
				3535
				3536	# endif /* KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 */
				3537
				3538	else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
				3539	const char *filename;
				3540	if (__kmp_cpuinfo_file != NULL) {
				3541	filename = __kmp_cpuinfo_file;
				3542	}
				3543	else {
				3544	filename = "/proc/cpuinfo";
				3545	}
				3546
				3547	if (__kmp_affinity_verbose) {
				3548	KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
				3549	}
				3550
				3551	FILE *f = fopen(filename, "r");
				3552	if (f == NULL) {
				3553	int code = errno;
				3554	if (__kmp_cpuinfo_file != NULL) {
				3555	__kmp_msg(
				3556	kmp_ms_fatal,
				3557	KMP_MSG(CantOpenFileForReading, filename),
				3558	KMP_ERR(code),
				3559	KMP_HNT(NameComesFrom_CPUINFO_FILE),
				3560	__kmp_msg_null
				3561	);
				3562	}
				3563	else {
				3564	__kmp_msg(
				3565	kmp_ms_fatal,
				3566	KMP_MSG(CantOpenFileForReading, filename),
				3567	KMP_ERR(code),
				3568	__kmp_msg_null
				3569	);
				3570	}
				3571	}
				3572	int line = 0;
				3573	depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
				3574	fclose(f);
				3575	if (depth < 0) {
				3576	KMP_ASSERT(msg_id != kmp_i18n_null);
				3577	if (line > 0) {
				3578	KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
				3579	}
				3580	else {
				3581	KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
				3582	}
				3583	}
				3584	if (__kmp_affinity_type == affinity_none) {
				3585	KMP_ASSERT(depth == 0);
				3586	KMP_ASSERT(address2os == NULL);
				3587	return;
				3588	}
				3589	}
				3590
				3591	# if KMP_OS_WINDOWS && KMP_ARCH_X86_64
				3592
				3593	else if (__kmp_affinity_top_method == affinity_top_method_group) {
				3594	if (__kmp_affinity_verbose) {
				3595	KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
				3596	}
				3597
				3598	depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
				3599	KMP_ASSERT(depth != 0);
				3600
				3601	if (depth < 0) {
				3602	if ((msg_id != kmp_i18n_null)
				3603	&& (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				3604	&& (__kmp_affinity_type != affinity_none)))) {
				3605	KMP_WARNING(UsingFlatOS, __kmp_i18n_catgets(msg_id));
				3606	}
				3607
				3608	depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
				3609	if (depth == 0) {
				3610	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				3611	KMP_ASSERT(address2os == NULL);
				3612	return;
				3613	}
				3614	// should not fail
				3615	KMP_ASSERT(depth > 0);
				3616	KMP_ASSERT(address2os != NULL);
				3617	}
				3618	}
				3619
				3620	# endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */
				3621
				3622	else if (__kmp_affinity_top_method == affinity_top_method_flat) {
				3623	if (__kmp_affinity_verbose) {
				3624	KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
				3625	}
				3626
				3627	depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
				3628	if (depth == 0) {
				3629	KMP_ASSERT(__kmp_affinity_type == affinity_none);
				3630	KMP_ASSERT(address2os == NULL);
				3631	return;
				3632	}
				3633	// should not fail
				3634	KMP_ASSERT(depth > 0);
				3635	KMP_ASSERT(address2os != NULL);
				3636	}
				3637
				3638	if (address2os == NULL) {
				3639	if (KMP_AFFINITY_CAPABLE()
				3640	&& (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				3641	&& (__kmp_affinity_type != affinity_none)))) {
				3642	KMP_WARNING(ErrorInitializeAffinity);
				3643	}
				3644	__kmp_affinity_type = affinity_none;
				3645	__kmp_affin_mask_size = 0;
				3646	return;
				3647	}
				3648
				3649	# if KMP_MIC
				3650	__kmp_apply_thread_places(&address2os, depth);
				3651	# endif
				3652
				3653	//
				3654	// Create the table of masks, indexed by thread Id.
				3655	//
				3656	unsigned maxIndex;
				3657	unsigned numUnique;
				3658	kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
				3659	address2os, __kmp_avail_proc);
				3660	if (__kmp_affinity_gran_levels == 0) {
				3661	KMP_DEBUG_ASSERT(numUnique == __kmp_avail_proc);
				3662	}
				3663
				3664	//
				3665	// Set the childNums vector in all Address objects. This must be done
				3666	// before we can sort using __kmp_affinity_cmp_Address_child_num(),
				3667	// which takes into account the setting of __kmp_affinity_compact.
				3668	//
				3669	__kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
				3670
				3671	switch (__kmp_affinity_type) {
				3672
				3673	case affinity_explicit:
				3674	KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
				3675	# if OMP_40_ENABLED
				3676	if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
				3677	# endif
				3678	{
				3679	__kmp_affinity_process_proclist(&__kmp_affinity_masks,
				3680	&__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
				3681	maxIndex);
				3682	}
				3683	# if OMP_40_ENABLED
				3684	else {
				3685	__kmp_affinity_process_placelist(&__kmp_affinity_masks,
				3686	&__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
				3687	maxIndex);
				3688	}
				3689	# endif
				3690	if (__kmp_affinity_num_masks == 0) {
				3691	if (__kmp_affinity_verbose \|\| (__kmp_affinity_warnings
				3692	&& (__kmp_affinity_type != affinity_none))) {
				3693	KMP_WARNING(AffNoValidProcID);
				3694	}
				3695	__kmp_affinity_type = affinity_none;
				3696	return;
				3697	}
				3698	break;
				3699
				3700	//
				3701	// The other affinity types rely on sorting the Addresses according
				3702	// to some permutation of the machine topology tree. Set
				3703	// __kmp_affinity_compact and __kmp_affinity_offset appropriately,
				3704	// then jump to a common code fragment to do the sort and create
				3705	// the array of affinity masks.
				3706	//
				3707
				3708	case affinity_logical:
				3709	__kmp_affinity_compact = 0;
				3710	if (__kmp_affinity_offset) {
				3711	__kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
				3712	% __kmp_avail_proc;
				3713	}
				3714	goto sortAddresses;
				3715
				3716	case affinity_physical:
				3717	if (__kmp_nThreadsPerCore > 1) {
				3718	__kmp_affinity_compact = 1;
				3719	if (__kmp_affinity_compact >= depth) {
				3720	__kmp_affinity_compact = 0;
				3721	}
				3722	} else {
				3723	__kmp_affinity_compact = 0;
				3724	}
				3725	if (__kmp_affinity_offset) {
				3726	__kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
				3727	% __kmp_avail_proc;
				3728	}
				3729	goto sortAddresses;
				3730
				3731	case affinity_scatter:
				3732	if (__kmp_affinity_compact >= depth) {
				3733	__kmp_affinity_compact = 0;
				3734	}
				3735	else {
				3736	__kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
				3737	}
				3738	goto sortAddresses;
				3739
				3740	case affinity_compact:
				3741	if (__kmp_affinity_compact >= depth) {
				3742	__kmp_affinity_compact = depth - 1;
				3743	}
				3744	goto sortAddresses;
				3745
				3746	# if KMP_MIC
				3747	case affinity_balanced:
				3748	// Balanced works only for the case of a single package and uniform topology
				3749	if( nPackages > 1 ) {
				3750	if( __kmp_affinity_verbose \|\| __kmp_affinity_warnings ) {
				3751	KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
				3752	}
				3753	__kmp_affinity_type = affinity_none;
				3754	return;
				3755	} else if( __kmp_affinity_uniform_topology() ) {
				3756	break;
				3757	} else { // Non-uniform topology
				3758
				3759	// Save the depth for further usage
				3760	__kmp_aff_depth = depth;
				3761
				3762	// Number of hyper threads per core in HT machine
				3763	int nth_per_core = __kmp_nThreadsPerCore;
				3764
				3765	int core_level;
				3766	if( nth_per_core > 1 ) {
				3767	core_level = depth - 2;
				3768	} else {
				3769	core_level = depth - 1;
				3770	}
				3771	int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
				3772	int nproc = nth_per_core * ncores;
				3773
				3774	procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
				3775	for( int i = 0; i < nproc; i++ ) {
				3776	procarr[ i ] = -1;
				3777	}
				3778
				3779	for( int i = 0; i < __kmp_avail_proc; i++ ) {
				3780	int proc = address2os[ i ].second;
				3781	// If depth == 3 then level=0 - package, level=1 - core, level=2 - thread.
				3782	// If there is only one thread per core then depth == 2: level 0 - package,
				3783	// level 1 - core.
				3784	int level = depth - 1;
				3785
				3786	// __kmp_nth_per_core == 1
				3787	int thread = 0;
				3788	int core = address2os[ i ].first.labels[ level ];
				3789	// If the thread level exists, that is we have more than one thread context per core
				3790	if( nth_per_core > 1 ) {
				3791	thread = address2os[ i ].first.labels[ level ] % nth_per_core;
				3792	core = address2os[ i ].first.labels[ level - 1 ];
				3793	}
				3794	procarr[ core * nth_per_core + thread ] = proc;
				3795	}
				3796
				3797	break;
				3798	}
				3799	# endif
				3800
				3801	sortAddresses:
				3802	//
				3803	// Allocate the gtid->affinity mask table.
				3804	//
				3805	if (__kmp_affinity_dups) {
				3806	__kmp_affinity_num_masks = __kmp_avail_proc;
				3807	}
				3808	else {
				3809	__kmp_affinity_num_masks = numUnique;
				3810	}
				3811
				3812	# if OMP_40_ENABLED
				3813	if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
				3814	&& ( __kmp_affinity_num_places > 0 )
				3815	&& ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
				3816	__kmp_affinity_num_masks = __kmp_affinity_num_places;
				3817	}
				3818	# endif
				3819
				3820	__kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
				3821	__kmp_affinity_num_masks * __kmp_affin_mask_size);
				3822
				3823	//
				3824	// Sort the address2os table according to the current setting of
				3825	// __kmp_affinity_compact, then fill out __kmp_affinity_masks.
				3826	//
				3827	qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
				3828	__kmp_affinity_cmp_Address_child_num);
				3829	{
				3830	int i;
				3831	unsigned j;
				3832	for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
				3833	if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
				3834	continue;
				3835	}
				3836	unsigned osId = address2os[i].second;
				3837	kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
				3838	kmp_affin_mask_t *dest
				3839	= KMP_CPU_INDEX(__kmp_affinity_masks, j);
				3840	KMP_ASSERT(KMP_CPU_ISSET(osId, src));
				3841	KMP_CPU_COPY(dest, src);
				3842	if (++j >= __kmp_affinity_num_masks) {
				3843	break;
				3844	}
				3845	}
				3846	KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
				3847	}
				3848	break;
				3849
				3850	default:
				3851	KMP_ASSERT2(0, "Unexpected affinity setting");
				3852	}
				3853
				3854	__kmp_free(osId2Mask);
				3855	}
				3856
				3857
				3858	void
				3859	__kmp_affinity_initialize(void)
				3860	{
				3861	//
				3862	// Much of the code above was written assumming that if a machine was not
				3863	// affinity capable, then __kmp_affinity_type == affinity_none. We now
				3864	// explicitly represent this as __kmp_affinity_type == affinity_disabled.
				3865	//
				3866	// There are too many checks for __kmp_affinity_type == affinity_none
				3867	// in this code. Instead of trying to change them all, check if
				3868	// __kmp_affinity_type == affinity_disabled, and if so, slam it with
				3869	// affinity_none, call the real initialization routine, then restore
				3870	// __kmp_affinity_type to affinity_disabled.
				3871	//
				3872	int disabled = (__kmp_affinity_type == affinity_disabled);
				3873	if (! KMP_AFFINITY_CAPABLE()) {
				3874	KMP_ASSERT(disabled);
				3875	}
				3876	if (disabled) {
				3877	__kmp_affinity_type = affinity_none;
				3878	}
				3879	__kmp_aux_affinity_initialize();
				3880	if (disabled) {
				3881	__kmp_affinity_type = affinity_disabled;
				3882	}
				3883	}
				3884
				3885
				3886	void
				3887	__kmp_affinity_uninitialize(void)
				3888	{
				3889	if (__kmp_affinity_masks != NULL) {
				3890	__kmp_free(__kmp_affinity_masks);
				3891	__kmp_affinity_masks = NULL;
				3892	}
				3893	if (fullMask != NULL) {
				3894	KMP_CPU_FREE(fullMask);
				3895	fullMask = NULL;
				3896	}
				3897	__kmp_affinity_num_masks = 0;
				3898	# if OMP_40_ENABLED
				3899	__kmp_affinity_num_places = 0;
				3900	# endif
				3901	if (__kmp_affinity_proclist != NULL) {
				3902	__kmp_free(__kmp_affinity_proclist);
				3903	__kmp_affinity_proclist = NULL;
				3904	}
				3905	if( address2os != NULL ) {
				3906	__kmp_free( address2os );
				3907	address2os = NULL;
				3908	}
				3909	if( procarr != NULL ) {
				3910	__kmp_free( procarr );
				3911	procarr = NULL;
				3912	}
				3913	}
				3914
				3915
				3916	void
				3917	__kmp_affinity_set_init_mask(int gtid, int isa_root)
				3918	{
				3919	if (! KMP_AFFINITY_CAPABLE()) {
				3920	return;
				3921	}
				3922
				3923	kmp_info_t th = (kmp_info_t )TCR_SYNC_PTR(__kmp_threads[gtid]);
				3924	if (th->th.th_affin_mask == NULL) {
				3925	KMP_CPU_ALLOC(th->th.th_affin_mask);
				3926	}
				3927	else {
				3928	KMP_CPU_ZERO(th->th.th_affin_mask);
				3929	}
				3930
				3931	//
				3932	// Copy the thread mask to the kmp_info_t strucuture.
				3933	// If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
				3934	// that has all of the OS proc ids set, or if __kmp_affinity_respect_mask
				3935	// is set, then the full mask is the same as the mask of the initialization
				3936	// thread.
				3937	//
				3938	kmp_affin_mask_t *mask;
				3939	int i;
				3940
				3941	# if OMP_40_ENABLED
				3942	if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
				3943	# endif
				3944	{
				3945	if ((__kmp_affinity_type == affinity_none)
				3946	# if KMP_MIC
				3947	\|\| (__kmp_affinity_type == affinity_balanced)
				3948	# endif
				3949	) {
				3950	# if KMP_OS_WINDOWS && KMP_ARCH_X86_64
				3951	if (__kmp_num_proc_groups > 1) {
				3952	return;
				3953	}
				3954	# endif
				3955	KMP_ASSERT(fullMask != NULL);
				3956	i = -1;
				3957	mask = fullMask;
				3958	}
				3959	else {
				3960	KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
				3961	i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
				3962	mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
				3963	}
				3964	}
				3965	# if OMP_40_ENABLED
				3966	else {
				3967	if ((! isa_root)
				3968	\|\| (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
				3969	# if KMP_OS_WINDOWS && KMP_ARCH_X86_64
				3970	if (__kmp_num_proc_groups > 1) {
				3971	return;
				3972	}
				3973	# endif
				3974	KMP_ASSERT(fullMask != NULL);
				3975	i = KMP_PLACE_ALL;
				3976	mask = fullMask;
				3977	}
				3978	else {
				3979	//
				3980	// int i = some hash function or just a counter that doesn't
				3981	// always start at 0. Use gtid for now.
				3982	//
				3983	KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
				3984	i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
				3985	mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
				3986	}
				3987	}
				3988	# endif
				3989
				3990	# if OMP_40_ENABLED
				3991	th->th.th_current_place = i;
				3992	if (isa_root) {
				3993	th->th.th_new_place = i;
				3994	th->th.th_first_place = 0;
				3995	th->th.th_last_place = __kmp_affinity_num_masks - 1;
				3996	}
				3997
				3998	if (i == KMP_PLACE_ALL) {
				3999	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
				4000	gtid));
				4001	}
				4002	else {
				4003	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
				4004	gtid, i));
				4005	}
				4006	# else
				4007	if (i == -1) {
				4008	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
				4009	gtid));
				4010	}
				4011	else {
				4012	KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
				4013	gtid, i));
				4014	}
				4015	# endif /* OMP_40_ENABLED */
				4016
				4017	KMP_CPU_COPY(th->th.th_affin_mask, mask);
				4018
				4019	if (__kmp_affinity_verbose) {
				4020	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4021	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4022	th->th.th_affin_mask);
				4023	KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", gtid, buf);
				4024	}
				4025
				4026	# if KMP_OS_WINDOWS
				4027	//
				4028	// On Windows* OS, the process affinity mask might have changed.
				4029	// If the user didn't request affinity and this call fails,
				4030	// just continue silently. See CQ171393.
				4031	//
				4032	if ( __kmp_affinity_type == affinity_none ) {
				4033	__kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
				4034	}
				4035	else
				4036	# endif
				4037	__kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
				4038	}
				4039
				4040
				4041	# if OMP_40_ENABLED
				4042
				4043	void
				4044	__kmp_affinity_set_place(int gtid)
				4045	{
				4046	int retval;
				4047
				4048	if (! KMP_AFFINITY_CAPABLE()) {
				4049	return;
				4050	}
				4051
				4052	kmp_info_t th = (kmp_info_t )TCR_SYNC_PTR(__kmp_threads[gtid]);
				4053
				4054	KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
				4055	gtid, th->th.th_new_place, th->th.th_current_place));
				4056
				4057	//
				4058	// Check that the new place is withing this thread's partition.
				4059	//
				4060	KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
				4061	KMP_DEBUG_ASSERT(th->th.th_new_place >= 0);
				4062	KMP_DEBUG_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
				4063	if (th->th.th_first_place <= th->th.th_last_place) {
				4064	KMP_DEBUG_ASSERT((th->th.th_new_place >= th->th.th_first_place)
				4065	&& (th->th.th_new_place <= th->th.th_last_place));
				4066	}
				4067	else {
				4068	KMP_DEBUG_ASSERT((th->th.th_new_place <= th->th.th_first_place)
				4069	\|\| (th->th.th_new_place >= th->th.th_last_place));
				4070	}
				4071
				4072	//
				4073	// Copy the thread mask to the kmp_info_t strucuture,
				4074	// and set this thread's affinity.
				4075	//
				4076	kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
				4077	th->th.th_new_place);
				4078	KMP_CPU_COPY(th->th.th_affin_mask, mask);
				4079	th->th.th_current_place = th->th.th_new_place;
				4080
				4081	if (__kmp_affinity_verbose) {
				4082	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4083	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4084	th->th.th_affin_mask);
				4085	KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", gtid, buf);
				4086	}
				4087	__kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
				4088	}
				4089
				4090	# endif /* OMP_40_ENABLED */
				4091
				4092
				4093	int
				4094	__kmp_aux_set_affinity(void **mask)
				4095	{
				4096	int gtid;
				4097	kmp_info_t *th;
				4098	int retval;
				4099
				4100	if (! KMP_AFFINITY_CAPABLE()) {
				4101	return -1;
				4102	}
				4103
				4104	gtid = __kmp_entry_gtid();
				4105	KA_TRACE(1000, ;{
				4106	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4107	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4108	(kmp_affin_mask_t )(mask));
				4109	__kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
				4110	gtid, buf);
				4111	});
				4112
				4113	if (__kmp_env_consistency_check) {
				4114	if ((mask == NULL) \|\| (*mask == NULL)) {
				4115	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
				4116	}
				4117	else {
				4118	unsigned proc;
				4119	int num_procs = 0;
				4120
				4121	for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
				4122	if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t )(mask))) {
				4123	continue;
				4124	}
				4125	num_procs++;
				4126	if (! KMP_CPU_ISSET(proc, fullMask)) {
				4127	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
				4128	break;
				4129	}
				4130	}
				4131	if (num_procs == 0) {
				4132	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
				4133	}
				4134
				4135	# if KMP_OS_WINDOWS && KMP_ARCH_X86_64
				4136	if (__kmp_get_proc_group((kmp_affin_mask_t )(mask)) < 0) {
				4137	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
				4138	}
				4139	# endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */
				4140
				4141	}
				4142	}
				4143
				4144	th = __kmp_threads[gtid];
				4145	KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
				4146	retval = __kmp_set_system_affinity((kmp_affin_mask_t )(mask), FALSE);
				4147	if (retval == 0) {
				4148	KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t )(mask));
				4149	}
				4150
				4151	# if OMP_40_ENABLED
				4152	th->th.th_current_place = KMP_PLACE_UNDEFINED;
				4153	th->th.th_new_place = KMP_PLACE_UNDEFINED;
				4154	th->th.th_first_place = 0;
				4155	th->th.th_last_place = __kmp_affinity_num_masks - 1;
				4156	# endif
				4157
				4158	return retval;
				4159	}
				4160
				4161
				4162	int
				4163	__kmp_aux_get_affinity(void **mask)
				4164	{
				4165	int gtid;
				4166	int retval;
				4167	kmp_info_t *th;
				4168
				4169	if (! KMP_AFFINITY_CAPABLE()) {
				4170	return -1;
				4171	}
				4172
				4173	gtid = __kmp_entry_gtid();
				4174	th = __kmp_threads[gtid];
				4175	KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
				4176
				4177	KA_TRACE(1000, ;{
				4178	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4179	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4180	th->th.th_affin_mask);
				4181	__kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
				4182	});
				4183
				4184	if (__kmp_env_consistency_check) {
				4185	if ((mask == NULL) \|\| (*mask == NULL)) {
				4186	KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
				4187	}
				4188	}
				4189
				4190	# if !KMP_OS_WINDOWS
				4191
				4192	retval = __kmp_get_system_affinity((kmp_affin_mask_t )(mask), FALSE);
				4193	KA_TRACE(1000, ;{
				4194	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4195	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4196	(kmp_affin_mask_t )(mask));
				4197	__kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
				4198	});
				4199	return retval;
				4200
				4201	# else
				4202
				4203	KMP_CPU_COPY((kmp_affin_mask_t )(mask), th->th.th_affin_mask);
				4204	return 0;
				4205
				4206	# endif /* KMP_OS_WINDOWS */
				4207
				4208	}
				4209
				4210
				4211	int
				4212	__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
				4213	{
				4214	int retval;
				4215
				4216	if (! KMP_AFFINITY_CAPABLE()) {
				4217	return -1;
				4218	}
				4219
				4220	KA_TRACE(1000, ;{
				4221	int gtid = __kmp_entry_gtid();
				4222	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4223	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4224	(kmp_affin_mask_t )(mask));
				4225	__kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
				4226	proc, gtid, buf);
				4227	});
				4228
				4229	if (__kmp_env_consistency_check) {
				4230	if ((mask == NULL) \|\| (*mask == NULL)) {
				4231	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
				4232	}
				4233	}
				4234
				4235	if ((proc < 0) \|\| ((unsigned)proc >= KMP_CPU_SETSIZE)) {
				4236	return -1;
				4237	}
				4238	if (! KMP_CPU_ISSET(proc, fullMask)) {
				4239	return -2;
				4240	}
				4241
				4242	KMP_CPU_SET(proc, (kmp_affin_mask_t )(mask));
				4243	return 0;
				4244	}
				4245
				4246
				4247	int
				4248	__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
				4249	{
				4250	int retval;
				4251
				4252	if (! KMP_AFFINITY_CAPABLE()) {
				4253	return -1;
				4254	}
				4255
				4256	KA_TRACE(1000, ;{
				4257	int gtid = __kmp_entry_gtid();
				4258	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4259	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4260	(kmp_affin_mask_t )(mask));
				4261	__kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
				4262	proc, gtid, buf);
				4263	});
				4264
				4265	if (__kmp_env_consistency_check) {
				4266	if ((mask == NULL) \|\| (*mask == NULL)) {
				4267	KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
				4268	}
				4269	}
				4270
				4271	if ((proc < 0) \|\| ((unsigned)proc >= KMP_CPU_SETSIZE)) {
				4272	return -1;
				4273	}
				4274	if (! KMP_CPU_ISSET(proc, fullMask)) {
				4275	return -2;
				4276	}
				4277
				4278	KMP_CPU_CLR(proc, (kmp_affin_mask_t )(mask));
				4279	return 0;
				4280	}
				4281
				4282
				4283	int
				4284	__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
				4285	{
				4286	int retval;
				4287
				4288	if (! KMP_AFFINITY_CAPABLE()) {
				4289	return -1;
				4290	}
				4291
				4292	KA_TRACE(1000, ;{
				4293	int gtid = __kmp_entry_gtid();
				4294	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4295	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
				4296	(kmp_affin_mask_t )(mask));
				4297	__kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
				4298	proc, gtid, buf);
				4299	});
				4300
				4301	if (__kmp_env_consistency_check) {
				4302	if ((mask == NULL) \|\| (*mask == NULL)) {
				4303	KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
				4304	}
				4305	}
				4306
				4307	if ((proc < 0) \|\| ((unsigned)proc >= KMP_CPU_SETSIZE)) {
				4308	return 0;
				4309	}
				4310	if (! KMP_CPU_ISSET(proc, fullMask)) {
				4311	return 0;
				4312	}
				4313
				4314	return KMP_CPU_ISSET(proc, (kmp_affin_mask_t )(mask));
				4315	}
				4316
				4317	# if KMP_MIC
				4318
				4319	// Dynamic affinity settings - Affinity balanced
				4320	void __kmp_balanced_affinity( int tid, int nthreads )
				4321	{
				4322	if( __kmp_affinity_uniform_topology() ) {
				4323	int coreID;
				4324	int threadID;
				4325	// Number of hyper threads per core in HT machine
				4326	int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
				4327	// Number of cores
				4328	int ncores = __kmp_ncores;
				4329	// How many threads will be bound to each core
				4330	int chunk = nthreads / ncores;
				4331	// How many cores will have an additional thread bound to it - "big cores"
				4332	int big_cores = nthreads % ncores;
				4333	// Number of threads on the big cores
				4334	int big_nth = ( chunk + 1 ) * big_cores;
				4335	if( tid < big_nth ) {
				4336	coreID = tid / (chunk + 1 );
				4337	threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
				4338	} else { //tid >= big_nth
				4339	coreID = ( tid - big_cores ) / chunk;
				4340	threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
				4341	}
				4342
				4343	KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
				4344	"Illegal set affinity operation when not capable");
				4345
				4346	kmp_affin_mask_t mask = (kmp_affin_mask_t )alloca(__kmp_affin_mask_size);
				4347	KMP_CPU_ZERO(mask);
				4348
				4349	// Granularity == thread
				4350	if( __kmp_affinity_gran == affinity_gran_fine \|\| __kmp_affinity_gran == affinity_gran_thread) {
				4351	int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
				4352	KMP_CPU_SET( osID, mask);
				4353	} else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
				4354	for( int i = 0; i < __kmp_nth_per_core; i++ ) {
				4355	int osID;
				4356	osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
				4357	KMP_CPU_SET( osID, mask);
				4358	}
				4359	}
				4360	if (__kmp_affinity_verbose) {
				4361	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4362	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
				4363	KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", tid, buf);
				4364	}
				4365	__kmp_set_system_affinity( mask, TRUE );
				4366	} else { // Non-uniform topology
				4367
				4368	kmp_affin_mask_t mask = (kmp_affin_mask_t )alloca(__kmp_affin_mask_size);
				4369	KMP_CPU_ZERO(mask);
				4370
				4371	// Number of hyper threads per core in HT machine
				4372	int nth_per_core = __kmp_nThreadsPerCore;
				4373	int core_level;
				4374	if( nth_per_core > 1 ) {
				4375	core_level = __kmp_aff_depth - 2;
				4376	} else {
				4377	core_level = __kmp_aff_depth - 1;
				4378	}
				4379
				4380	// Number of cores - maximum value; it does not count trail cores with 0 processors
				4381	int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
				4382
				4383	// For performance gain consider the special case nthreads == __kmp_avail_proc
				4384	if( nthreads == __kmp_avail_proc ) {
				4385	if( __kmp_affinity_gran == affinity_gran_fine \|\| __kmp_affinity_gran == affinity_gran_thread) {
				4386	int osID = address2os[ tid ].second;
				4387	KMP_CPU_SET( osID, mask);
				4388	} else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
				4389	int coreID = address2os[ tid ].first.labels[ core_level ];
				4390	// We'll count found osIDs for the current core; they can be not more than nth_per_core;
				4391	// since the address2os is sortied we can break when cnt==nth_per_core
				4392	int cnt = 0;
				4393	for( int i = 0; i < __kmp_avail_proc; i++ ) {
				4394	int osID = address2os[ i ].second;
				4395	int core = address2os[ i ].first.labels[ core_level ];
				4396	if( core == coreID ) {
				4397	KMP_CPU_SET( osID, mask);
				4398	cnt++;
				4399	if( cnt == nth_per_core ) {
				4400	break;
				4401	}
				4402	}
				4403	}
				4404	}
				4405	} else if( nthreads <= __kmp_ncores ) {
				4406
				4407	int core = 0;
				4408	for( int i = 0; i < ncores; i++ ) {
				4409	// Check if this core from procarr[] is in the mask
				4410	int in_mask = 0;
				4411	for( int j = 0; j < nth_per_core; j++ ) {
				4412	if( procarr[ i * nth_per_core + j ] != - 1 ) {
				4413	in_mask = 1;
				4414	break;
				4415	}
				4416	}
				4417	if( in_mask ) {
				4418	if( tid == core ) {
				4419	for( int j = 0; j < nth_per_core; j++ ) {
				4420	int osID = procarr[ i * nth_per_core + j ];
				4421	if( osID != -1 ) {
				4422	KMP_CPU_SET( osID, mask );
				4423	// For granularity=thread it is enough to set the first available osID for this core
				4424	if( __kmp_affinity_gran == affinity_gran_fine \|\| __kmp_affinity_gran == affinity_gran_thread) {
				4425	break;
				4426	}
				4427	}
				4428	}
				4429	break;
				4430	} else {
				4431	core++;
				4432	}
				4433	}
				4434	}
				4435
				4436	} else { // nthreads > __kmp_ncores
				4437
				4438	// Array to save the number of processors at each core
				4439	int nproc_at_core[ ncores ];
				4440	// Array to save the number of cores with "x" available processors;
				4441	int ncores_with_x_procs[ nth_per_core + 1 ];
				4442	// Array to save the number of cores with # procs from x to nth_per_core
				4443	int ncores_with_x_to_max_procs[ nth_per_core + 1 ];
				4444
				4445	for( int i = 0; i <= nth_per_core; i++ ) {
				4446	ncores_with_x_procs[ i ] = 0;
				4447	ncores_with_x_to_max_procs[ i ] = 0;
				4448	}
				4449
				4450	for( int i = 0; i < ncores; i++ ) {
				4451	int cnt = 0;
				4452	for( int j = 0; j < nth_per_core; j++ ) {
				4453	if( procarr[ i * nth_per_core + j ] != -1 ) {
				4454	cnt++;
				4455	}
				4456	}
				4457	nproc_at_core[ i ] = cnt;
				4458	ncores_with_x_procs[ cnt ]++;
				4459	}
				4460
				4461	for( int i = 0; i <= nth_per_core; i++ ) {
				4462	for( int j = i; j <= nth_per_core; j++ ) {
				4463	ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
				4464	}
				4465	}
				4466
				4467	// Max number of processors
				4468	int nproc = nth_per_core * ncores;
				4469	// An array to keep number of threads per each context
				4470	int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
				4471	for( int i = 0; i < nproc; i++ ) {
				4472	newarr[ i ] = 0;
				4473	}
				4474
				4475	int nth = nthreads;
				4476	int flag = 0;
				4477	while( nth > 0 ) {
				4478	for( int j = 1; j <= nth_per_core; j++ ) {
				4479	int cnt = ncores_with_x_to_max_procs[ j ];
				4480	for( int i = 0; i < ncores; i++ ) {
				4481	// Skip the core with 0 processors
				4482	if( nproc_at_core[ i ] == 0 ) {
				4483	continue;
				4484	}
				4485	for( int k = 0; k < nth_per_core; k++ ) {
				4486	if( procarr[ i * nth_per_core + k ] != -1 ) {
				4487	if( newarr[ i * nth_per_core + k ] == 0 ) {
				4488	newarr[ i * nth_per_core + k ] = 1;
				4489	cnt--;
				4490	nth--;
				4491	break;
				4492	} else {
				4493	if( flag != 0 ) {
				4494	newarr[ i * nth_per_core + k ] ++;
				4495	cnt--;
				4496	nth--;
				4497	break;
				4498	}
				4499	}
				4500	}
				4501	}
				4502	if( cnt == 0 \|\| nth == 0 ) {
				4503	break;
				4504	}
				4505	}
				4506	if( nth == 0 ) {
				4507	break;
				4508	}
				4509	}
				4510	flag = 1;
				4511	}
				4512	int sum = 0;
				4513	for( int i = 0; i < nproc; i++ ) {
				4514	sum += newarr[ i ];
				4515	if( sum > tid ) {
				4516	// Granularity == thread
				4517	if( __kmp_affinity_gran == affinity_gran_fine \|\| __kmp_affinity_gran == affinity_gran_thread) {
				4518	int osID = procarr[ i ];
				4519	KMP_CPU_SET( osID, mask);
				4520	} else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
				4521	int coreID = i / nth_per_core;
				4522	for( int ii = 0; ii < nth_per_core; ii++ ) {
				4523	int osID = procarr[ coreID * nth_per_core + ii ];
				4524	if( osID != -1 ) {
				4525	KMP_CPU_SET( osID, mask);
				4526	}
				4527	}
				4528	}
				4529	break;
				4530	}
				4531	}
				4532	__kmp_free( newarr );
				4533	}
				4534
				4535	if (__kmp_affinity_verbose) {
				4536	char buf[KMP_AFFIN_MASK_PRINT_LEN];
				4537	__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
				4538	KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", tid, buf);
				4539	}
				4540	__kmp_set_system_affinity( mask, TRUE );
				4541	}
				4542	}
				4543
				4544	# endif /* KMP_MIC */
				4545
				4546	#elif KMP_OS_DARWIN
				4547	// affinity not supported
				4548	#else
				4549	#error "Unknown or unsupported OS"
				4550	#endif // KMP_OS_WINDOWS \|\| KMP_OS_LINUX
				4551