/*
 * kmp_affinity.cpp -- affinity management
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_str.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_affinity.h"

// Store the real or imagined machine hierarchy here
static hierarchy_info machine_hierarchy;

void __kmp_cleanup_hierarchy() {
    machine_hierarchy.fini();
}

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
    kmp_uint32 depth;
    // The test below is true if affinity is available, but set to "none".
    // Need to init on first use of hierarchical barrier.
    if (TCR_1(machine_hierarchy.uninitialized))
        machine_hierarchy.init(NULL, nproc);

    // Adjust the hierarchy in case num threads exceeds original
    if (nproc > machine_hierarchy.base_num_threads)
        machine_hierarchy.resize(nproc);

    depth = machine_hierarchy.depth;
    KMP_DEBUG_ASSERT(depth > 0);

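    // Copy what the barrier code needs out of the hierarchy: its depth, the
    // number of leaf-level kids a parent waits for (the leaf branching factor
    // numPerLevel[0], minus one for the parent itself), and the per-level
    // subtree spans (skipPerLevel) used to locate parents and children.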
    thr_bar->depth = depth;
    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}

#if KMP_AFFINITY_SUPPORTED

//
// Print the affinity mask to the character array in a pretty format.
//
#if KMP_USE_HWLOC
char *
__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
{
    int num_chars_to_write, num_chars_written;
    char* scan;
    KMP_ASSERT(buf_len >= 40);

    // bufsize of 0 just retrieves the needed buffer size.
    num_chars_to_write = hwloc_bitmap_list_snprintf(buf, 0, (hwloc_bitmap_t)mask);

    // need '{', "xxxxxxxx...xx", '}', '\0' = num_chars_to_write + 3 bytes
    // * num_chars_to_write returned by hwloc_bitmap_list_snprintf does not
    //   take into account the '\0' character.
    if(hwloc_bitmap_iszero((hwloc_bitmap_t)mask)) {
        KMP_SNPRINTF(buf, buf_len, "{<empty>}");
    } else if(num_chars_to_write < buf_len - 3) {
        // no problem fitting the mask into buf_len number of characters
        buf[0] = '{';
        // use buf_len-3 because we have the three characters: '{' '}' '\0' to add to the buffer
        num_chars_written = hwloc_bitmap_list_snprintf(buf+1, buf_len-3, (hwloc_bitmap_t)mask);
        buf[num_chars_written+1] = '}';
        buf[num_chars_written+2] = '\0';
    } else {
        // Need to truncate the affinity mask string and add ellipsis.
        // To do this, we first write out the '{' + str(mask)
        buf[0] = '{';
        hwloc_bitmap_list_snprintf(buf+1, buf_len-1, (hwloc_bitmap_t)mask);
        // Then go to the 7th-to-last character and scan backwards until we
        // are NOT on a digit, and write "...}\0" there.  This way the ellipsis
        // is added cleanly and we don't overwrite part of an affinity number,
        // i.e. we avoid something like { 45, 67, 8...} and get { 45, 67,...}
        // instead.
        scan = buf + buf_len - 7;
        while(scan >= buf && *scan >= '0' && *scan <= '9')
            scan--;
        *(scan+1) = '.';
        *(scan+2) = '.';
        *(scan+3) = '.';
        *(scan+4) = '}';
        *(scan+5) = '\0';
    }
    return buf;
}
#else
char *
__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
{
    KMP_ASSERT(buf_len >= 40);
    char *scan = buf;
    char *end = buf + buf_len - 1;

    //
    // Find first element / check for empty set.
    //
    size_t i;
    for (i = 0; i < KMP_CPU_SETSIZE; i++) {
        if (KMP_CPU_ISSET(i, mask)) {
            break;
        }
    }
    if (i == KMP_CPU_SETSIZE) {
        KMP_SNPRINTF(scan, end-scan+1, "{<empty>}");
        while (*scan != '\0') scan++;
        KMP_ASSERT(scan <= end);
        return buf;
    }

    KMP_SNPRINTF(scan, end-scan+1, "{%ld", (long)i);
    while (*scan != '\0') scan++;
    i++;
    for (; i < KMP_CPU_SETSIZE; i++) {
        if (! KMP_CPU_ISSET(i, mask)) {
            continue;
        }

        //
        // Check for buffer overflow.  A string of the form ",<n>" will have
        // at most 10 characters, plus we want to leave room to print ",...}"
        // if the set is too large to print for a total of 15 characters.
        // We already left room for '\0' in setting end.
        //
        if (end - scan < 15) {
            break;
        }
        KMP_SNPRINTF(scan, end-scan+1, ",%-ld", (long)i);
        while (*scan != '\0') scan++;
    }
    if (i < KMP_CPU_SETSIZE) {
        KMP_SNPRINTF(scan, end-scan+1, ",...");
        while (*scan != '\0') scan++;
    }
    KMP_SNPRINTF(scan, end-scan+1, "}");
    while (*scan != '\0') scan++;
    KMP_ASSERT(scan <= end);
    return buf;
}
#endif // KMP_USE_HWLOC


void
__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
{
    KMP_CPU_ZERO(mask);

# if KMP_GROUP_AFFINITY

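    // With multiple Windows* OS processor groups, a global proc id is formed
    // as group * (CHAR_BIT * sizeof(DWORD_PTR)) + proc-within-group, so set
    // the bit for each active proc at its group's offset.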
    if (__kmp_num_proc_groups > 1) {
        int group;
        KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
        for (group = 0; group < __kmp_num_proc_groups; group++) {
            int i;
            int num = __kmp_GetActiveProcessorCount(group);
            for (i = 0; i < num; i++) {
                KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
            }
        }
    }
    else

# endif /* KMP_GROUP_AFFINITY */

    {
        int proc;
        for (proc = 0; proc < __kmp_xproc; proc++) {
            KMP_CPU_SET(proc, mask);
        }
    }
}

//
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the child_num
// vector of the address object.  This is done in case the labels used for
// the children at one node of the hierarchy differ from those used for
// another node at the same level.  Example:  suppose the machine has 2 nodes
// with 2 packages each.  The first node contains packages 601 and 602, and
// the second node contains packages 603 and 604.  If we try to sort the table
// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604
// because we are paying attention to the labels themselves, not the ordinal
// child numbers.  By using the child numbers in the sort, the result is
// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
//
static void
__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
  int numAddrs)
{
    KMP_DEBUG_ASSERT(numAddrs > 0);
    int depth = address2os->first.depth;
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
      * sizeof(unsigned));
    int labCt;
    for (labCt = 0; labCt < depth; labCt++) {
        address2os[0].first.childNums[labCt] = counts[labCt] = 0;
        lastLabel[labCt] = address2os[0].first.labels[labCt];
    }
    int i;
    for (i = 1; i < numAddrs; i++) {
        for (labCt = 0; labCt < depth; labCt++) {
            if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
                int labCt2;
                for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
                    counts[labCt2] = 0;
                    lastLabel[labCt2] = address2os[i].first.labels[labCt2];
                }
                counts[labCt]++;
                lastLabel[labCt] = address2os[i].first.labels[labCt];
                break;
            }
        }
        for (labCt = 0; labCt < depth; labCt++) {
            address2os[i].first.childNums[labCt] = counts[labCt];
        }
        for (; labCt < (int)Address::maxDepth; labCt++) {
            address2os[i].first.childNums[labCt] = 0;
        }
    }
}


//
// All of the __kmp_affinity_create_*_map() routines should set
// __kmp_affinity_masks to a vector of affinity mask objects of length
// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
// return the number of levels in the machine topology tree (zero if
// __kmp_affinity_type == affinity_none).
//
// All of the __kmp_affinity_create_*_map() routines should set *__kmp_affin_fullMask
// to the affinity mask for the initialization thread.  They need to save and
// restore the mask, and it could be needed later, so saving it is just an
// optimization to avoid calling kmp_get_system_affinity() again.
//
kmp_affin_mask_t *__kmp_affin_fullMask = NULL;

static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif

253//
254// __kmp_affinity_uniform_topology() doesn't work when called from
255// places which support arbitrarily many levels in the machine topology
256// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
257// __kmp_affinity_create_x2apicid_map().
258//
259inline static bool
260__kmp_affinity_uniform_topology()
261{
262 return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
263}
264
265
266//
267// Print out the detailed machine topology map, i.e. the physical locations
268// of each OS proc.
269//
270static void
271__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
272 int pkgLevel, int coreLevel, int threadLevel)
273{
274 int proc;
275
276 KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
277 for (proc = 0; proc < len; proc++) {
278 int level;
279 kmp_str_buf_t buf;
280 __kmp_str_buf_init(&buf);
281 for (level = 0; level < depth; level++) {
282 if (level == threadLevel) {
283 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
284 }
285 else if (level == coreLevel) {
286 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
287 }
288 else if (level == pkgLevel) {
289 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
290 }
291 else if (level > pkgLevel) {
292 __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
293 level - pkgLevel - 1);
294 }
295 else {
296 __kmp_str_buf_print(&buf, "L%d ", level);
297 }
298 __kmp_str_buf_print(&buf, "%d ",
299 address2os[proc].first.labels[level]);
300 }
301 KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
302 buf.str);
303 __kmp_str_buf_free(&buf);
304 }
305}
306
#if KMP_USE_HWLOC

// This function removes the topology levels that are radix 1 and don't offer
// further information about the topology.  The most common example is when
// there is one thread context per core; we don't want the extra thread
// context level if it offers no unique labels.  So they are removed.
// return value: the new depth of address2os
static int
__kmp_affinity_remove_radix_one_levels(AddrUnsPair *address2os, int nActiveThreads, int depth, int* pkgLevel, int* coreLevel, int* threadLevel) {
    int level;
    int i;
    int radix1_detected;

    for (level = depth-1; level >= 0; --level) {
        // Always keep the package level
        if (level == *pkgLevel)
            continue;
        // Detect if this level is radix 1
        radix1_detected = 1;
        for (i = 1; i < nActiveThreads; ++i) {
            if (address2os[0].first.labels[level] != address2os[i].first.labels[level]) {
                // There are differing label values for this level so it stays
                radix1_detected = 0;
                break;
            }
        }
        if (!radix1_detected)
            continue;
        // Radix 1 was detected
        if (level == *threadLevel) {
            // If only one thread per core, then just decrement
            // the depth which removes the threadlevel from address2os
            for (i = 0; i < nActiveThreads; ++i) {
                address2os[i].first.depth--;
            }
            *threadLevel = -1;
        } else if (level == *coreLevel) {
            // For core level, we move the thread labels over if they are still
            // valid (*threadLevel != -1), and also reduce the depth another level
            for (i = 0; i < nActiveThreads; ++i) {
                if (*threadLevel != -1) {
                    address2os[i].first.labels[*coreLevel] = address2os[i].first.labels[*threadLevel];
                }
                address2os[i].first.depth--;
            }
            *coreLevel = -1;
        }
    }
    return address2os[0].first.depth;
}

// Returns the number of objects of type 'type' below 'obj' within the topology tree structure.
// e.g., if obj is a HWLOC_OBJ_SOCKET object, and type is HWLOC_OBJ_PU, then
// this will return the number of PU's under the SOCKET object.
static int
__kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj, hwloc_obj_type_t type) {
    int retval = 0;
    hwloc_obj_t first;
    for(first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type, obj->logical_index, type, 0);
        first != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) == obj;
        first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type, first))
    {
        ++retval;
    }
    return retval;
}

static int
__kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    __kmp_get_system_affinity(oldMask, TRUE);

    int depth = 3;
    int pkgLevel = 0;
    int coreLevel = 1;
    int threadLevel = 2;

    if (! KMP_AFFINITY_CAPABLE())
    {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0), HWLOC_OBJ_CORE);
        __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);
        __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Allocate the data structure to be returned.
    //
    AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return if affinity is not enabled.
    //

    hwloc_obj_t pu;
    hwloc_obj_t core;
    hwloc_obj_t socket;
    int nActiveThreads = 0;
    int socket_identifier = 0;
    // re-calculate globals to count only accessible resources
    __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
    for(socket = hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0);
        socket != NULL;
        socket = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, socket),
        socket_identifier++)
    {
        int core_identifier = 0;
        int num_active_cores = 0;
        for(core = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, socket->type, socket->logical_index, HWLOC_OBJ_CORE, 0);
            core != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, socket->type, core) == socket;
            core = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, core),
            core_identifier++)
        {
            int pu_identifier = 0;
            int num_active_threads = 0;
            for(pu = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, core->type, core->logical_index, HWLOC_OBJ_PU, 0);
                pu != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, core->type, pu) == core;
                pu = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU, pu),
                pu_identifier++)
            {
                Address addr(3);
                if(! KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
                    continue; // skip inactive (inaccessible) unit
                KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
                    socket->os_index, socket->logical_index, core->os_index, core->logical_index, pu->os_index, pu->logical_index));
                addr.labels[0] = socket_identifier; // package
                addr.labels[1] = core_identifier;   // core
                addr.labels[2] = pu_identifier;     // pu
                retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
                nActiveThreads++;
                ++num_active_threads; // count active threads per core
            }
            if (num_active_threads) { // were there any active threads on the core?
                ++__kmp_ncores; // count total active cores
                ++num_active_cores; // count active cores per socket
                if (num_active_threads > __kmp_nThreadsPerCore)
                    __kmp_nThreadsPerCore = num_active_threads; // calc maximum
            }
        }
        if (num_active_cores) { // were there any active cores on the socket?
            ++nPackages; // count total active packages
            if (num_active_cores > nCoresPerPkg)
                nCoresPerPkg = num_active_cores; // calc maximum
        }
    }

    //
    // If there's only one thread context to bind to, return now.
    //
    KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
    KMP_ASSERT(nActiveThreads > 0);
    if (nActiveThreads == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(retval);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        //
        // Form an Address object which only includes the package level.
        //
        Address addr(1);
        addr.labels[0] = retval[0].first.labels[pkgLevel];
        retval[0].first = addr;

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
        }

        *address2os = retval;
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the table by physical Id.
    //
    qsort(retval, nActiveThreads, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

    //
    // Check to see if the machine topology is uniform
    //
    unsigned uniform = (nPackages * nCoresPerPkg * __kmp_nThreadsPerCore == nActiveThreads);

    //
    // Print the machine topology summary.
    //
    if (__kmp_affinity_verbose) {
        char mask[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (uniform) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }

        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);

        __kmp_str_buf_print(&buf, "%d", nPackages);
        //for (level = 1; level <= pkgLevel; level++) {
        //    __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
        //}
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);

        __kmp_str_buf_free(&buf);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(retval);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Find any levels with radix 1, and remove them from the map
    // (except for the package level).
    //
    depth = __kmp_affinity_remove_radix_one_levels(retval, nActiveThreads, depth, &pkgLevel, &coreLevel, &threadLevel);

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(retval, nActiveThreads, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    KMP_CPU_FREE(oldMask);
    *address2os = retval;
    return depth;
}
#endif // KMP_USE_HWLOC

//
// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread id's <-> processor id's.
//
static int
__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Even if __kmp_affinity_type == affinity_none, this routine might still
    // be called to set __kmp_ncores, as well as
    // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(__kmp_affinity_type == affinity_none);
        __kmp_ncores = nPackages = __kmp_xproc;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nPackages = __kmp_avail_proc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, __kmp_affin_fullMask);

        KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }
    if (__kmp_affinity_type == affinity_none) {
        return 0;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    unsigned int i;
    KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
            continue;
        }

        Address addr(1);
        addr.labels[0] = i;
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
    }
    if (__kmp_affinity_verbose) {
        KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Only the package level is modeled in the machine topology map,
        // so the #levels of granularity is either 0 or 1.
        //
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels = 1;
        }
        else {
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 1;
}


# if KMP_GROUP_AFFINITY

//
// If multiple Windows* OS processor groups exist, we can create a 2-level
// topology map with the groups at level 0 and the individual procs at
// level 1.
//
// This facilitates letting the threads float among all procs in a group,
// if granularity=group (the default when there are multiple groups).
//
static int
__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // If we don't have multiple processor groups, return now.
    // The flat mapping will be used.
    //
    if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(__kmp_affin_fullMask) >= 0)) {
        // FIXME set *msg_id
        return -1;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    int i;
    KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
            continue;
        }

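        // Label 0 is the Windows* OS processor group number; label 1 is the
        // proc's bit position within its group's DWORD_PTR-sized affinity
        // mask (CHAR_BIT * sizeof(DWORD_PTR) = 64 procs per group on 64-bit
        // Windows* OS).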
        Address addr(2);
        addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
        addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
              addr.labels[1]);
        }
    }

    if (__kmp_affinity_gran_levels < 0) {
        if (__kmp_affinity_gran == affinity_gran_group) {
            __kmp_affinity_gran_levels = 1;
        }
        else if ((__kmp_affinity_gran == affinity_gran_fine)
          || (__kmp_affinity_gran == affinity_gran_thread)) {
            __kmp_affinity_gran_levels = 0;
        }
        else {
            const char *gran_str = NULL;
            if (__kmp_affinity_gran == affinity_gran_core) {
                gran_str = "core";
            }
            else if (__kmp_affinity_gran == affinity_gran_package) {
                gran_str = "package";
            }
            else if (__kmp_affinity_gran == affinity_gran_node) {
                gran_str = "node";
            }
            else {
                KMP_ASSERT(0);
            }

            // Warning: can't use affinity granularity \"gran\" with group topology method, using "thread"
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 2;
}

# endif /* KMP_GROUP_AFFINITY */


# if KMP_ARCH_X86 || KMP_ARCH_X86_64

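// Compute the number of bits needed to distinguish "count" values, i.e. the
// smallest r such that (1 << r) >= count.  Used to find the widths of the
// thread# and core# fields packed into the legacy (cpuid leaf 1/4) Apic Id.
// For example, count = 6 gives r = 3, since 2^2 = 4 < 6 <= 8 = 2^3.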
static int
__kmp_cpuid_mask_width(int count) {
    int r = 0;

    while((1<<r) < count)
        ++r;
    return r;
}


class apicThreadInfo {
public:
    unsigned osId;              // param to __kmp_affinity_bind_thread
    unsigned apicId;            // from cpuid after binding
    unsigned maxCoresPerPkg;    // ""
    unsigned maxThreadsPerPkg;  // ""
    unsigned pkgId;             // inferred from above values
    unsigned coreId;            // ""
    unsigned threadId;          // ""
};

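// qsort() comparator: order apicThreadInfo records by OS processor id.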
static int
__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->osId < bb->osId) return -1;
    if (aa->osId > bb->osId) return 1;
    return 0;
}

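// qsort() comparator: order apicThreadInfo records by physical location,
// i.e. by package id, then core id, then thread id, so that topologically
// adjacent thread contexts end up adjacent in the table.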
static int
__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->pkgId < bb->pkgId) return -1;
    if (aa->pkgId > bb->pkgId) return 1;
    if (aa->coreId < bb->coreId) return -1;
    if (aa->coreId > bb->coreId) return 1;
    if (aa->threadId < bb->threadId) return -1;
    if (aa->threadId > bb->threadId) return 1;
    return 0;
}

//
// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
// an algorithm which cycles through the available os threads, setting
// the current thread's affinity mask to that thread, and then retrieves
// the Apic Id for each thread context using the cpuid instruction.
//
static int
__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;
    int rc;
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check if cpuid leaf 4 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 4) {
        *msg_id = kmp_i18n_str_NoLeaf4Support;
        return -1;
    }

    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
    // the defaults that we calculated from issuing cpuid without binding
    // to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        //
        // Get an upper bound on the number of threads per package using
        // cpuid(1).
        //
        // On some OS/chip combinations where HT is supported by the chip
        // but is disabled, this value will be 2 on a single core chip.
        // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
        //
        __kmp_x86_cpuid(1, 0, &buf);
        int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (maxThreadsPerPkg == 0) {
            maxThreadsPerPkg = 1;
        }

        //
        // The num cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // The author of cpu_count.cpp treated this as only an upper bound
        // on the number of cores, but I haven't seen any cases where it
        // was greater than the actual number of cores, so we will treat
        // it as exact in this block of code.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            nCoresPerPkg = 1;
        }

        //
        // There is no way to reliably tell if HT is enabled without issuing
        // the cpuid instruction from every thread, and correlating the cpuid
        // info, so if the machine is not affinity capable, we assume that HT
        // is off.  We have seen quite a few machines where maxThreadsPerPkg
        // is 2, yet the machine does not support HT.
        //
        // - Older OSes are usually found on machines with older chips, which
        //   do not support HT.
        //
        // - The performance penalty for mistakenly identifying a machine as
        //   HT when it isn't (which results in blocktime being incorrectly
        //   set to 0) is greater than the penalty for mistakenly identifying
        //   a machine as being 1 thread/core when it is really HT enabled
        //   (which results in blocktime being incorrectly set to a positive
        //   value).
        //
        __kmp_ncores = __kmp_xproc;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        __kmp_nThreadsPerCore = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    KMP_ASSERT(oldMask != NULL);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    // The relevant information is:
    //
    // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
    //     has a unique Apic Id, which is of the form pkg# : core# : thread#.
    //
    // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1).  The
    //     value of this field determines the width of the core# + thread#
    //     fields in the Apic Id.  It is also an upper bound on the number
    //     of threads per package, but it has been verified that situations
    //     happen where it is not exact.  In particular, on certain OS/chip
    //     combinations where Intel(R) Hyper-Threading Technology is supported
    //     by the chip but has been disabled, the value of this field will be
    //     2 (for a single core chip).  On other OS/chip combinations
    //     supporting Intel(R) Hyper-Threading Technology, the value of this
    //     field will be 1 when Intel(R) Hyper-Threading Technology is
    //     disabled and 2 when it is enabled.
    //
    // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4).  The
    //     value of this field (+1) determines the width of the core# field in
    //     the Apic Id.  The comments in "cpucount.cpp" say that this value is
    //     an upper bound, but the IA-32 architecture manual says that it is
    //     exactly the number of cores per package, and I haven't seen any
    //     case where it wasn't.
    //
    // From this information, deduce the package Id, core Id, and thread Id,
    // and set the corresponding fields in the apicThreadInfo struct.
    //
    unsigned i;
    apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
    unsigned nApics = 0;
    KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(i);
        threadInfo[nApics].osId = i;

        //
        // The apic id and max threads per pkg come from cpuid(1).
        //
        __kmp_x86_cpuid(1, 0, &buf);
        if (! ((buf.edx >> 9) & 1)) {
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_ApicNotPresent;
            return -1;
        }
        threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
        threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (threadInfo[nApics].maxThreadsPerPkg == 0) {
            threadInfo[nApics].maxThreadsPerPkg = 1;
        }

        //
        // Max cores per pkg comes from cpuid(4).
        // 1 must be added to the encoded value.
        //
        // First, we need to check if cpuid(4) is supported on this chip.
        // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
        // has the value n or greater.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            threadInfo[nApics].maxCoresPerPkg = 1;
        }

        //
        // Infer the pkgId / coreId / threadId using only the info
        // obtained locally.
        //
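        // For example, with maxThreadsPerPkg = 16 and maxCoresPerPkg = 8,
        // widthCT = 4 and widthC = 3, so widthT = 1: the low bit of the
        // apic id is the thread id, the next 3 bits are the core id, and
        // the remaining high bits are the package id.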
        int widthCT = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxThreadsPerPkg);
        threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

        int widthC = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxCoresPerPkg);
        int widthT = widthCT - widthC;
        if (widthT < 0) {
            //
            // I've never seen this one happen, but I suppose it could, if
            // the cpuid instruction on a chip was really screwed up.
            // Make sure to restore the affinity mask before the tail call.
            //
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }

        int maskC = (1 << widthC) - 1;
        threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
          & maskC;

        int maskT = (1 << widthT) - 1;
        threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

        nApics++;
    }

    //
    // We've collected all the info we need.
    // Restore the old affinity mask for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, form an Address object
    // with depth 1 and return immediately (or, if affinity is off, set
    // address2os to NULL and return).
    //
    // If it is configured to omit the package level when there is only a
    // single package, the logic at the end of this routine won't work if
    // there is only a single thread - it would try to form an Address
    // object with depth 0.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            } else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
        Address addr(1);
        addr.labels[0] = threadInfo[0].pkgId;
        (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
        }

        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the threadInfo table by physical Id.
    //
    qsort(threadInfo, nApics, sizeof(*threadInfo),
      __kmp_affinity_cmp_apicThreadInfo_phys_id);

    //
    // The table is now sorted by pkgId / coreId / threadId, but we really
    // don't know the radix of any of the fields.  pkgId's may be sparsely
    // assigned among the chips on a system.  Although coreId's are usually
    // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
    // [0..threadsPerCore-1], we don't want to make any such assumptions.
    //
    // For that matter, we don't know what coresPerPkg and threadsPerCore
    // (or the total # packages) are at this point - we want to determine
    // that now.  We only have an upper bound on the first two figures.
    //
    // We also perform a consistency check at this point: the values returned
    // by the cpuid instruction for any thread bound to a given package had
    // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
    //
    nPackages = 1;
    nCoresPerPkg = 1;
    __kmp_nThreadsPerCore = 1;
    unsigned nCores = 1;

    unsigned pkgCt = 1;                         // to determine radii
    unsigned lastPkgId = threadInfo[0].pkgId;
    unsigned coreCt = 1;
    unsigned lastCoreId = threadInfo[0].coreId;
    unsigned threadCt = 1;
    unsigned lastThreadId = threadInfo[0].threadId;

    // intra-pkg consist checks
    unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
    unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

    for (i = 1; i < nApics; i++) {
        if (threadInfo[i].pkgId != lastPkgId) {
            nCores++;
            pkgCt++;
            lastPkgId = threadInfo[i].pkgId;
            if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
            coreCt = 1;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;

            //
            // This is a different package, so go on to the next iteration
            // without doing any consistency checks.  Reset the consistency
            // check vars, though.
            //
            prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
            prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
            continue;
        }

        if (threadInfo[i].coreId != lastCoreId) {
            nCores++;
            coreCt++;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;
        }
        else if (threadInfo[i].threadId != lastThreadId) {
            threadCt++;
            lastThreadId = threadInfo[i].threadId;
        }
        else {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
            return -1;
        }

        //
        // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
        // fields agree between all the threads bound to a given package.
        //
        if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
          || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }
    }
    nPackages = pkgCt;
    if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
    if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
    // correctly, and return now if affinity is not enabled.
    //
    __kmp_ncores = nCores;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (__kmp_affinity_uniform_topology()) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        } else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Now that we've determined the number of packages, the number of cores
    // per package, and the number of threads per core, we can construct the
    // data structure that is to be returned.
    //
    int pkgLevel = 0;
    int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
    int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
    unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

    KMP_ASSERT(depth > 0);
    *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

    for (i = 0; i < nApics; ++i) {
        Address addr(depth);
        unsigned os = threadInfo[i].osId;
        int d = 0;

        if (pkgLevel >= 0) {
            addr.labels[d++] = threadInfo[i].pkgId;
        }
        if (coreLevel >= 0) {
            addr.labels[d++] = threadInfo[i].coreId;
        }
        if (threadLevel >= 0) {
            addr.labels[d++] = threadInfo[i].threadId;
        }
        (*address2os)[i] = AddrUnsPair(addr, os);
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0)
          && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return depth;
}


//
// Intel(R) microarchitecture code name Nehalem, Dunnington and later
// architectures support a newer interface for specifying the x2APIC Ids,
// based on cpuid leaf 11.
//
static int
__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;

    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check to see if cpuid leaf 11 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 11) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }
    __kmp_x86_cpuid(11, 0, &buf);
    if (buf.ebx == 0) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }

    //
    // Find the number of levels in the machine topology.  While we're at it,
    // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg.  We
    // will try to get more accurate values later by explicitly counting them,
    // but get reasonable defaults now, in case we return early.
    //
    int level;
    int threadLevel = -1;
    int coreLevel = -1;
    int pkgLevel = -1;
    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

    for (level = 0;; level++) {
        if (level > 31) {
            //
            // FIXME: Hack for DPD200163180
            //
            // If level is big then something went wrong -> exiting
            //
            // There could actually be 32 valid levels in the machine topology,
            // but so far, the only machine we have seen which does not exit
            // this loop before iteration 32 has fubar x2APIC settings.
            //
            // For now, just reject this case based upon loop trip count.
            //
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }
        __kmp_x86_cpuid(11, level, &buf);
        if (buf.ebx == 0) {
            if (pkgLevel < 0) {
                //
                // Will infer nPackages from __kmp_xproc
                //
                pkgLevel = level;
                level++;
            }
            break;
        }
        int kind = (buf.ecx >> 8) & 0xff;
        if (kind == 1) {
            //
            // SMT level
            //
            threadLevel = level;
            coreLevel = -1;
            pkgLevel = -1;
            __kmp_nThreadsPerCore = buf.ebx & 0xff;
            if (__kmp_nThreadsPerCore == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else if (kind == 2) {
            //
            // core level
            //
            coreLevel = level;
            pkgLevel = -1;
            nCoresPerPkg = buf.ebx & 0xff;
            if (nCoresPerPkg == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else {
            if (level <= 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
            if (pkgLevel >= 0) {
                continue;
            }
            pkgLevel = level;
            nPackages = buf.ebx & 0xff;
            if (nPackages == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
    }
    int depth = level;

    //
    // In the above loop, "level" was counted from the finest level (usually
    // thread) to the coarsest.  The caller expects that we will place the
    // labels in (*address2os)[].first.labels[] in the inverse order, so
    // we need to invert the vars saying which level means what.
    //
    if (threadLevel >= 0) {
        threadLevel = depth - threadLevel - 1;
    }
    if (coreLevel >= 0) {
        coreLevel = depth - coreLevel - 1;
    }
    KMP_DEBUG_ASSERT(pkgLevel >= 0);
    pkgLevel = depth - pkgLevel - 1;

    //
    // The algorithm used starts by setting the affinity to each available
    // thread and retrieving info from the cpuid instruction, so if we are
    // not capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), then we need to do something else - use
    // the defaults that we calculated from issuing cpuid without binding
    // to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE())
    {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            } else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
    // even if __kmp_affinity_type = affinity_none.
    //

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    __kmp_get_system_affinity(oldMask, TRUE);

1508 //
1509 // Allocate the data structure to be returned.
1510 //
1511 AddrUnsPair *retval = (AddrUnsPair *)
1512 __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
1513
1514 //
1515 // Run through each of the available contexts, binding the current thread
1516 // to it, and obtaining the pertinent information using the cpuid instr.
1517 //
1518 unsigned int proc;
1519 int nApics = 0;
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00001520 KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001521 //
1522 // Skip this proc if it is not included in the machine model.
1523 //
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00001524 if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001525 continue;
1526 }
1527 KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
1528
1529 __kmp_affinity_bind_thread(proc);
1530
1531 //
 1532 // Extract the labels for each level of the machine topology map
 1533 // from the APIC ID.
1534 //
1535 Address addr(depth);
1536 int prev_shift = 0;
1537
1538 for (level = 0; level < depth; level++) {
1539 __kmp_x86_cpuid(11, level, &buf);
1540 unsigned apicId = buf.edx;
1541 if (buf.ebx == 0) {
1542 if (level != depth - 1) {
1543 KMP_CPU_FREE(oldMask);
1544 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1545 return -1;
1546 }
1547 addr.labels[depth - level - 1] = apicId >> prev_shift;
1548 level++;
1549 break;
1550 }
1551 int shift = buf.eax & 0x1f;
1552 int mask = (1 << shift) - 1;
1553 addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
1554 prev_shift = shift;
1555 }
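        //
        // A worked example with hypothetical shift widths: if level 0 (SMT)
        // reports shift 1 and level 1 (core) reports shift 5, then for
        // apicId 0x2b the loop extracts thread = 0x2b & 0x1 = 1,
        // core = (0x2b & 0x1f) >> 1 = 5, and package = 0x2b >> 5 = 1.
        //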
1556 if (level != depth) {
1557 KMP_CPU_FREE(oldMask);
1558 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1559 return -1;
1560 }
1561
1562 retval[nApics] = AddrUnsPair(addr, proc);
1563 nApics++;
1564 }
1565
1566 //
1567 // We've collected all the info we need.
1568 // Restore the old affinity mask for this thread.
1569 //
1570 __kmp_set_system_affinity(oldMask, TRUE);
1571
1572 //
1573 // If there's only one thread context to bind to, return now.
1574 //
1575 KMP_ASSERT(nApics > 0);
1576 if (nApics == 1) {
1577 __kmp_ncores = nPackages = 1;
1578 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001579 if (__kmp_affinity_verbose) {
1580 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1581 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1582
1583 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1584 if (__kmp_affinity_respect_mask) {
1585 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
1586 } else {
1587 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
1588 }
1589 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1590 KMP_INFORM(Uniform, "KMP_AFFINITY");
1591 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1592 __kmp_nThreadsPerCore, __kmp_ncores);
1593 }
1594
1595 if (__kmp_affinity_type == affinity_none) {
1596 __kmp_free(retval);
1597 KMP_CPU_FREE(oldMask);
1598 return 0;
1599 }
1600
1601 //
1602 // Form an Address object which only includes the package level.
1603 //
1604 Address addr(1);
1605 addr.labels[0] = retval[0].first.labels[pkgLevel];
1606 retval[0].first = addr;
1607
1608 if (__kmp_affinity_gran_levels < 0) {
1609 __kmp_affinity_gran_levels = 0;
1610 }
1611
1612 if (__kmp_affinity_verbose) {
1613 __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
1614 }
1615
1616 *address2os = retval;
1617 KMP_CPU_FREE(oldMask);
1618 return 1;
1619 }
1620
1621 //
1622 // Sort the table by physical Id.
1623 //
1624 qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
1625
1626 //
1627 // Find the radix at each of the levels.
1628 //
1629 unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1630 unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1631 unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1632 unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1633 for (level = 0; level < depth; level++) {
1634 totals[level] = 1;
1635 maxCt[level] = 1;
1636 counts[level] = 1;
1637 last[level] = retval[0].first.labels[level];
1638 }
1639
1640 //
 1641 // From here on, the iteration variable "level" runs from the coarsest
 1642 // level to the finest, i.e. we iterate forward through
1643 // (*address2os)[].first.labels[] - in the previous loops, we iterated
1644 // backwards.
1645 //
1646 for (proc = 1; (int)proc < nApics; proc++) {
1647 int level;
1648 for (level = 0; level < depth; level++) {
1649 if (retval[proc].first.labels[level] != last[level]) {
1650 int j;
1651 for (j = level + 1; j < depth; j++) {
1652 totals[j]++;
1653 counts[j] = 1;
 1654 // The line below would cause incorrect topology information to be
 1655 // printed when the maximum count for some level (maxCt[level]) is
 1656 // encountered earlier in the array than a smaller count.
 1657 // For example, suppose pkg0 has 4 cores and pkg1 has 2 cores. Then
 1658 // maxCt[1] would end up as 2, whereas it must be 4.
1659 // TODO!!! Check if it can be commented safely
1660 //maxCt[j] = 1;
1661 last[j] = retval[proc].first.labels[j];
1662 }
1663 totals[level]++;
1664 counts[level]++;
1665 if (counts[level] > maxCt[level]) {
1666 maxCt[level] = counts[level];
1667 }
1668 last[level] = retval[proc].first.labels[level];
1669 break;
1670 }
1671 else if (level == depth - 1) {
1672 __kmp_free(last);
1673 __kmp_free(maxCt);
1674 __kmp_free(counts);
1675 __kmp_free(totals);
1676 __kmp_free(retval);
1677 KMP_CPU_FREE(oldMask);
1678 *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
1679 return -1;
1680 }
1681 }
1682 }
1683
1684 //
1685 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00001686 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001687 // nCoresPerPkg, & nPackages. Make sure all these vars are set
1688 // correctly, and return if affinity is not enabled.
1689 //
1690 if (threadLevel >= 0) {
1691 __kmp_nThreadsPerCore = maxCt[threadLevel];
1692 }
1693 else {
1694 __kmp_nThreadsPerCore = 1;
1695 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001696 nPackages = totals[pkgLevel];
1697
1698 if (coreLevel >= 0) {
1699 __kmp_ncores = totals[coreLevel];
1700 nCoresPerPkg = maxCt[coreLevel];
1701 }
1702 else {
1703 __kmp_ncores = nPackages;
1704 nCoresPerPkg = 1;
1705 }
1706
1707 //
1708 // Check to see if the machine topology is uniform
1709 //
1710 unsigned prod = maxCt[0];
1711 for (level = 1; level < depth; level++) {
1712 prod *= maxCt[level];
1713 }
1714 bool uniform = (prod == totals[level - 1]);
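    //
    // E.g. one package with 4 cores plus one with 2 cores (no SMT) gives
    // prod == 2 * 4 == 8 but only 6 entries at the finest level, so the
    // topology is flagged as non-uniform.
    //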
1715
1716 //
1717 // Print the machine topology summary.
1718 //
1719 if (__kmp_affinity_verbose) {
1720 char mask[KMP_AFFIN_MASK_PRINT_LEN];
1721 __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1722
1723 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1724 if (__kmp_affinity_respect_mask) {
1725 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
1726 } else {
1727 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
1728 }
1729 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1730 if (uniform) {
1731 KMP_INFORM(Uniform, "KMP_AFFINITY");
1732 } else {
1733 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1734 }
1735
1736 kmp_str_buf_t buf;
1737 __kmp_str_buf_init(&buf);
1738
1739 __kmp_str_buf_print(&buf, "%d", totals[0]);
1740 for (level = 1; level <= pkgLevel; level++) {
1741 __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
1742 }
1743 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
1744 __kmp_nThreadsPerCore, __kmp_ncores);
1745
1746 __kmp_str_buf_free(&buf);
1747 }
1748
1749 if (__kmp_affinity_type == affinity_none) {
1750 __kmp_free(last);
1751 __kmp_free(maxCt);
1752 __kmp_free(counts);
1753 __kmp_free(totals);
1754 __kmp_free(retval);
1755 KMP_CPU_FREE(oldMask);
1756 return 0;
1757 }
1758
1759 //
 1760 // Find any levels with radix 1, and remove them from the map
1761 // (except for the package level).
1762 //
1763 int new_depth = 0;
1764 for (level = 0; level < depth; level++) {
1765 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1766 continue;
1767 }
1768 new_depth++;
1769 }
1770
1771 //
1772 // If we are removing any levels, allocate a new vector to return,
1773 // and copy the relevant information to it.
1774 //
1775 if (new_depth != depth) {
1776 AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
1777 sizeof(AddrUnsPair) * nApics);
1778 for (proc = 0; (int)proc < nApics; proc++) {
1779 Address addr(new_depth);
1780 new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
1781 }
1782 int new_level = 0;
Jonathan Peyton62f38402015-08-25 18:44:41 +00001783 int newPkgLevel = -1;
1784 int newCoreLevel = -1;
1785 int newThreadLevel = -1;
1786 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001787 for (level = 0; level < depth; level++) {
Jonathan Peyton62f38402015-08-25 18:44:41 +00001788 if ((maxCt[level] == 1)
1789 && (level != pkgLevel)) {
1790 //
1791 // Remove this level. Never remove the package level
1792 //
1793 continue;
1794 }
1795 if (level == pkgLevel) {
1796 newPkgLevel = level;
1797 }
1798 if (level == coreLevel) {
1799 newCoreLevel = level;
1800 }
1801 if (level == threadLevel) {
1802 newThreadLevel = level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001803 }
1804 for (proc = 0; (int)proc < nApics; proc++) {
1805 new_retval[proc].first.labels[new_level]
1806 = retval[proc].first.labels[level];
1807 }
1808 new_level++;
1809 }
1810
1811 __kmp_free(retval);
1812 retval = new_retval;
1813 depth = new_depth;
Jonathan Peyton62f38402015-08-25 18:44:41 +00001814 pkgLevel = newPkgLevel;
1815 coreLevel = newCoreLevel;
1816 threadLevel = newThreadLevel;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001817 }
1818
1819 if (__kmp_affinity_gran_levels < 0) {
1820 //
1821 // Set the granularity level based on what levels are modeled
1822 // in the machine topology map.
1823 //
1824 __kmp_affinity_gran_levels = 0;
1825 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
1826 __kmp_affinity_gran_levels++;
1827 }
1828 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1829 __kmp_affinity_gran_levels++;
1830 }
1831 if (__kmp_affinity_gran > affinity_gran_package) {
1832 __kmp_affinity_gran_levels++;
1833 }
1834 }
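    //
    // For example, with granularity=core on a topology that models the
    // thread, core, and package levels, only the thread level is finer
    // than the granularity, so __kmp_affinity_gran_levels becomes 1 and
    // the hardware threads of each core will share one affinity mask.
    //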
1835
1836 if (__kmp_affinity_verbose) {
1837 __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
1838 coreLevel, threadLevel);
1839 }
1840
1841 __kmp_free(last);
1842 __kmp_free(maxCt);
1843 __kmp_free(counts);
1844 __kmp_free(totals);
1845 KMP_CPU_FREE(oldMask);
1846 *address2os = retval;
1847 return depth;
1848}
1849
1850
1851# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1852
1853
1854#define osIdIndex 0
1855#define threadIdIndex 1
1856#define coreIdIndex 2
1857#define pkgIdIndex 3
1858#define nodeIdIndex 4
1859
1860typedef unsigned *ProcCpuInfo;
1861static unsigned maxIndex = pkgIdIndex;
1862
1863
1864static int
1865__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
1866{
1867 const unsigned *aa = (const unsigned *)a;
1868 const unsigned *bb = (const unsigned *)b;
1869 if (aa[osIdIndex] < bb[osIdIndex]) return -1;
1870 if (aa[osIdIndex] > bb[osIdIndex]) return 1;
1871 return 0;
1872};
1873
1874
1875static int
1876__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
1877{
1878 unsigned i;
1879 const unsigned *aa = *((const unsigned **)a);
1880 const unsigned *bb = *((const unsigned **)b);
1881 for (i = maxIndex; ; i--) {
1882 if (aa[i] < bb[i]) return -1;
1883 if (aa[i] > bb[i]) return 1;
1884 if (i == osIdIndex) break;
1885 }
1886 return 0;
1887}
1888
1889
1890//
1891// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
1892// affinity map.
1893//
1894static int
1895__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
1896 kmp_i18n_id_t *const msg_id, FILE *f)
1897{
1898 *address2os = NULL;
1899 *msg_id = kmp_i18n_null;
1900
1901 //
 1902 // Scan the file, counting the number of "processor" (osId) fields,
Alp Toker8f2d3f02014-02-24 10:40:15 +00001903 // and find the highest value of <n> for a node_<n> field.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001904 //
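    //
    // An illustrative record (only the fields parsed below matter; exact
    // field sets vary between platforms):
    //
    //     processor       : 0
    //     physical id     : 0
    //     core id         : 0
    //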
1905 char buf[256];
1906 unsigned num_records = 0;
1907 while (! feof(f)) {
1908 buf[sizeof(buf) - 1] = 1;
1909 if (! fgets(buf, sizeof(buf), f)) {
1910 //
 1911 // Read error, presumably because of EOF.
1912 //
1913 break;
1914 }
1915
1916 char s1[] = "processor";
1917 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
1918 num_records++;
1919 continue;
1920 }
1921
1922 //
1923 // FIXME - this will match "node_<n> <garbage>"
1924 //
1925 unsigned level;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001926 if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001927 if (nodeIdIndex + level >= maxIndex) {
1928 maxIndex = nodeIdIndex + level;
1929 }
1930 continue;
1931 }
1932 }
1933
1934 //
1935 // Check for empty file / no valid processor records, or too many.
1936 // The number of records can't exceed the number of valid bits in the
1937 // affinity mask.
1938 //
1939 if (num_records == 0) {
1940 *line = 0;
1941 *msg_id = kmp_i18n_str_NoProcRecords;
1942 return -1;
1943 }
1944 if (num_records > (unsigned)__kmp_xproc) {
1945 *line = 0;
1946 *msg_id = kmp_i18n_str_TooManyProcRecords;
1947 return -1;
1948 }
1949
1950 //
 1951 // Set the file pointer back to the beginning, so that we can scan the
 1952 // file again, this time performing a full parse of the data.
 1953 // Allocate a vector of ProcCpuInfo objects, in which we will place the data.
1954 // Adding an extra element at the end allows us to remove a lot of extra
1955 // checks for termination conditions.
1956 //
1957 if (fseek(f, 0, SEEK_SET) != 0) {
1958 *line = 0;
1959 *msg_id = kmp_i18n_str_CantRewindCpuinfo;
1960 return -1;
1961 }
1962
1963 //
1964 // Allocate the array of records to store the proc info in. The dummy
1965 // element at the end makes the logic in filling them out easier to code.
1966 //
1967 unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
1968 * sizeof(unsigned *));
1969 unsigned i;
1970 for (i = 0; i <= num_records; i++) {
1971 threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
1972 * sizeof(unsigned));
1973 }
1974
1975#define CLEANUP_THREAD_INFO \
1976 for (i = 0; i <= num_records; i++) { \
1977 __kmp_free(threadInfo[i]); \
1978 } \
1979 __kmp_free(threadInfo);
1980
1981 //
1982 // A value of UINT_MAX means that we didn't find the field
1983 //
1984 unsigned __index;
1985
1986#define INIT_PROC_INFO(p) \
1987 for (__index = 0; __index <= maxIndex; __index++) { \
1988 (p)[__index] = UINT_MAX; \
1989 }
1990
1991 for (i = 0; i <= num_records; i++) {
1992 INIT_PROC_INFO(threadInfo[i]);
1993 }
1994
1995 unsigned num_avail = 0;
1996 *line = 0;
1997 while (! feof(f)) {
1998 //
1999 // Create an inner scoping level, so that all the goto targets at the
2000 // end of the loop appear in an outer scoping level. This avoids
2001 // warnings about jumping past an initialization to a target in the
2002 // same block.
2003 //
2004 {
2005 buf[sizeof(buf) - 1] = 1;
2006 bool long_line = false;
2007 if (! fgets(buf, sizeof(buf), f)) {
2008 //
2009 // Read errors presumably because of EOF
2010 //
2011 // If there is valid data in threadInfo[num_avail], then fake
2012 // a blank line in ensure that the last address gets parsed.
2013 //
2014 bool valid = false;
2015 for (i = 0; i <= maxIndex; i++) {
2016 if (threadInfo[num_avail][i] != UINT_MAX) {
2017 valid = true;
2018 }
2019 }
2020 if (! valid) {
2021 break;
2022 }
2023 buf[0] = 0;
2024 } else if (!buf[sizeof(buf) - 1]) {
2025 //
2026 // The line is longer than the buffer. Set a flag and don't
2027 // emit an error if we were going to ignore the line, anyway.
2028 //
2029 long_line = true;
2030
2031#define CHECK_LINE \
2032 if (long_line) { \
2033 CLEANUP_THREAD_INFO; \
2034 *msg_id = kmp_i18n_str_LongLineCpuinfo; \
2035 return -1; \
2036 }
2037 }
2038 (*line)++;
2039
2040 char s1[] = "processor";
2041 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
2042 CHECK_LINE;
2043 char *p = strchr(buf + sizeof(s1) - 1, ':');
2044 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002045 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002046 if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
2047 threadInfo[num_avail][osIdIndex] = val;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002048#if KMP_OS_LINUX && USE_SYSFS_INFO
2049 char path[256];
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002050 KMP_SNPRINTF(path, sizeof(path),
Jim Cownie181b4bb2013-12-23 17:28:57 +00002051 "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
2052 threadInfo[num_avail][osIdIndex]);
2053 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
2054
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002055 KMP_SNPRINTF(path, sizeof(path),
Jim Cownie181b4bb2013-12-23 17:28:57 +00002056 "/sys/devices/system/cpu/cpu%u/topology/core_id",
2057 threadInfo[num_avail][osIdIndex]);
2058 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002059 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002060#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002061 }
2062 char s2[] = "physical id";
2063 if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
2064 CHECK_LINE;
2065 char *p = strchr(buf + sizeof(s2) - 1, ':');
2066 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002067 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002068 if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
2069 threadInfo[num_avail][pkgIdIndex] = val;
2070 continue;
2071 }
2072 char s3[] = "core id";
2073 if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
2074 CHECK_LINE;
2075 char *p = strchr(buf + sizeof(s3) - 1, ':');
2076 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002077 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002078 if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
2079 threadInfo[num_avail][coreIdIndex] = val;
2080 continue;
Jim Cownie181b4bb2013-12-23 17:28:57 +00002081#endif // KMP_OS_LINUX && USE_SYSFS_INFO
Jim Cownie5e8470a2013-09-27 10:38:44 +00002082 }
2083 char s4[] = "thread id";
2084 if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
2085 CHECK_LINE;
2086 char *p = strchr(buf + sizeof(s4) - 1, ':');
2087 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002088 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002089 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
2090 threadInfo[num_avail][threadIdIndex] = val;
2091 continue;
2092 }
2093 unsigned level;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002094 if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002095 CHECK_LINE;
 2096 char *p = strchr(buf, ':'); // the "node_<n> id" field name varies in length, so scan the whole line
2097 unsigned val;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002098 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002099 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
2100 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
2101 threadInfo[num_avail][nodeIdIndex + level] = val;
2102 continue;
2103 }
2104
2105 //
2106 // We didn't recognize the leading token on the line.
2107 // There are lots of leading tokens that we don't recognize -
2108 // if the line isn't empty, go on to the next line.
2109 //
2110 if ((*buf != 0) && (*buf != '\n')) {
2111 //
2112 // If the line is longer than the buffer, read characters
2113 // until we find a newline.
2114 //
2115 if (long_line) {
2116 int ch;
2117 while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
2118 }
2119 continue;
2120 }
2121
2122 //
2123 // A newline has signalled the end of the processor record.
2124 // Check that there aren't too many procs specified.
2125 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002126 if ((int)num_avail == __kmp_xproc) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002127 CLEANUP_THREAD_INFO;
2128 *msg_id = kmp_i18n_str_TooManyEntries;
2129 return -1;
2130 }
2131
2132 //
2133 // Check for missing fields. The osId field must be there, and we
2134 // currently require that the physical id field is specified, also.
2135 //
2136 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
2137 CLEANUP_THREAD_INFO;
2138 *msg_id = kmp_i18n_str_MissingProcField;
2139 return -1;
2140 }
2141 if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
2142 CLEANUP_THREAD_INFO;
2143 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
2144 return -1;
2145 }
2146
2147 //
2148 // Skip this proc if it is not included in the machine model.
2149 //
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00002150 if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], __kmp_affin_fullMask)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002151 INIT_PROC_INFO(threadInfo[num_avail]);
2152 continue;
2153 }
2154
2155 //
2156 // We have a successful parse of this proc's info.
2157 // Increment the counter, and prepare for the next proc.
2158 //
2159 num_avail++;
2160 KMP_ASSERT(num_avail <= num_records);
2161 INIT_PROC_INFO(threadInfo[num_avail]);
2162 }
2163 continue;
2164
2165 no_val:
2166 CLEANUP_THREAD_INFO;
2167 *msg_id = kmp_i18n_str_MissingValCpuinfo;
2168 return -1;
2169
2170 dup_field:
2171 CLEANUP_THREAD_INFO;
2172 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
2173 return -1;
2174 }
2175 *line = 0;
2176
2177# if KMP_MIC && REDUCE_TEAM_SIZE
2178 unsigned teamSize = 0;
2179# endif // KMP_MIC && REDUCE_TEAM_SIZE
2180
2181 // check for num_records == __kmp_xproc ???
2182
2183 //
2184 // If there's only one thread context to bind to, form an Address object
2185 // with depth 1 and return immediately (or, if affinity is off, set
2186 // address2os to NULL and return).
2187 //
2188 // If it is configured to omit the package level when there is only a
2189 // single package, the logic at the end of this routine won't work if
2190 // there is only a single thread - it would try to form an Address
2191 // object with depth 0.
2192 //
2193 KMP_ASSERT(num_avail > 0);
2194 KMP_ASSERT(num_avail <= num_records);
2195 if (num_avail == 1) {
2196 __kmp_ncores = 1;
2197 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002198 if (__kmp_affinity_verbose) {
2199 if (! KMP_AFFINITY_CAPABLE()) {
2200 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2201 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2202 KMP_INFORM(Uniform, "KMP_AFFINITY");
2203 }
2204 else {
2205 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2206 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00002207 __kmp_affin_fullMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002208 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2209 if (__kmp_affinity_respect_mask) {
2210 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2211 } else {
2212 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2213 }
2214 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2215 KMP_INFORM(Uniform, "KMP_AFFINITY");
2216 }
2217 int index;
2218 kmp_str_buf_t buf;
2219 __kmp_str_buf_init(&buf);
2220 __kmp_str_buf_print(&buf, "1");
2221 for (index = maxIndex - 1; index > pkgIdIndex; index--) {
2222 __kmp_str_buf_print(&buf, " x 1");
2223 }
2224 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
2225 __kmp_str_buf_free(&buf);
2226 }
2227
2228 if (__kmp_affinity_type == affinity_none) {
2229 CLEANUP_THREAD_INFO;
2230 return 0;
2231 }
2232
2233 *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
2234 Address addr(1);
2235 addr.labels[0] = threadInfo[0][pkgIdIndex];
2236 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
2237
2238 if (__kmp_affinity_gran_levels < 0) {
2239 __kmp_affinity_gran_levels = 0;
2240 }
2241
2242 if (__kmp_affinity_verbose) {
2243 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
2244 }
2245
2246 CLEANUP_THREAD_INFO;
2247 return 1;
2248 }
2249
2250 //
2251 // Sort the threadInfo table by physical Id.
2252 //
2253 qsort(threadInfo, num_avail, sizeof(*threadInfo),
2254 __kmp_affinity_cmp_ProcCpuInfo_phys_id);
2255
2256 //
2257 // The table is now sorted by pkgId / coreId / threadId, but we really
2258 // don't know the radix of any of the fields. pkgId's may be sparsely
2259 // assigned among the chips on a system. Although coreId's are usually
2260 // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
2261 // [0..threadsPerCore-1], we don't want to make any such assumptions.
2262 //
2263 // For that matter, we don't know what coresPerPkg and threadsPerCore
2264 // (or the total # packages) are at this point - we want to determine
2265 // that now. We only have an upper bound on the first two figures.
2266 //
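    //
    // As a concrete example of what these arrays end up holding: on a
    // uniform 2-package x 2-core x 2-thread machine, totals[pkgIdIndex] == 2,
    // maxCt[coreIdIndex] == 2, maxCt[threadIdIndex] == 2, and
    // totals[threadIdIndex] == 8, the total number of threads.
    //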
2267 unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
2268 * sizeof(unsigned));
2269 unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
2270 * sizeof(unsigned));
2271 unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
2272 * sizeof(unsigned));
2273 unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
2274 * sizeof(unsigned));
2275
2276 bool assign_thread_ids = false;
2277 unsigned threadIdCt;
2278 unsigned index;
2279
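    //
    // Sketch of the two-pass scheme below: if the file has no "thread id"
    // fields, the first pass notices a duplicate (..., pkg id, core id)
    // tuple among the sorted records, sets assign_thread_ids, and jumps
    // back here so the second pass can number the threads on each core
    // 0, 1, 2, ...
    //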
2280 restart_radix_check:
2281 threadIdCt = 0;
2282
2283 //
2284 // Initialize the counter arrays with data from threadInfo[0].
2285 //
2286 if (assign_thread_ids) {
2287 if (threadInfo[0][threadIdIndex] == UINT_MAX) {
2288 threadInfo[0][threadIdIndex] = threadIdCt++;
2289 }
2290 else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
2291 threadIdCt = threadInfo[0][threadIdIndex] + 1;
2292 }
2293 }
2294 for (index = 0; index <= maxIndex; index++) {
2295 counts[index] = 1;
2296 maxCt[index] = 1;
2297 totals[index] = 1;
 2298 lastId[index] = threadInfo[0][index];
2299 }
2300
2301 //
2302 // Run through the rest of the OS procs.
2303 //
2304 for (i = 1; i < num_avail; i++) {
2305 //
2306 // Find the most significant index whose id differs
2307 // from the id for the previous OS proc.
2308 //
2309 for (index = maxIndex; index >= threadIdIndex; index--) {
2310 if (assign_thread_ids && (index == threadIdIndex)) {
2311 //
2312 // Auto-assign the thread id field if it wasn't specified.
2313 //
2314 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2315 threadInfo[i][threadIdIndex] = threadIdCt++;
2316 }
2317
2318 //
 2319 // Apparently the thread id field was specified for some
2320 // entries and not others. Start the thread id counter
2321 // off at the next higher thread id.
2322 //
2323 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2324 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2325 }
2326 }
2327 if (threadInfo[i][index] != lastId[index]) {
2328 //
2329 // Run through all indices which are less significant,
2330 // and reset the counts to 1.
2331 //
2332 // At all levels up to and including index, we need to
2333 // increment the totals and record the last id.
2334 //
2335 unsigned index2;
2336 for (index2 = threadIdIndex; index2 < index; index2++) {
2337 totals[index2]++;
2338 if (counts[index2] > maxCt[index2]) {
2339 maxCt[index2] = counts[index2];
2340 }
2341 counts[index2] = 1;
2342 lastId[index2] = threadInfo[i][index2];
2343 }
2344 counts[index]++;
2345 totals[index]++;
2346 lastId[index] = threadInfo[i][index];
2347
2348 if (assign_thread_ids && (index > threadIdIndex)) {
2349
2350# if KMP_MIC && REDUCE_TEAM_SIZE
2351 //
2352 // The default team size is the total #threads in the machine
2353 // minus 1 thread for every core that has 3 or more threads.
2354 //
2355 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2356# endif // KMP_MIC && REDUCE_TEAM_SIZE
2357
2358 //
2359 // Restart the thread counter, as we are on a new core.
2360 //
2361 threadIdCt = 0;
2362
2363 //
2364 // Auto-assign the thread id field if it wasn't specified.
2365 //
2366 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2367 threadInfo[i][threadIdIndex] = threadIdCt++;
2368 }
2369
2370 //
 2371 // Apparently the thread id field was specified for some
2372 // entries and not others. Start the thread id counter
2373 // off at the next higher thread id.
2374 //
2375 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2376 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2377 }
2378 }
2379 break;
2380 }
2381 }
2382 if (index < threadIdIndex) {
2383 //
2384 // If thread ids were specified, it is an error if they are not
 2385 // unique. Also, check that we haven't already restarted the
2386 // loop (to be safe - shouldn't need to).
2387 //
2388 if ((threadInfo[i][threadIdIndex] != UINT_MAX)
2389 || assign_thread_ids) {
2390 __kmp_free(lastId);
2391 __kmp_free(totals);
2392 __kmp_free(maxCt);
2393 __kmp_free(counts);
2394 CLEANUP_THREAD_INFO;
2395 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2396 return -1;
2397 }
2398
2399 //
 2400 // If the thread ids were not specified and we see entries that
 2401 // are duplicates, start the loop over and
2402 // assign the thread ids manually.
2403 //
2404 assign_thread_ids = true;
2405 goto restart_radix_check;
2406 }
2407 }
2408
2409# if KMP_MIC && REDUCE_TEAM_SIZE
2410 //
2411 // The default team size is the total #threads in the machine
2412 // minus 1 thread for every core that has 3 or more threads.
2413 //
2414 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2415# endif // KMP_MIC && REDUCE_TEAM_SIZE
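    //
    // A sketch of the numbers involved: on a part with 60 cores and 4
    // hardware threads per core, each core contributes threadIdCt - 1 == 3,
    // so the default team size works out to 60 * 3 == 180.
    //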
2416
2417 for (index = threadIdIndex; index <= maxIndex; index++) {
2418 if (counts[index] > maxCt[index]) {
2419 maxCt[index] = counts[index];
2420 }
2421 }
2422
2423 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2424 nCoresPerPkg = maxCt[coreIdIndex];
2425 nPackages = totals[pkgIdIndex];
2426
2427 //
2428 // Check to see if the machine topology is uniform
2429 //
2430 unsigned prod = totals[maxIndex];
2431 for (index = threadIdIndex; index < maxIndex; index++) {
2432 prod *= maxCt[index];
2433 }
2434 bool uniform = (prod == totals[threadIdIndex]);
2435
2436 //
2437 // When affinity is off, this routine will still be called to set
Andrey Churbanovf696c822015-01-27 16:55:43 +00002438 // __kmp_ncores, as well as __kmp_nThreadsPerCore,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002439 // nCoresPerPkg, & nPackages. Make sure all these vars are set
2440 // correctly, and return now if affinity is not enabled.
2441 //
Jim Cownie5e8470a2013-09-27 10:38:44 +00002442 __kmp_ncores = totals[coreIdIndex];
2443
2444 if (__kmp_affinity_verbose) {
2445 if (! KMP_AFFINITY_CAPABLE()) {
2446 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2447 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2448 if (uniform) {
2449 KMP_INFORM(Uniform, "KMP_AFFINITY");
2450 } else {
2451 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2452 }
2453 }
2454 else {
2455 char buf[KMP_AFFIN_MASK_PRINT_LEN];
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00002456 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, __kmp_affin_fullMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002457 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2458 if (__kmp_affinity_respect_mask) {
2459 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
2460 } else {
2461 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
2462 }
2463 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2464 if (uniform) {
2465 KMP_INFORM(Uniform, "KMP_AFFINITY");
2466 } else {
2467 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2468 }
2469 }
2470 kmp_str_buf_t buf;
2471 __kmp_str_buf_init(&buf);
2472
2473 __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
2474 for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2475 __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
2476 }
2477 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
2478 maxCt[threadIdIndex], __kmp_ncores);
2479
2480 __kmp_str_buf_free(&buf);
2481 }
2482
2483# if KMP_MIC && REDUCE_TEAM_SIZE
2484 //
2485 // Set the default team size.
2486 //
2487 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2488 __kmp_dflt_team_nth = teamSize;
2489 KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
2490 __kmp_dflt_team_nth));
2491 }
2492# endif // KMP_MIC && REDUCE_TEAM_SIZE
2493
2494 if (__kmp_affinity_type == affinity_none) {
2495 __kmp_free(lastId);
2496 __kmp_free(totals);
2497 __kmp_free(maxCt);
2498 __kmp_free(counts);
2499 CLEANUP_THREAD_INFO;
2500 return 0;
2501 }
2502
2503 //
2504 // Count the number of levels which have more nodes at that level than
 2505 // at the parent's level (with an implicit root node above the top
 2506 // level). This is equivalent to saying that there is at least
2507 // one node at this level which has a sibling. These levels are in the
2508 // map, and the package level is always in the map.
2509 //
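    //
    // For instance, if hyperthreading is disabled, totals[threadIdIndex]
    // equals totals[coreIdIndex], so no thread has a sibling and the
    // thread level is left out of the map.
    //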
2510 bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
2511 int level = 0;
2512 for (index = threadIdIndex; index < maxIndex; index++) {
2513 KMP_ASSERT(totals[index] >= totals[index + 1]);
2514 inMap[index] = (totals[index] > totals[index + 1]);
2515 }
2516 inMap[maxIndex] = (totals[maxIndex] > 1);
2517 inMap[pkgIdIndex] = true;
2518
2519 int depth = 0;
2520 for (index = threadIdIndex; index <= maxIndex; index++) {
2521 if (inMap[index]) {
2522 depth++;
2523 }
2524 }
2525 KMP_ASSERT(depth > 0);
2526
2527 //
2528 // Construct the data structure that is to be returned.
2529 //
2530 *address2os = (AddrUnsPair*)
2531 __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
2532 int pkgLevel = -1;
2533 int coreLevel = -1;
2534 int threadLevel = -1;
2535
2536 for (i = 0; i < num_avail; ++i) {
2537 Address addr(depth);
2538 unsigned os = threadInfo[i][osIdIndex];
2539 int src_index;
2540 int dst_index = 0;
2541
2542 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
2543 if (! inMap[src_index]) {
2544 continue;
2545 }
2546 addr.labels[dst_index] = threadInfo[i][src_index];
2547 if (src_index == pkgIdIndex) {
2548 pkgLevel = dst_index;
2549 }
2550 else if (src_index == coreIdIndex) {
2551 coreLevel = dst_index;
2552 }
2553 else if (src_index == threadIdIndex) {
2554 threadLevel = dst_index;
2555 }
2556 dst_index++;
2557 }
2558 (*address2os)[i] = AddrUnsPair(addr, os);
2559 }
2560
2561 if (__kmp_affinity_gran_levels < 0) {
2562 //
2563 // Set the granularity level based on what levels are modeled
2564 // in the machine topology map.
2565 //
2566 unsigned src_index;
2567 __kmp_affinity_gran_levels = 0;
2568 for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2569 if (! inMap[src_index]) {
2570 continue;
2571 }
2572 switch (src_index) {
2573 case threadIdIndex:
2574 if (__kmp_affinity_gran > affinity_gran_thread) {
2575 __kmp_affinity_gran_levels++;
2576 }
2577
2578 break;
2579 case coreIdIndex:
2580 if (__kmp_affinity_gran > affinity_gran_core) {
2581 __kmp_affinity_gran_levels++;
2582 }
2583 break;
2584
2585 case pkgIdIndex:
2586 if (__kmp_affinity_gran > affinity_gran_package) {
2587 __kmp_affinity_gran_levels++;
2588 }
2589 break;
2590 }
2591 }
2592 }
2593
2594 if (__kmp_affinity_verbose) {
2595 __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2596 coreLevel, threadLevel);
2597 }
2598
2599 __kmp_free(inMap);
2600 __kmp_free(lastId);
2601 __kmp_free(totals);
2602 __kmp_free(maxCt);
2603 __kmp_free(counts);
2604 CLEANUP_THREAD_INFO;
2605 return depth;
2606}
2607
2608
2609//
2610// Create and return a table of affinity masks, indexed by OS thread ID.
2611// This routine handles OR'ing together all the affinity masks of threads
2612// that are sufficiently close, if granularity > fine.
2613//
2614static kmp_affin_mask_t *
2615__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
2616 AddrUnsPair *address2os, unsigned numAddrs)
2617{
2618 //
2619 // First form a table of affinity masks in order of OS thread id.
2620 //
2621 unsigned depth;
2622 unsigned maxOsId;
2623 unsigned i;
2624
2625 KMP_ASSERT(numAddrs > 0);
2626 depth = address2os[0].first.depth;
2627
2628 maxOsId = 0;
2629 for (i = 0; i < numAddrs; i++) {
2630 unsigned osId = address2os[i].second;
2631 if (osId > maxOsId) {
2632 maxOsId = osId;
2633 }
2634 }
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002635 kmp_affin_mask_t *osId2Mask;
2636 KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId+1));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002637
2638 //
2639 // Sort the address2os table according to physical order. Doing so
2640 // will put all threads on the same core/package/node in consecutive
2641 // locations.
2642 //
2643 qsort(address2os, numAddrs, sizeof(*address2os),
2644 __kmp_affinity_cmp_Address_labels);
2645
2646 KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2647 if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
2648 KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
2649 }
2650 if (__kmp_affinity_gran_levels >= (int)depth) {
2651 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2652 && (__kmp_affinity_type != affinity_none))) {
2653 KMP_WARNING(AffThreadsMayMigrate);
2654 }
2655 }
2656
2657 //
2658 // Run through the table, forming the masks for all threads on each
2659 // core. Threads on the same core will have identical "Address"
2660 // objects, not considering the last level, which must be the thread
2661 // id. All threads on a core will appear consecutively.
2662 //
2663 unsigned unique = 0;
2664 unsigned j = 0; // index of 1st thread on core
2665 unsigned leader = 0;
2666 Address *leaderAddr = &(address2os[0].first);
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002667 kmp_affin_mask_t *sum;
2668 KMP_CPU_ALLOC_ON_STACK(sum);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002669 KMP_CPU_ZERO(sum);
2670 KMP_CPU_SET(address2os[0].second, sum);
2671 for (i = 1; i < numAddrs; i++) {
2672 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002673 // If this thread is sufficiently close to the leader (within the
Jim Cownie5e8470a2013-09-27 10:38:44 +00002674 // granularity setting), then set the bit for this os thread in the
2675 // affinity mask for this group, and go on to the next thread.
2676 //
2677 if (leaderAddr->isClose(address2os[i].first,
2678 __kmp_affinity_gran_levels)) {
2679 KMP_CPU_SET(address2os[i].second, sum);
2680 continue;
2681 }
2682
2683 //
2684 // For every thread in this group, copy the mask to the thread's
2685 // entry in the osId2Mask table. Mark the first address as a
2686 // leader.
2687 //
2688 for (; j < i; j++) {
2689 unsigned osId = address2os[j].second;
2690 KMP_DEBUG_ASSERT(osId <= maxOsId);
2691 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2692 KMP_CPU_COPY(mask, sum);
2693 address2os[j].first.leader = (j == leader);
2694 }
2695 unique++;
2696
2697 //
2698 // Start a new mask.
2699 //
2700 leader = i;
2701 leaderAddr = &(address2os[i].first);
2702 KMP_CPU_ZERO(sum);
2703 KMP_CPU_SET(address2os[i].second, sum);
2704 }
2705
2706 //
2707 // For every thread in last group, copy the mask to the thread's
2708 // entry in the osId2Mask table.
2709 //
2710 for (; j < i; j++) {
2711 unsigned osId = address2os[j].second;
2712 KMP_DEBUG_ASSERT(osId <= maxOsId);
2713 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2714 KMP_CPU_COPY(mask, sum);
2715 address2os[j].first.leader = (j == leader);
2716 }
2717 unique++;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002718 KMP_CPU_FREE_FROM_STACK(sum);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002719
2720 *maxIndex = maxOsId;
2721 *numUnique = unique;
2722 return osId2Mask;
2723}
2724
2725
2726//
2727// Stuff for the affinity proclist parsers. It's easier to declare these vars
2728// as file-static than to try and pass them through the calling sequence of
2729// the recursive-descent OMP_PLACES parser.
2730//
2731static kmp_affin_mask_t *newMasks;
2732static int numNewMasks;
2733static int nextNewMask;
2734
2735#define ADD_MASK(_mask) \
2736 { \
2737 if (nextNewMask >= numNewMasks) { \
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002738 int i; \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002739 numNewMasks *= 2; \
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002740 kmp_affin_mask_t* temp; \
2741 KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks); \
2742 for(i=0;i<numNewMasks/2;i++) { \
2743 kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i); \
2744 kmp_affin_mask_t* dest = KMP_CPU_INDEX(temp, i); \
2745 KMP_CPU_COPY(dest, src); \
2746 } \
2747 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks/2); \
2748 newMasks = temp; \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002749 } \
2750 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
2751 nextNewMask++; \
2752 }
2753
2754#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
2755 { \
2756 if (((_osId) > _maxOsId) || \
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002757 (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
Jim Cownie5e8470a2013-09-27 10:38:44 +00002758 if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
2759 && (__kmp_affinity_type != affinity_none))) { \
2760 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
2761 } \
2762 } \
2763 else { \
2764 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
2765 } \
2766 }
2767
2768
2769//
2770// Re-parse the proclist (for the explicit affinity type), and form the list
2771// of affinity newMasks indexed by gtid.
2772//
2773static void
2774__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
2775 unsigned int *out_numMasks, const char *proclist,
2776 kmp_affin_mask_t *osId2Mask, int maxOsId)
2777{
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002778 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002779 const char *scan = proclist;
2780 const char *next = proclist;
2781
2782 //
 2783 // The temporary mask vector starts small; the ADD_MASK macro
 2784 // doubles its size whenever it fills up.
2785 //
2786 numNewMasks = 2;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002787 KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002788 nextNewMask = 0;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002789 kmp_affin_mask_t *sumMask;
2790 KMP_CPU_ALLOC(sumMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002791 int setSize = 0;
2792
2793 for (;;) {
2794 int start, end, stride;
2795
2796 SKIP_WS(scan);
2797 next = scan;
2798 if (*next == '\0') {
2799 break;
2800 }
2801
2802 if (*next == '{') {
2803 int num;
2804 setSize = 0;
2805 next++; // skip '{'
2806 SKIP_WS(next);
2807 scan = next;
2808
2809 //
2810 // Read the first integer in the set.
2811 //
2812 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2813 "bad proclist");
2814 SKIP_DIGITS(next);
2815 num = __kmp_str_to_int(scan, *next);
2816 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2817
2818 //
2819 // Copy the mask for that osId to the sum (union) mask.
2820 //
2821 if ((num > maxOsId) ||
2822 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2823 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2824 && (__kmp_affinity_type != affinity_none))) {
2825 KMP_WARNING(AffIgnoreInvalidProcID, num);
2826 }
2827 KMP_CPU_ZERO(sumMask);
2828 }
2829 else {
2830 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2831 setSize = 1;
2832 }
2833
2834 for (;;) {
2835 //
2836 // Check for end of set.
2837 //
2838 SKIP_WS(next);
2839 if (*next == '}') {
2840 next++; // skip '}'
2841 break;
2842 }
2843
2844 //
2845 // Skip optional comma.
2846 //
2847 if (*next == ',') {
2848 next++;
2849 }
2850 SKIP_WS(next);
2851
2852 //
2853 // Read the next integer in the set.
2854 //
2855 scan = next;
2856 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2857 "bad explicit proc list");
2858
2859 SKIP_DIGITS(next);
2860 num = __kmp_str_to_int(scan, *next);
2861 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2862
2863 //
2864 // Add the mask for that osId to the sum mask.
2865 //
2866 if ((num > maxOsId) ||
2867 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2868 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2869 && (__kmp_affinity_type != affinity_none))) {
2870 KMP_WARNING(AffIgnoreInvalidProcID, num);
2871 }
2872 }
2873 else {
2874 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2875 setSize++;
2876 }
2877 }
2878 if (setSize > 0) {
2879 ADD_MASK(sumMask);
2880 }
2881
2882 SKIP_WS(next);
2883 if (*next == ',') {
2884 next++;
2885 }
2886 scan = next;
2887 continue;
2888 }
2889
2890 //
2891 // Read the first integer.
2892 //
2893 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2894 SKIP_DIGITS(next);
2895 start = __kmp_str_to_int(scan, *next);
2896 KMP_ASSERT2(start >= 0, "bad explicit proc list");
2897 SKIP_WS(next);
2898
2899 //
2900 // If this isn't a range, then add a mask to the list and go on.
2901 //
2902 if (*next != '-') {
2903 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2904
2905 //
2906 // Skip optional comma.
2907 //
2908 if (*next == ',') {
2909 next++;
2910 }
2911 scan = next;
2912 continue;
2913 }
2914
2915 //
2916 // This is a range. Skip over the '-' and read in the 2nd int.
2917 //
2918 next++; // skip '-'
2919 SKIP_WS(next);
2920 scan = next;
2921 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2922 SKIP_DIGITS(next);
2923 end = __kmp_str_to_int(scan, *next);
2924 KMP_ASSERT2(end >= 0, "bad explicit proc list");
2925
2926 //
2927 // Check for a stride parameter
2928 //
2929 stride = 1;
2930 SKIP_WS(next);
2931 if (*next == ':') {
2932 //
 2933 // A stride is specified. Skip over the ':' and read the 3rd int.
2934 //
2935 int sign = +1;
2936 next++; // skip ':'
2937 SKIP_WS(next);
2938 scan = next;
2939 if (*next == '-') {
2940 sign = -1;
2941 next++;
2942 SKIP_WS(next);
2943 scan = next;
2944 }
2945 KMP_ASSERT2((*next >= '0') && (*next <= '9'),
2946 "bad explicit proc list");
2947 SKIP_DIGITS(next);
2948 stride = __kmp_str_to_int(scan, *next);
2949 KMP_ASSERT2(stride >= 0, "bad explicit proc list");
2950 stride *= sign;
2951 }
2952
2953 //
2954 // Do some range checks.
2955 //
2956 KMP_ASSERT2(stride != 0, "bad explicit proc list");
2957 if (stride > 0) {
2958 KMP_ASSERT2(start <= end, "bad explicit proc list");
2959 }
2960 else {
2961 KMP_ASSERT2(start >= end, "bad explicit proc list");
2962 }
2963 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
2964
2965 //
2966 // Add the mask for each OS proc # to the list.
2967 //
2968 if (stride > 0) {
2969 do {
2970 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2971 start += stride;
2972 } while (start <= end);
2973 }
2974 else {
2975 do {
2976 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2977 start += stride;
2978 } while (start >= end);
2979 }
2980
2981 //
2982 // Skip optional comma.
2983 //
2984 SKIP_WS(next);
2985 if (*next == ',') {
2986 next++;
2987 }
2988 scan = next;
2989 }
2990
2991 *out_numMasks = nextNewMask;
2992 if (nextNewMask == 0) {
2993 *out_masks = NULL;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002994 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002995 return;
2996 }
Jonathan Peyton01dcf362015-11-30 20:02:59 +00002997 KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
2998 for(i = 0; i < nextNewMask; i++) {
2999 kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i);
3000 kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i);
3001 KMP_CPU_COPY(dest, src);
3002 }
3003 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
3004 KMP_CPU_FREE(sumMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003005}
3006
3007
3008# if OMP_40_ENABLED
3009
3010/*-----------------------------------------------------------------------------
3011
3012Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
3013 places. Again, here is the grammar:
3014
3015place_list := place
3016place_list := place , place_list
3017place := num
3018place := place : num
3019place := place : num : signed
3020 place := { subplace_list }
3021place := ! place // (lowest priority)
3022subplace_list := subplace
3023subplace_list := subplace , subplace_list
3024subplace := num
3025subplace := num : num
3026subplace := num : num : signed
3027signed := num
3028signed := + signed
3029signed := - signed
3030
3031-----------------------------------------------------------------------------*/
3032
3033static void
3034__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
3035 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3036{
3037 const char *next;
3038
3039 for (;;) {
3040 int start, count, stride, i;
3041
3042 //
3043 // Read in the starting proc id
3044 //
3045 SKIP_WS(*scan);
3046 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3047 "bad explicit places list");
3048 next = *scan;
3049 SKIP_DIGITS(next);
3050 start = __kmp_str_to_int(*scan, *next);
3051 KMP_ASSERT(start >= 0);
3052 *scan = next;
3053
3054 //
3055 // valid follow sets are ',' ':' and '}'
3056 //
3057 SKIP_WS(*scan);
3058 if (**scan == '}' || **scan == ',') {
3059 if ((start > maxOsId) ||
3060 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3061 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3062 && (__kmp_affinity_type != affinity_none))) {
3063 KMP_WARNING(AffIgnoreInvalidProcID, start);
3064 }
3065 }
3066 else {
3067 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3068 (*setSize)++;
3069 }
3070 if (**scan == '}') {
3071 break;
3072 }
3073 (*scan)++; // skip ','
3074 continue;
3075 }
3076 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3077 (*scan)++; // skip ':'
3078
3079 //
3080 // Read count parameter
3081 //
3082 SKIP_WS(*scan);
3083 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3084 "bad explicit places list");
3085 next = *scan;
3086 SKIP_DIGITS(next);
3087 count = __kmp_str_to_int(*scan, *next);
3088 KMP_ASSERT(count >= 0);
3089 *scan = next;
3090
3091 //
3092 // valid follow sets are ',' ':' and '}'
3093 //
3094 SKIP_WS(*scan);
3095 if (**scan == '}' || **scan == ',') {
3096 for (i = 0; i < count; i++) {
3097 if ((start > maxOsId) ||
3098 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3099 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3100 && (__kmp_affinity_type != affinity_none))) {
3101 KMP_WARNING(AffIgnoreInvalidProcID, start);
3102 }
3103 break; // don't proliferate warnings for large count
3104 }
3105 else {
3106 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3107 start++;
3108 (*setSize)++;
3109 }
3110 }
3111 if (**scan == '}') {
3112 break;
3113 }
3114 (*scan)++; // skip ','
3115 continue;
3116 }
3117 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3118 (*scan)++; // skip ':'
3119
3120 //
3121 // Read stride parameter
3122 //
3123 int sign = +1;
3124 for (;;) {
3125 SKIP_WS(*scan);
3126 if (**scan == '+') {
3127 (*scan)++; // skip '+'
3128 continue;
3129 }
3130 if (**scan == '-') {
3131 sign *= -1;
3132 (*scan)++; // skip '-'
3133 continue;
3134 }
3135 break;
3136 }
3137 SKIP_WS(*scan);
3138 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
3139 "bad explicit places list");
3140 next = *scan;
3141 SKIP_DIGITS(next);
3142 stride = __kmp_str_to_int(*scan, *next);
3143 KMP_ASSERT(stride >= 0);
3144 *scan = next;
3145 stride *= sign;
3146
3147 //
3148 // valid follow sets are ',' and '}'
3149 //
3150 SKIP_WS(*scan);
3151 if (**scan == '}' || **scan == ',') {
3152 for (i = 0; i < count; i++) {
3153 if ((start > maxOsId) ||
3154 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3155 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3156 && (__kmp_affinity_type != affinity_none))) {
3157 KMP_WARNING(AffIgnoreInvalidProcID, start);
3158 }
3159 break; // don't proliferate warnings for large count
3160 }
3161 else {
3162 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3163 start += stride;
3164 (*setSize)++;
3165 }
3166 }
3167 if (**scan == '}') {
3168 break;
3169 }
3170 (*scan)++; // skip ','
3171 continue;
3172 }
3173
3174 KMP_ASSERT2(0, "bad explicit places list");
3175 }
3176}
3177
3178
3179static void
3180__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
3181 int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
3182{
3183 const char *next;
3184
3185 //
3186 // valid follow sets are '{' '!' and num
3187 //
3188 SKIP_WS(*scan);
3189 if (**scan == '{') {
3190 (*scan)++; // skip '{'
3191 __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
3192 setSize);
3193 KMP_ASSERT2(**scan == '}', "bad explicit places list");
3194 (*scan)++; // skip '}'
3195 }
3196 else if (**scan == '!') {
Jonathan Peyton6778c732015-10-19 19:43:01 +00003197 (*scan)++; // skip '!'
Jim Cownie5e8470a2013-09-27 10:38:44 +00003198 __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003199 KMP_CPU_COMPLEMENT(maxOsId, tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003200 }
3201 else if ((**scan >= '0') && (**scan <= '9')) {
3202 next = *scan;
3203 SKIP_DIGITS(next);
3204 int num = __kmp_str_to_int(*scan, *next);
3205 KMP_ASSERT(num >= 0);
3206 if ((num > maxOsId) ||
3207 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3208 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3209 && (__kmp_affinity_type != affinity_none))) {
3210 KMP_WARNING(AffIgnoreInvalidProcID, num);
3211 }
3212 }
3213 else {
3214 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
3215 (*setSize)++;
3216 }
3217 *scan = next; // skip num
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003218 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003219 else {
3220 KMP_ASSERT2(0, "bad explicit places list");
3221 }
3222}
3223
3224
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003225//static void
3226void
Jim Cownie5e8470a2013-09-27 10:38:44 +00003227__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
3228 unsigned int *out_numMasks, const char *placelist,
3229 kmp_affin_mask_t *osId2Mask, int maxOsId)
3230{
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003231 int i,j,count,stride,sign;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003232 const char *scan = placelist;
3233 const char *next = placelist;
3234
3235 numNewMasks = 2;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003236 KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003237 nextNewMask = 0;
3238
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003239 // tempMask is modified based on the previous or initial
3240 // place to form the current place
3241 // previousMask contains the previous place
3242 kmp_affin_mask_t *tempMask;
3243 kmp_affin_mask_t *previousMask;
3244 KMP_CPU_ALLOC(tempMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003245 KMP_CPU_ZERO(tempMask);
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003246 KMP_CPU_ALLOC(previousMask);
3247 KMP_CPU_ZERO(previousMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003248 int setSize = 0;
3249
3250 for (;;) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003251 __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
3252
3253 //
3254 // valid follow sets are ',' ':' and EOL
3255 //
3256 SKIP_WS(scan);
3257 if (*scan == '\0' || *scan == ',') {
3258 if (setSize > 0) {
3259 ADD_MASK(tempMask);
3260 }
3261 KMP_CPU_ZERO(tempMask);
3262 setSize = 0;
3263 if (*scan == '\0') {
3264 break;
3265 }
3266 scan++; // skip ','
3267 continue;
3268 }
3269
3270 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3271 scan++; // skip ':'
3272
3273 //
3274 // Read count parameter
3275 //
3276 SKIP_WS(scan);
3277 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3278 "bad explicit places list");
3279 next = scan;
3280 SKIP_DIGITS(next);
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003281 count = __kmp_str_to_int(scan, *next);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003282 KMP_ASSERT(count >= 0);
3283 scan = next;
3284
3285 //
3286 // valid follow sets are ',' ':' and EOL
3287 //
3288 SKIP_WS(scan);
3289 if (*scan == '\0' || *scan == ',') {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003290 stride = +1;
3291 }
3292 else {
3293 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3294 scan++; // skip ':'
Jim Cownie5e8470a2013-09-27 10:38:44 +00003295
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003296 //
3297 // Read stride parameter
3298 //
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003299 sign = +1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003300 for (;;) {
3301 SKIP_WS(scan);
3302 if (*scan == '+') {
3303 scan++; // skip '+'
3304 continue;
3305 }
3306 if (*scan == '-') {
3307 sign *= -1;
3308 scan++; // skip '-'
3309 continue;
3310 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003311 break;
3312 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003313 SKIP_WS(scan);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003314 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
3315 "bad explicit places list");
3316 next = scan;
3317 SKIP_DIGITS(next);
3318 stride = __kmp_str_to_int(scan, *next);
3319 KMP_DEBUG_ASSERT(stride >= 0);
3320 scan = next;
3321 stride *= sign;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003322 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003323
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003324 // Add places determined by initial_place : count : stride
3325 for (i = 0; i < count; i++) {
3326 if (setSize == 0) {
3327 break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003328 }
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003329 // Add the current place, then build the next place (tempMask) from that
3330 KMP_CPU_COPY(previousMask, tempMask);
3331 ADD_MASK(previousMask);
3332 KMP_CPU_ZERO(tempMask);
3333 setSize = 0;
3334 KMP_CPU_SET_ITERATE(j, previousMask) {
3335 if (! KMP_CPU_ISSET(j, previousMask)) {
3336 continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003337 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00003338 if ((j+stride > maxOsId) || (j+stride < 0) ||
3339 (! KMP_CPU_ISSET(j, __kmp_affin_fullMask)) ||
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003340 (! KMP_CPU_ISSET(j+stride, KMP_CPU_INDEX(osId2Mask, j+stride)))) {
3341 if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
3342 && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
3343 KMP_WARNING(AffIgnoreInvalidProcID, j+stride);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003344 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00003345 continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003346 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00003347 KMP_CPU_SET(j+stride, tempMask);
3348 setSize++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003349 }
3350 }
3351 KMP_CPU_ZERO(tempMask);
3352 setSize = 0;
3353
3354 //
3355 // valid follow sets are ',' and EOL
3356 //
3357 SKIP_WS(scan);
3358 if (*scan == '\0') {
3359 break;
3360 }
3361 if (*scan == ',') {
3362 scan++; // skip ','
3363 continue;
3364 }
3365
3366 KMP_ASSERT2(0, "bad explicit places list");
3367 }
3368
3369 *out_numMasks = nextNewMask;
3370 if (nextNewMask == 0) {
3371 *out_masks = NULL;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003372 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003373 return;
3374 }
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003375 KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
3376 KMP_CPU_FREE(tempMask);
3377 KMP_CPU_FREE(previousMask);
3378 for(i = 0; i < nextNewMask; i++) {
3379 kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i);
3380 kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i);
3381 KMP_CPU_COPY(dest, src);
3382 }
3383 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003384}
3385
3386# endif /* OMP_40_ENABLED */
3387
3388#undef ADD_MASK
3389#undef ADD_MASK_OSID
3390
Jim Cownie5e8470a2013-09-27 10:38:44 +00003391static void
3392__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
3393{
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003394 if (__kmp_place_num_sockets == 0 &&
3395 __kmp_place_num_cores == 0 &&
3396 __kmp_place_num_threads_per_core == 0 )
3397 return; // no topology limiting actions requested, exit
3398 if (__kmp_place_num_sockets == 0)
3399 __kmp_place_num_sockets = nPackages; // use all available sockets
3400 if (__kmp_place_num_cores == 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003401 __kmp_place_num_cores = nCoresPerPkg; // use all available cores
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003402 if (__kmp_place_num_threads_per_core == 0 ||
3403 __kmp_place_num_threads_per_core > __kmp_nThreadsPerCore)
3404 __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
3405
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003406 if ( !__kmp_affinity_uniform_topology() ) {
Jonathan Peytonb9d28fb2016-06-16 18:53:48 +00003407 KMP_WARNING( AffHWSubsetNonUniform );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003408 return; // don't support non-uniform topology
3409 }
3410 if ( depth != 3 ) {
Jonathan Peytonb9d28fb2016-06-16 18:53:48 +00003411 KMP_WARNING( AffHWSubsetNonThreeLevel );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003412 return; // don't support not-3-level topology
Jim Cownie5e8470a2013-09-27 10:38:44 +00003413 }
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003414 if (__kmp_place_socket_offset + __kmp_place_num_sockets > nPackages) {
Jonathan Peytonb9d28fb2016-06-16 18:53:48 +00003415 KMP_WARNING(AffHWSubsetManySockets);
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003416 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003417 }
Andrey Churbanov12875572015-03-10 09:00:36 +00003418 if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
Jonathan Peytonb9d28fb2016-06-16 18:53:48 +00003419 KMP_WARNING( AffHWSubsetManyCores );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003420 return;
3421 }
3422
3423 AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003424 __kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
3425
Jim Cownie5e8470a2013-09-27 10:38:44 +00003426 int i, j, k, n_old = 0, n_new = 0;
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003427 for (i = 0; i < nPackages; ++i)
3428 if (i < __kmp_place_socket_offset ||
3429 i >= __kmp_place_socket_offset + __kmp_place_num_sockets)
3430 n_old += nCoresPerPkg * __kmp_nThreadsPerCore; // skip not-requested socket
3431 else
3432 for (j = 0; j < nCoresPerPkg; ++j) // walk through requested socket
3433 if (j < __kmp_place_core_offset ||
3434 j >= __kmp_place_core_offset + __kmp_place_num_cores)
3435 n_old += __kmp_nThreadsPerCore; // skip not-requested core
3436 else
3437 for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through requested core
3438 if (k < __kmp_place_num_threads_per_core) {
3439 newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
3440 n_new++;
3441 }
3442 n_old++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003443 }
Jonathan Peytondd4aa9b2015-10-08 17:55:54 +00003444 KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
3445 KMP_DEBUG_ASSERT(n_new == __kmp_place_num_sockets * __kmp_place_num_cores *
3446 __kmp_place_num_threads_per_core);
3447
3448 nPackages = __kmp_place_num_sockets; // correct nPackages
Jim Cownie5e8470a2013-09-27 10:38:44 +00003449 nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
3450 __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
3451 __kmp_avail_proc = n_new; // correct avail_proc
3452 __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
3453
3454 __kmp_free( *pAddr );
3455 *pAddr = newAddr; // replace old topology with new one
3456}
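
// A worked example of the subsetting above (illustrative numbers): on a
// uniform 2-socket x 8-core x 2-thread machine (32 OS procs), requesting
// 1 socket, 4 cores and 2 threads per core with __kmp_place_core_offset == 2
// keeps cores 2..5 of the requested socket, so n_new == 1 * 4 * 2 == 8 and
// __kmp_ncores becomes 1 * 4 == 4.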
3457
Jim Cownie5e8470a2013-09-27 10:38:44 +00003458
3459static AddrUnsPair *address2os = NULL;
3460static int * procarr = NULL;
3461static int __kmp_aff_depth = 0;
3462
3463static void
3464__kmp_aux_affinity_initialize(void)
3465{
3466 if (__kmp_affinity_masks != NULL) {
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00003467 KMP_ASSERT(__kmp_affin_fullMask != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003468 return;
3469 }
3470
3471 //
3472 // Create the "full" mask - this defines all of the processors that we
3473 // consider to be in the machine model. If respect is set, then it is
3474 // the initialization thread's affinity mask. Otherwise, it is all
3475 // processors that we know about on the machine.
3476 //
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00003477 if (__kmp_affin_fullMask == NULL) {
3478 KMP_CPU_ALLOC(__kmp_affin_fullMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003479 }
3480 if (KMP_AFFINITY_CAPABLE()) {
3481 if (__kmp_affinity_respect_mask) {
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00003482 __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003483
3484 //
3485 // Count the number of available processors.
3486 //
3487 unsigned i;
3488 __kmp_avail_proc = 0;
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00003489 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
3490 if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003491 continue;
3492 }
3493 __kmp_avail_proc++;
3494 }
3495 if (__kmp_avail_proc > __kmp_xproc) {
3496 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3497 && (__kmp_affinity_type != affinity_none))) {
3498 KMP_WARNING(ErrorInitializeAffinity);
3499 }
3500 __kmp_affinity_type = affinity_none;
Andrey Churbanov1f037e42015-03-10 09:15:26 +00003501 KMP_AFFINITY_DISABLE();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003502 return;
3503 }
3504 }
3505 else {
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00003506 __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003507 __kmp_avail_proc = __kmp_xproc;
3508 }
3509 }
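
    // For example (illustrative): if the process was launched under
    // "taskset -c 0-3" on a 16-proc machine and __kmp_affinity_respect_mask
    // is set, __kmp_affin_fullMask contains procs 0-3 and __kmp_avail_proc
    // becomes 4; without respect, the full mask covers all 16 procs.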
3510
3511 int depth = -1;
3512 kmp_i18n_id_t msg_id = kmp_i18n_null;
3513
3514 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00003515 // For backward compatibility, setting KMP_CPUINFO_FILE =>
Jim Cownie5e8470a2013-09-27 10:38:44 +00003516 // KMP_TOPOLOGY_METHOD=cpuinfo
3517 //
3518 if ((__kmp_cpuinfo_file != NULL) &&
3519 (__kmp_affinity_top_method == affinity_top_method_all)) {
3520 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
3521 }
3522
3523 if (__kmp_affinity_top_method == affinity_top_method_all) {
3524 //
3525 // In the default code path, errors are not fatal - we just try using
3526 // another method. We only emit a warning message if affinity is on,
3527 // or the verbose flag is set, and the nowarnings flag was not set.
3528 //
3529 const char *file_name = NULL;
3530 int line = 0;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003531# if KMP_USE_HWLOC
3532 if (depth < 0) {
3533 if (__kmp_affinity_verbose) {
3534 KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
3535 }
3536 if(!__kmp_hwloc_error) {
3537 depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
3538 if (depth == 0) {
3539 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3540 KMP_ASSERT(address2os == NULL);
3541 return;
3542 } else if(depth < 0 && __kmp_affinity_verbose) {
3543 KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
3544 }
3545 } else if(__kmp_affinity_verbose) {
3546 KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
3547 }
3548 }
3549# endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003550
3551# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3552
Jim Cownie5e8470a2013-09-27 10:38:44 +00003553 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003554 if (__kmp_affinity_verbose) {
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003555 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003556 }
3557
3558 file_name = NULL;
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003559 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003560 if (depth == 0) {
3561 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3562 KMP_ASSERT(address2os == NULL);
3563 return;
3564 }
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003565
3566 if (depth < 0) {
3567 if (__kmp_affinity_verbose) {
3568 if (msg_id != kmp_i18n_null) {
3569 KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
3570 KMP_I18N_STR(DecodingLegacyAPIC));
3571 }
3572 else {
3573 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
3574 }
3575 }
3576
3577 file_name = NULL;
3578 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3579 if (depth == 0) {
3580 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3581 KMP_ASSERT(address2os == NULL);
3582 return;
3583 }
3584 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003585 }
3586
3587# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3588
3589# if KMP_OS_LINUX
3590
3591 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003592 if (__kmp_affinity_verbose) {
3593 if (msg_id != kmp_i18n_null) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003594 KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
3595 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003596 else {
3597 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
3598 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003599 }
3600
3601 FILE *f = fopen("/proc/cpuinfo", "r");
3602 if (f == NULL) {
3603 msg_id = kmp_i18n_str_CantOpenCpuinfo;
3604 }
3605 else {
3606 file_name = "/proc/cpuinfo";
3607 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3608 fclose(f);
3609 if (depth == 0) {
3610 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3611 KMP_ASSERT(address2os == NULL);
3612 return;
3613 }
3614 }
3615 }
3616
3617# endif /* KMP_OS_LINUX */
3618
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003619# if KMP_GROUP_AFFINITY
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003620
3621 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
3622 if (__kmp_affinity_verbose) {
3623 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3624 }
3625
3626 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3627 KMP_ASSERT(depth != 0);
3628 }
3629
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003630# endif /* KMP_GROUP_AFFINITY */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003631
Jim Cownie5e8470a2013-09-27 10:38:44 +00003632 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003633 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003634 if (file_name == NULL) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003635 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003636 }
3637 else if (line == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003638 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003639 }
3640 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003641 KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003642 }
3643 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003644 // FIXME - print msg if msg_id = kmp_i18n_null ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00003645
3646 file_name = "";
3647 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3648 if (depth == 0) {
3649 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3650 KMP_ASSERT(address2os == NULL);
3651 return;
3652 }
3653 KMP_ASSERT(depth > 0);
3654 KMP_ASSERT(address2os != NULL);
3655 }
3656 }
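
    // To summarize the default path above: discovery methods are tried in
    // order (hwloc if built in, then the x2APIC id method, the legacy APIC
    // id method, /proc/cpuinfo on Linux, Windows processor groups, and
    // finally the flat OS-proc map), stopping at the first method that
    // yields a usable depth.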
3657
3658 //
3659 // If the user has specified that a particular topology discovery method
3660 // is to be used, then we abort if that method fails. The exception is
3661 // group affinity, which might have been implicitly set.
3662 //
3663
3664# if KMP_ARCH_X86 || KMP_ARCH_X86_64
3665
3666 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
3667 if (__kmp_affinity_verbose) {
3668 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3669 KMP_I18N_STR(Decodingx2APIC));
3670 }
3671
3672 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3673 if (depth == 0) {
3674 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3675 KMP_ASSERT(address2os == NULL);
3676 return;
3677 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003678 if (depth < 0) {
3679 KMP_ASSERT(msg_id != kmp_i18n_null);
3680 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3681 }
3682 }
3683 else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
3684 if (__kmp_affinity_verbose) {
3685 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3686 KMP_I18N_STR(DecodingLegacyAPIC));
3687 }
3688
3689 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3690 if (depth == 0) {
3691 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3692 KMP_ASSERT(address2os == NULL);
3693 return;
3694 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003695 if (depth < 0) {
3696 KMP_ASSERT(msg_id != kmp_i18n_null);
3697 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3698 }
3699 }
3700
3701# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3702
3703 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
3704 const char *filename;
3705 if (__kmp_cpuinfo_file != NULL) {
3706 filename = __kmp_cpuinfo_file;
3707 }
3708 else {
3709 filename = "/proc/cpuinfo";
3710 }
3711
3712 if (__kmp_affinity_verbose) {
3713 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
3714 }
3715
3716 FILE *f = fopen(filename, "r");
3717 if (f == NULL) {
3718 int code = errno;
3719 if (__kmp_cpuinfo_file != NULL) {
3720 __kmp_msg(
3721 kmp_ms_fatal,
3722 KMP_MSG(CantOpenFileForReading, filename),
3723 KMP_ERR(code),
3724 KMP_HNT(NameComesFrom_CPUINFO_FILE),
3725 __kmp_msg_null
3726 );
3727 }
3728 else {
3729 __kmp_msg(
3730 kmp_ms_fatal,
3731 KMP_MSG(CantOpenFileForReading, filename),
3732 KMP_ERR(code),
3733 __kmp_msg_null
3734 );
3735 }
3736 }
3737 int line = 0;
3738 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3739 fclose(f);
3740 if (depth < 0) {
3741 KMP_ASSERT(msg_id != kmp_i18n_null);
3742 if (line > 0) {
3743 KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
3744 }
3745 else {
3746 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
3747 }
3748 }
3749 if (__kmp_affinity_type == affinity_none) {
3750 KMP_ASSERT(depth == 0);
3751 KMP_ASSERT(address2os == NULL);
3752 return;
3753 }
3754 }
3755
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003756# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00003757
3758 else if (__kmp_affinity_top_method == affinity_top_method_group) {
3759 if (__kmp_affinity_verbose) {
3760 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3761 }
3762
3763 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3764 KMP_ASSERT(depth != 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003765 if (depth < 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003766 KMP_ASSERT(msg_id != kmp_i18n_null);
3767 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003768 }
3769 }
3770
Andrey Churbanov7daf9802015-01-27 16:52:57 +00003771# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003772
3773 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
3774 if (__kmp_affinity_verbose) {
3775 KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
3776 }
3777
3778 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3779 if (depth == 0) {
3780 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3781 KMP_ASSERT(address2os == NULL);
3782 return;
3783 }
3784 // should not fail
3785 KMP_ASSERT(depth > 0);
3786 KMP_ASSERT(address2os != NULL);
3787 }
3788
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003789# if KMP_USE_HWLOC
3790 else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
3791 if (__kmp_affinity_verbose) {
3792 KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
3793 }
3794 depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
3795 if (depth == 0) {
3796 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3797 KMP_ASSERT(address2os == NULL);
3798 return;
3799 }
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003800 }
3801# endif // KMP_USE_HWLOC
3802
Jim Cownie5e8470a2013-09-27 10:38:44 +00003803 if (address2os == NULL) {
3804 if (KMP_AFFINITY_CAPABLE()
3805 && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3806 && (__kmp_affinity_type != affinity_none)))) {
3807 KMP_WARNING(ErrorInitializeAffinity);
3808 }
3809 __kmp_affinity_type = affinity_none;
Andrey Churbanov1f037e42015-03-10 09:15:26 +00003810 KMP_AFFINITY_DISABLE();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003811 return;
3812 }
3813
Jim Cownie5e8470a2013-09-27 10:38:44 +00003814 __kmp_apply_thread_places(&address2os, depth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003815
3816 //
3817 // Create the table of masks, indexed by thread Id.
3818 //
3819 unsigned maxIndex;
3820 unsigned numUnique;
3821 kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
3822 address2os, __kmp_avail_proc);
3823 if (__kmp_affinity_gran_levels == 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003824 KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003825 }
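
    // Illustrative example: with granularity=core on a machine that has two
    // hardware threads per core, __kmp_create_masks() folds the sibling
    // threads of each core into a single mask, so numUnique is
    // __kmp_avail_proc / 2; only at the finest granularity
    // (__kmp_affinity_gran_levels == 0) does numUnique equal
    // __kmp_avail_proc, which is what the assertion above checks.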
3826
3827 //
3828 // Set the childNums vector in all Address objects. This must be done
3829 // before we can sort using __kmp_affinity_cmp_Address_child_num(),
3830 // which takes into account the setting of __kmp_affinity_compact.
3831 //
3832 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
3833
3834 switch (__kmp_affinity_type) {
3835
3836 case affinity_explicit:
3837 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
3838# if OMP_40_ENABLED
3839 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3840# endif
3841 {
3842 __kmp_affinity_process_proclist(&__kmp_affinity_masks,
3843 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3844 maxIndex);
3845 }
3846# if OMP_40_ENABLED
3847 else {
3848 __kmp_affinity_process_placelist(&__kmp_affinity_masks,
3849 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3850 maxIndex);
3851 }
3852# endif
3853 if (__kmp_affinity_num_masks == 0) {
3854 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3855 && (__kmp_affinity_type != affinity_none))) {
3856 KMP_WARNING(AffNoValidProcID);
3857 }
3858 __kmp_affinity_type = affinity_none;
3859 return;
3860 }
3861 break;
3862
3863 //
3864 // The other affinity types rely on sorting the Addresses according
3865 // to some permutation of the machine topology tree. Set
3866 // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
3867 // then jump to a common code fragment to do the sort and create
3868 // the array of affinity masks.
3869 //
3870
3871 case affinity_logical:
3872 __kmp_affinity_compact = 0;
3873 if (__kmp_affinity_offset) {
3874 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3875 % __kmp_avail_proc;
3876 }
3877 goto sortAddresses;
3878
3879 case affinity_physical:
3880 if (__kmp_nThreadsPerCore > 1) {
3881 __kmp_affinity_compact = 1;
3882 if (__kmp_affinity_compact >= depth) {
3883 __kmp_affinity_compact = 0;
3884 }
3885 } else {
3886 __kmp_affinity_compact = 0;
3887 }
3888 if (__kmp_affinity_offset) {
3889 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3890 % __kmp_avail_proc;
3891 }
3892 goto sortAddresses;
3893
3894 case affinity_scatter:
3895 if (__kmp_affinity_compact >= depth) {
3896 __kmp_affinity_compact = 0;
3897 }
3898 else {
3899 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
3900 }
3901 goto sortAddresses;
3902
3903 case affinity_compact:
3904 if (__kmp_affinity_compact >= depth) {
3905 __kmp_affinity_compact = depth - 1;
3906 }
3907 goto sortAddresses;
3908
Jim Cownie5e8470a2013-09-27 10:38:44 +00003909 case affinity_balanced:
Jonathan Peytoncaf09fe2015-05-27 23:27:33 +00003910 // Balanced works only for the case of a single package
Jim Cownie5e8470a2013-09-27 10:38:44 +00003911 if( nPackages > 1 ) {
3912 if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
3913 KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
3914 }
3915 __kmp_affinity_type = affinity_none;
3916 return;
3917 } else if( __kmp_affinity_uniform_topology() ) {
3918 break;
3919 } else { // Non-uniform topology
3920
3921 // Save the depth for further usage
3922 __kmp_aff_depth = depth;
3923
3924 // Number of hyper threads per core in HT machine
3925 int nth_per_core = __kmp_nThreadsPerCore;
3926
3927 int core_level;
3928 if( nth_per_core > 1 ) {
3929 core_level = depth - 2;
3930 } else {
3931 core_level = depth - 1;
3932 }
3933 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
3934 int nproc = nth_per_core * ncores;
3935
3936 procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
3937 for( int i = 0; i < nproc; i++ ) {
3938 procarr[ i ] = -1;
3939 }
3940
3941 for( int i = 0; i < __kmp_avail_proc; i++ ) {
3942 int proc = address2os[ i ].second;
3943 // If depth == 3 then level=0 - package, level=1 - core, level=2 - thread.
3944 // If there is only one thread per core then depth == 2: level 0 - package,
3945 // level 1 - core.
3946 int level = depth - 1;
3947
3948 // Defaults for the case __kmp_nth_per_core == 1 (no separate thread level)
3949 int thread = 0;
3950 int core = address2os[ i ].first.labels[ level ];
3951 // If the thread level exists, i.e. we have more than one thread context per core
3952 if( nth_per_core > 1 ) {
3953 thread = address2os[ i ].first.labels[ level ] % nth_per_core;
3954 core = address2os[ i ].first.labels[ level - 1 ];
3955 }
3956 procarr[ core * nth_per_core + thread ] = proc;
3957 }
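
            // Illustrative procarr layout: with nth_per_core == 2, a core
            // whose second thread context is absent from the topology map
            // keeps { osID, -1 } in its two slots; in general
            // procarr[core * nth_per_core + thread] holds the OS proc id of
            // that context, or -1 if the context does not exist.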
3958
3959 break;
3960 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003961
3962 sortAddresses:
3963 //
3964 // Allocate the gtid->affinity mask table.
3965 //
3966 if (__kmp_affinity_dups) {
3967 __kmp_affinity_num_masks = __kmp_avail_proc;
3968 }
3969 else {
3970 __kmp_affinity_num_masks = numUnique;
3971 }
3972
3973# if OMP_40_ENABLED
3974 if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
3975 && ( __kmp_affinity_num_places > 0 )
3976 && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
3977 __kmp_affinity_num_masks = __kmp_affinity_num_places;
3978 }
3979# endif
3980
Jonathan Peyton01dcf362015-11-30 20:02:59 +00003981 KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003982
3983 //
3984 // Sort the address2os table according to the current setting of
3985 // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
3986 //
3987 qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
3988 __kmp_affinity_cmp_Address_child_num);
3989 {
3990 int i;
3991 unsigned j;
3992 for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
3993 if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
3994 continue;
3995 }
3996 unsigned osId = address2os[i].second;
3997 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
3998 kmp_affin_mask_t *dest
3999 = KMP_CPU_INDEX(__kmp_affinity_masks, j);
4000 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
4001 KMP_CPU_COPY(dest, src);
4002 if (++j >= __kmp_affinity_num_masks) {
4003 break;
4004 }
4005 }
4006 KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
4007 }
4008 break;
4009
4010 default:
4011 KMP_ASSERT2(0, "Unexpected affinity setting");
4012 }
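
    // A rough worked example of the scatter inversion in the cases above:
    // with depth == 3, a user setting of scatter,0 becomes
    // __kmp_affinity_compact = 3 - 1 - 0 == 2, i.e. scatter is compact
    // counted from the opposite end of the topology tree, so consecutive
    // thread ids are spread across packages first rather than packed onto
    // neighboring hardware threads.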
4013
4014 __kmp_free(osId2Mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004015 machine_hierarchy.init(address2os, __kmp_avail_proc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004016}
4017
4018
4019void
4020__kmp_affinity_initialize(void)
4021{
4022 //
4023 // Much of the code above was written assuming that if a machine was not
4024 // affinity capable, then __kmp_affinity_type == affinity_none. We now
4025 // explicitly represent this as __kmp_affinity_type == affinity_disabled.
4026 //
4027 // There are too many checks for __kmp_affinity_type == affinity_none
4028 // in this code. Instead of trying to change them all, check if
4029 // __kmp_affinity_type == affinity_disabled, and if so, slam it with
4030 // affinity_none, call the real initialization routine, then restore
4031 // __kmp_affinity_type to affinity_disabled.
4032 //
4033 int disabled = (__kmp_affinity_type == affinity_disabled);
4034 if (! KMP_AFFINITY_CAPABLE()) {
4035 KMP_ASSERT(disabled);
4036 }
4037 if (disabled) {
4038 __kmp_affinity_type = affinity_none;
4039 }
4040 __kmp_aux_affinity_initialize();
4041 if (disabled) {
4042 __kmp_affinity_type = affinity_disabled;
4043 }
4044}
4045
4046
4047void
4048__kmp_affinity_uninitialize(void)
4049{
4050 if (__kmp_affinity_masks != NULL) {
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004051 KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004052 __kmp_affinity_masks = NULL;
4053 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004054 if (__kmp_affin_fullMask != NULL) {
4055 KMP_CPU_FREE(__kmp_affin_fullMask);
4056 __kmp_affin_fullMask = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004057 }
4058 __kmp_affinity_num_masks = 0;
4059# if OMP_40_ENABLED
4060 __kmp_affinity_num_places = 0;
4061# endif
4062 if (__kmp_affinity_proclist != NULL) {
4063 __kmp_free(__kmp_affinity_proclist);
4064 __kmp_affinity_proclist = NULL;
4065 }
4066 if( address2os != NULL ) {
4067 __kmp_free( address2os );
4068 address2os = NULL;
4069 }
4070 if( procarr != NULL ) {
4071 __kmp_free( procarr );
4072 procarr = NULL;
4073 }
Jonathan Peyton202a24d2016-06-13 17:30:08 +00004074# if KMP_USE_HWLOC
4075 if (__kmp_hwloc_topology != NULL) {
4076 hwloc_topology_destroy(__kmp_hwloc_topology);
4077 __kmp_hwloc_topology = NULL;
4078 }
4079# endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004080}
4081
4082
4083void
4084__kmp_affinity_set_init_mask(int gtid, int isa_root)
4085{
4086 if (! KMP_AFFINITY_CAPABLE()) {
4087 return;
4088 }
4089
4090 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4091 if (th->th.th_affin_mask == NULL) {
4092 KMP_CPU_ALLOC(th->th.th_affin_mask);
4093 }
4094 else {
4095 KMP_CPU_ZERO(th->th.th_affin_mask);
4096 }
4097
4098 //
4099 // Copy the thread mask to the kmp_info_t structure.
4100 // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
4101 // that has all of the OS proc ids set; or, if __kmp_affinity_respect_mask
4102 // is set, the full mask is the same as the mask of the initialization
4103 // thread.
4104 //
4105 kmp_affin_mask_t *mask;
4106 int i;
4107
4108# if OMP_40_ENABLED
4109 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
4110# endif
4111 {
Andrey Churbanovf28f6132015-01-13 14:54:00 +00004112 if ((__kmp_affinity_type == affinity_none) || (__kmp_affinity_type == affinity_balanced)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004113 ) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004114# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004115 if (__kmp_num_proc_groups > 1) {
4116 return;
4117 }
4118# endif
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004119 KMP_ASSERT(__kmp_affin_fullMask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004120 i = KMP_PLACE_ALL;
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004121 mask = __kmp_affin_fullMask;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004122 }
4123 else {
4124 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4125 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4126 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4127 }
4128 }
4129# if OMP_40_ENABLED
4130 else {
4131 if ((! isa_root)
4132 || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004133# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004134 if (__kmp_num_proc_groups > 1) {
4135 return;
4136 }
4137# endif
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004138 KMP_ASSERT(__kmp_affin_fullMask != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004139 i = KMP_PLACE_ALL;
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004140 mask = __kmp_affin_fullMask;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004141 }
4142 else {
4143 //
4144 // int i = some hash function or just a counter that doesn't
4145 // always start at 0. Use gtid for now.
4146 //
4147 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4148 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4149 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4150 }
4151 }
4152# endif
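
    // Illustrative mapping (assumed numbers): with __kmp_affinity_num_masks
    // == 4 and __kmp_affinity_offset == 1, gtids 0,1,2,3 select places
    // 1,2,3,0 via i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks
    // above, i.e. the round-robin assignment is rotated by the offset
    // without changing its order.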
4153
4154# if OMP_40_ENABLED
4155 th->th.th_current_place = i;
4156 if (isa_root) {
4157 th->th.th_new_place = i;
4158 th->th.th_first_place = 0;
4159 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4160 }
4161
4162 if (i == KMP_PLACE_ALL) {
4163 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
4164 gtid));
4165 }
4166 else {
4167 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
4168 gtid, i));
4169 }
4170# else
4171 if (i == -1) {
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004172 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00004173 gtid));
4174 }
4175 else {
4176 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
4177 gtid, i));
4178 }
4179# endif /* OMP_40_ENABLED */
4180
4181 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4182
4183 if (__kmp_affinity_verbose) {
4184 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4185 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4186 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004187 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
4188 buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004189 }
4190
4191# if KMP_OS_WINDOWS
4192 //
4193 // On Windows* OS, the process affinity mask might have changed.
4194 // If the user didn't request affinity and this call fails,
4195 // just continue silently. See CQ171393.
4196 //
4197 if ( __kmp_affinity_type == affinity_none ) {
4198 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
4199 }
4200 else
4201# endif
4202 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4203}
4204
4205
4206# if OMP_40_ENABLED
4207
4208void
4209__kmp_affinity_set_place(int gtid)
4210{
4211 int retval;
4212
4213 if (! KMP_AFFINITY_CAPABLE()) {
4214 return;
4215 }
4216
4217 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4218
4219 KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
4220 gtid, th->th.th_new_place, th->th.th_current_place));
4221
4222 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00004223 // Check that the new place is within this thread's partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004224 //
4225 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004226 KMP_ASSERT(th->th.th_new_place >= 0);
4227 KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004228 if (th->th.th_first_place <= th->th.th_last_place) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004229 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004230 && (th->th.th_new_place <= th->th.th_last_place));
4231 }
4232 else {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004233 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
Jim Cownie5e8470a2013-09-27 10:38:44 +00004234 || (th->th.th_new_place >= th->th.th_last_place));
4235 }
4236
4237 //
4238 // Copy the thread mask to the kmp_info_t structure,
4239 // and set this thread's affinity.
4240 //
4241 kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
4242 th->th.th_new_place);
4243 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4244 th->th.th_current_place = th->th.th_new_place;
4245
4246 if (__kmp_affinity_verbose) {
4247 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4248 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4249 th->th.th_affin_mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004250 KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
4251 gtid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004252 }
4253 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4254}
4255
4256# endif /* OMP_40_ENABLED */
4257
4258
4259int
4260__kmp_aux_set_affinity(void **mask)
4261{
4262 int gtid;
4263 kmp_info_t *th;
4264 int retval;
4265
4266 if (! KMP_AFFINITY_CAPABLE()) {
4267 return -1;
4268 }
4269
4270 gtid = __kmp_entry_gtid();
4271 KA_TRACE(1000, ;{
4272 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4273 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4274 (kmp_affin_mask_t *)(*mask));
4275 __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
4276 gtid, buf);
4277 });
4278
4279 if (__kmp_env_consistency_check) {
4280 if ((mask == NULL) || (*mask == NULL)) {
4281 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4282 }
4283 else {
4284 unsigned proc;
4285 int num_procs = 0;
4286
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004287 KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t*)(*mask))) {
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004288 if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
4289 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4290 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004291 if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
4292 continue;
4293 }
4294 num_procs++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004295 }
4296 if (num_procs == 0) {
4297 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4298 }
4299
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004300# if KMP_GROUP_AFFINITY
Jim Cownie5e8470a2013-09-27 10:38:44 +00004301 if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
4302 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4303 }
Andrey Churbanov7daf9802015-01-27 16:52:57 +00004304# endif /* KMP_GROUP_AFFINITY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004305
4306 }
4307 }
4308
4309 th = __kmp_threads[gtid];
4310 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4311 retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4312 if (retval == 0) {
4313 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
4314 }
4315
4316# if OMP_40_ENABLED
4317 th->th.th_current_place = KMP_PLACE_UNDEFINED;
4318 th->th.th_new_place = KMP_PLACE_UNDEFINED;
4319 th->th.th_first_place = 0;
4320 th->th.th_last_place = __kmp_affinity_num_masks - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004321
4322 //
4323 // Turn off 4.0 affinity for the current thread at this parallel level.
4324 //
4325 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004326# endif
4327
4328 return retval;
4329}
4330
4331
4332int
4333__kmp_aux_get_affinity(void **mask)
4334{
4335 int gtid;
4336 int retval;
4337 kmp_info_t *th;
4338
4339 if (! KMP_AFFINITY_CAPABLE()) {
4340 return -1;
4341 }
4342
4343 gtid = __kmp_entry_gtid();
4344 th = __kmp_threads[gtid];
4345 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4346
4347 KA_TRACE(1000, ;{
4348 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4349 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4350 th->th.th_affin_mask);
4351 __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
4352 });
4353
4354 if (__kmp_env_consistency_check) {
4355 if ((mask == NULL) || (*mask == NULL)) {
4356 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
4357 }
4358 }
4359
4360# if !KMP_OS_WINDOWS
4361
4362 retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4363 KA_TRACE(1000, ;{
4364 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4365 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4366 (kmp_affin_mask_t *)(*mask));
4367 __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
4368 });
4369 return retval;
4370
4371# else
4372
4373 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
4374 return 0;
4375
4376# endif /* KMP_OS_WINDOWS */
4377
4378}
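
// A hedged usage sketch of the user-level API (declared in omp.h) that
// funnels into __kmp_aux_set_affinity() / __kmp_aux_get_affinity(); error
// handling is mostly elided:
//
//     kmp_affinity_mask_t mask;
//     kmp_create_affinity_mask(&mask);
//     kmp_set_affinity_mask_proc(0, &mask); // allow OS proc 0 only
//     if (kmp_set_affinity(&mask) != 0) {
//         // not affinity capable, or the mask was rejected
//     }
//     kmp_get_affinity(&mask);              // read the mask back
//     kmp_destroy_affinity_mask(&mask);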
4379
Jim Cownie5e8470a2013-09-27 10:38:44 +00004380int
4381__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
4382{
4383 int retval;
4384
4385 if (! KMP_AFFINITY_CAPABLE()) {
4386 return -1;
4387 }
4388
4389 KA_TRACE(1000, ;{
4390 int gtid = __kmp_entry_gtid();
4391 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4392 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4393 (kmp_affin_mask_t *)(*mask));
4394 __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
4395 proc, gtid, buf);
4396 });
4397
4398 if (__kmp_env_consistency_check) {
4399 if ((mask == NULL) || (*mask == NULL)) {
4400 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
4401 }
4402 }
4403
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004404 if ((proc < 0)
4405# if !KMP_USE_HWLOC
4406 || ((unsigned)proc >= KMP_CPU_SETSIZE)
4407# endif
4408 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004409 return -1;
4410 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004411 if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004412 return -2;
4413 }
4414
4415 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
4416 return 0;
4417}
4418
4419
4420int
4421__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
4422{
4423 int retval;
4424
4425 if (! KMP_AFFINITY_CAPABLE()) {
4426 return -1;
4427 }
4428
4429 KA_TRACE(1000, ;{
4430 int gtid = __kmp_entry_gtid();
4431 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4432 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4433 (kmp_affin_mask_t *)(*mask));
4434 __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
4435 proc, gtid, buf);
4436 });
4437
4438 if (__kmp_env_consistency_check) {
4439 if ((mask == NULL) || (*mask == NULL)) {
4440 KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
4441 }
4442 }
4443
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004444 if ((proc < 0)
4445# if !KMP_USE_HWLOC
4446 || ((unsigned)proc >= KMP_CPU_SETSIZE)
4447# endif
4448 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004449 return -1;
4450 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004451 if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004452 return -2;
4453 }
4454
4455 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
4456 return 0;
4457}
4458
4459
4460int
4461__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
4462{
4463 int retval;
4464
4465 if (! KMP_AFFINITY_CAPABLE()) {
4466 return -1;
4467 }
4468
4469 KA_TRACE(1000, ;{
4470 int gtid = __kmp_entry_gtid();
4471 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4472 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4473 (kmp_affin_mask_t *)(*mask));
4474 __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
4475 proc, gtid, buf);
4476 });
4477
4478 if (__kmp_env_consistency_check) {
4479 if ((mask == NULL) || (*mask == NULL)) {
Andrey Churbanov4b2f17a2015-01-29 15:49:22 +00004480 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
Jim Cownie5e8470a2013-09-27 10:38:44 +00004481 }
4482 }
4483
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004484 if ((proc < 0)
4485# if !KMP_USE_HWLOC
4486 || ((unsigned)proc >= KMP_CPU_SETSIZE)
4487# endif
4488 ) {
4489 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004490 }
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004491 if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004492 return 0;
4493 }
4494
4495 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
4496}
4497
Jim Cownie5e8470a2013-09-27 10:38:44 +00004498
4499// Dynamic affinity settings - Affinity balanced
4500void __kmp_balanced_affinity( int tid, int nthreads )
4501{
4502 if( __kmp_affinity_uniform_topology() ) {
4503 int coreID;
4504 int threadID;
4505 // Number of hyper threads per core in HT machine
4506 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
4507 // Number of cores
4508 int ncores = __kmp_ncores;
4509 // How many threads will be bound to each core
4510 int chunk = nthreads / ncores;
4511 // How many cores will have an additional thread bound to them - the "big cores"
4512 int big_cores = nthreads % ncores;
4513 // Number of threads on the big cores
4514 int big_nth = ( chunk + 1 ) * big_cores;
4515 if( tid < big_nth ) {
4516 coreID = tid / (chunk + 1 );
4517 threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
4518 } else { //tid >= big_nth
4519 coreID = ( tid - big_cores ) / chunk;
4520 threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
4521 }
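
        // Worked example (illustrative): nthreads == 10 on ncores == 4 gives
        // chunk == 2, big_cores == 2 and big_nth == 6, so tids 0..5 land
        // three per core on cores 0 and 1, while tids 6..9 land two per core
        // on cores 2 and 3.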
4522
4523 KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
4524 "Illegal set affinity operation when not capable");
4525
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004526 kmp_affin_mask_t *mask;
4527 KMP_CPU_ALLOC_ON_STACK(mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004528 KMP_CPU_ZERO(mask);
4529
4530 // Granularity == thread
4531 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4532 int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
4533 KMP_CPU_SET( osID, mask);
4534 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4535 for( int i = 0; i < __kmp_nth_per_core; i++ ) {
4536 int osID;
4537 osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
4538 KMP_CPU_SET( osID, mask);
4539 }
4540 }
4541 if (__kmp_affinity_verbose) {
4542 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4543 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004544 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4545 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004546 }
4547 __kmp_set_system_affinity( mask, TRUE );
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004548 KMP_CPU_FREE_FROM_STACK(mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004549 } else { // Non-uniform topology
4550
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004551 kmp_affin_mask_t *mask;
4552 KMP_CPU_ALLOC_ON_STACK(mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004553 KMP_CPU_ZERO(mask);
4554
4555 // Number of hyper threads per core in HT machine
4556 int nth_per_core = __kmp_nThreadsPerCore;
4557 int core_level;
4558 if( nth_per_core > 1 ) {
4559 core_level = __kmp_aff_depth - 2;
4560 } else {
4561 core_level = __kmp_aff_depth - 1;
4562 }
4563
4564 // Number of cores - maximum value; it does not count trailing cores with 0 processors
4565 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
4566
4567 // For performance gain consider the special case nthreads == __kmp_avail_proc
4568 if( nthreads == __kmp_avail_proc ) {
4569 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4570 int osID = address2os[ tid ].second;
4571 KMP_CPU_SET( osID, mask);
4572 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4573 int coreID = address2os[ tid ].first.labels[ core_level ];
4574 // We'll count the osIDs found for the current core; there can be at most nth_per_core of them;
4575 // since address2os is sorted we can break when cnt == nth_per_core
4576 int cnt = 0;
4577 for( int i = 0; i < __kmp_avail_proc; i++ ) {
4578 int osID = address2os[ i ].second;
4579 int core = address2os[ i ].first.labels[ core_level ];
4580 if( core == coreID ) {
4581 KMP_CPU_SET( osID, mask);
4582 cnt++;
4583 if( cnt == nth_per_core ) {
4584 break;
4585 }
4586 }
4587 }
4588 }
4589 } else if( nthreads <= __kmp_ncores ) {
4590
4591 int core = 0;
4592 for( int i = 0; i < ncores; i++ ) {
4593 // Check if this core from procarr[] is in the mask
4594 int in_mask = 0;
4595 for( int j = 0; j < nth_per_core; j++ ) {
4596 if( procarr[ i * nth_per_core + j ] != - 1 ) {
4597 in_mask = 1;
4598 break;
4599 }
4600 }
4601 if( in_mask ) {
4602 if( tid == core ) {
4603 for( int j = 0; j < nth_per_core; j++ ) {
4604 int osID = procarr[ i * nth_per_core + j ];
4605 if( osID != -1 ) {
4606 KMP_CPU_SET( osID, mask );
4607 // For granularity=thread it is enough to set the first available osID for this core
4608 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4609 break;
4610 }
4611 }
4612 }
4613 break;
4614 } else {
4615 core++;
4616 }
4617 }
4618 }
4619
4620 } else { // nthreads > __kmp_ncores
4621
4622 // Array to save the number of processors at each core
Jonathan Peyton7be075332015-06-22 15:53:50 +00004623 int* nproc_at_core = (int*)KMP_ALLOCA(sizeof(int)*ncores);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004624 // Array to save the number of cores with "x" available processors;
Jonathan Peyton7be075332015-06-22 15:53:50 +00004625 int* ncores_with_x_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004626 // Array to save the number of cores with # procs from x to nth_per_core
Jonathan Peyton7be075332015-06-22 15:53:50 +00004627 int* ncores_with_x_to_max_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004628
4629 for( int i = 0; i <= nth_per_core; i++ ) {
4630 ncores_with_x_procs[ i ] = 0;
4631 ncores_with_x_to_max_procs[ i ] = 0;
4632 }
4633
4634 for( int i = 0; i < ncores; i++ ) {
4635 int cnt = 0;
4636 for( int j = 0; j < nth_per_core; j++ ) {
4637 if( procarr[ i * nth_per_core + j ] != -1 ) {
4638 cnt++;
4639 }
4640 }
4641 nproc_at_core[ i ] = cnt;
4642 ncores_with_x_procs[ cnt ]++;
4643 }
4644
4645 for( int i = 0; i <= nth_per_core; i++ ) {
4646 for( int j = i; j <= nth_per_core; j++ ) {
4647 ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
4648 }
4649 }
4650
4651 // Max number of processors
4652 int nproc = nth_per_core * ncores;
4653 // An array to keep the number of threads per context
4654 int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
4655 for( int i = 0; i < nproc; i++ ) {
4656 newarr[ i ] = 0;
4657 }
4658
4659 int nth = nthreads;
4660 int flag = 0;
4661 while( nth > 0 ) {
4662 for( int j = 1; j <= nth_per_core; j++ ) {
4663 int cnt = ncores_with_x_to_max_procs[ j ];
4664 for( int i = 0; i < ncores; i++ ) {
4665 // Skip the core with 0 processors
4666 if( nproc_at_core[ i ] == 0 ) {
4667 continue;
4668 }
4669 for( int k = 0; k < nth_per_core; k++ ) {
4670 if( procarr[ i * nth_per_core + k ] != -1 ) {
4671 if( newarr[ i * nth_per_core + k ] == 0 ) {
4672 newarr[ i * nth_per_core + k ] = 1;
4673 cnt--;
4674 nth--;
4675 break;
4676 } else {
4677 if( flag != 0 ) {
4678 newarr[ i * nth_per_core + k ] ++;
4679 cnt--;
4680 nth--;
4681 break;
4682 }
4683 }
4684 }
4685 }
4686 if( cnt == 0 || nth == 0 ) {
4687 break;
4688 }
4689 }
4690 if( nth == 0 ) {
4691 break;
4692 }
4693 }
4694 flag = 1;
4695 }
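
            // After the loop, newarr[ctx] holds how many threads were placed
            // on hardware context ctx. Illustrative run: 5 threads over
            // 2 cores x 2 contexts (all present) yields newarr == {2,1,1,1};
            // the second pass (flag == 1) is what permits doubling up on a
            // context that already has a thread.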
4696 int sum = 0;
4697 for( int i = 0; i < nproc; i++ ) {
4698 sum += newarr[ i ];
4699 if( sum > tid ) {
4700 // Granularity == thread
4701 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4702 int osID = procarr[ i ];
4703 KMP_CPU_SET( osID, mask);
4704 } else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
4705 int coreID = i / nth_per_core;
4706 for( int ii = 0; ii < nth_per_core; ii++ ) {
4707 int osID = procarr[ coreID * nth_per_core + ii ];
4708 if( osID != -1 ) {
4709 KMP_CPU_SET( osID, mask);
4710 }
4711 }
4712 }
4713 break;
4714 }
4715 }
4716 __kmp_free( newarr );
4717 }
4718
4719 if (__kmp_affinity_verbose) {
4720 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4721 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004722 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4723 tid, buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004724 }
4725 __kmp_set_system_affinity( mask, TRUE );
Jonathan Peyton01dcf362015-11-30 20:02:59 +00004726 KMP_CPU_FREE_FROM_STACK(mask);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004727 }
4728}
4729
Jonathan Peyton3076fa42016-01-12 17:21:55 +00004730#if KMP_OS_LINUX
4731// We don't need this entry for Windows because
4732// there is a GetProcessAffinityMask() API
4733//
4734// The intended usage is indicated by these steps:
4735// 1) The user gets the current affinity mask
4736// 2) Then sets the affinity by calling this function
4737// 3) Error check the return value
4738// 4) Use non-OpenMP parallelization
4739// 5) Reset the affinity to what was stored in step 1)
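//
// A sketch of that sequence on Linux (assuming pthreads; the helper name
// run_non_openmp_parallel_code is hypothetical):
//
//     cpu_set_t saved;
//     pthread_getaffinity_np(pthread_self(), sizeof(saved), &saved); // 1)
//     if (kmp_set_thread_affinity_mask_initial() == 0) {             // 2) 3)
//         run_non_openmp_parallel_code();                            // 4)
//     }
//     pthread_setaffinity_np(pthread_self(), sizeof(saved), &saved); // 5)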
4740#ifdef __cplusplus
4741extern "C"
4742#endif
4743int
4744kmp_set_thread_affinity_mask_initial()
4745// the function returns 0 on success,
4746// -1 if we cannot bind thread
4747// >0 (errno) if an error happened during binding
4748{
4749 int gtid = __kmp_get_gtid();
4750 if (gtid < 0) {
4751 // Do not touch non-omp threads
4752 KA_TRACE(30, ( "kmp_set_thread_affinity_mask_initial: "
4753 "non-omp thread, returning\n"));
4754 return -1;
4755 }
4756 if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) {
4757 KA_TRACE(30, ( "kmp_set_thread_affinity_mask_initial: "
4758 "affinity not initialized, returning\n"));
4759 return -1;
4760 }
4761 KA_TRACE(30, ( "kmp_set_thread_affinity_mask_initial: "
4762 "set full mask for thread %d\n", gtid));
Jonathan Peytonc5304aa2016-06-13 21:28:03 +00004763 KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
4764 return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
Jonathan Peyton3076fa42016-01-12 17:21:55 +00004765}
4766#endif
4767
Alp Toker763b9392014-02-28 09:42:41 +00004768#endif // KMP_AFFINITY_SUPPORTED