/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp_os.h"
#include "kmp.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
    class Mask : public KMPAffinity::Mask {
        hwloc_cpuset_t mask;
    public:
        Mask() { mask = hwloc_bitmap_alloc(); this->zero(); }
        ~Mask() { hwloc_bitmap_free(mask); }
        void set(int i) override { hwloc_bitmap_set(mask, i); }
        bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
        void clear(int i) override { hwloc_bitmap_clr(mask, i); }
        void zero() override { hwloc_bitmap_zero(mask); }
        void copy(const KMPAffinity::Mask* src) override {
            const Mask* convert = static_cast<const Mask*>(src);
            hwloc_bitmap_copy(mask, convert->mask);
        }
        void bitwise_and(const KMPAffinity::Mask* rhs) override {
            const Mask* convert = static_cast<const Mask*>(rhs);
            hwloc_bitmap_and(mask, mask, convert->mask);
        }
        void bitwise_or(const KMPAffinity::Mask* rhs) override {
            const Mask* convert = static_cast<const Mask*>(rhs);
            hwloc_bitmap_or(mask, mask, convert->mask);
        }
        void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
        int begin() const override { return hwloc_bitmap_first(mask); }
        int end() const override { return -1; }
        int next(int previous) const override { return hwloc_bitmap_next(mask, previous); }
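        // Traversal sketch (illustrative, not a new API): every Mask
        // implementation supports the same begin()/next()/end() idiom:
        //
        //   for (int i = mask->begin(); i != mask->end(); i = mask->next(i))
        //     ; // bit i is set in the mask
        //
        // For this hwloc mask, end() is -1 because hwloc_bitmap_first() and
        // hwloc_bitmap_next() return -1 once the bitmap is exhausted; the
        // native masks below return the total bit count instead, and the
        // same loop terminates either way.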
        int get_system_affinity(bool abort_on_error) override {
            KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                        "Illegal get affinity operation when not capable");
            int retval = hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
            if (retval >= 0) {
                return 0;
            }
            int error = errno;
            if (abort_on_error) {
                __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
            }
            return error;
        }
        int set_system_affinity(bool abort_on_error) const override {
            KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                        "Illegal set affinity operation when not capable");
            int retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
            if (retval >= 0) {
                return 0;
            }
            int error = errno;
            if (abort_on_error) {
                __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
            }
            return error;
        }
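        // Only meaningful on Windows, where a process can span multiple
        // processor groups: reports the single group this mask is confined
        // to, or -1 if the mask spans more than one group.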
        int get_proc_group() const override {
            int group = -1;
# if KMP_OS_WINDOWS
            if (__kmp_num_proc_groups == 1) {
                return 1;
            }
            for (int i = 0; i < __kmp_num_proc_groups; i++) {
                // On windows, the long type is always 32 bits
                unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2);
                unsigned long second_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2+1);
                if (first_32_bits == 0 && second_32_bits == 0) {
                    continue;
                }
                if (group >= 0) {
                    return -1;
                }
                group = i;
            }
# endif /* KMP_OS_WINDOWS */
            return group;
        }
    };
    void determine_capable(const char* var) override {
        const hwloc_topology_support* topology_support;
        if (__kmp_hwloc_topology == NULL) {
            if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
                __kmp_hwloc_error = TRUE;
                if (__kmp_affinity_verbose)
                    KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
            }
            if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
                __kmp_hwloc_error = TRUE;
                if (__kmp_affinity_verbose)
                    KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
            }
        }
        topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
        // Is the system capable of setting/getting this thread's affinity?
        // Also, is topology discovery possible? ('pu' indicates the ability to
        // discover processing units.) And finally, were there no errors when
        // calling any hwloc_* API functions?
        if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
            topology_support->cpubind->get_thisthread_cpubind &&
            topology_support->discovery->pu &&
            !__kmp_hwloc_error)
        {
            // enables affinity according to KMP_AFFINITY_CAPABLE() macro
            KMP_AFFINITY_ENABLE(TRUE);
        } else {
            // indicate that hwloc didn't work and disable affinity
            __kmp_hwloc_error = TRUE;
            KMP_AFFINITY_DISABLE();
        }
    }
    void bind_thread(int which) override {
        KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                    "Illegal set affinity operation when not capable");
        KMPAffinity::Mask *mask;
        KMP_CPU_ALLOC_ON_STACK(mask);
        KMP_CPU_ZERO(mask);
        KMP_CPU_SET(which, mask);
        __kmp_set_system_affinity(mask, TRUE);
        KMP_CPU_FREE_FROM_STACK(mask);
    }
    KMPAffinity::Mask* allocate_mask() override { return new Mask(); }
    void deallocate_mask(KMPAffinity::Mask* m) override { delete m; }
    KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
    void deallocate_mask_array(KMPAffinity::Mask* array) override {
        Mask* hwloc_array = static_cast<Mask*>(array);
        delete[] hwloc_array;
    }
    KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
        Mask* hwloc_array = static_cast<Mask*>(array);
        return &(hwloc_array[index]);
    }
    api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */

#if KMP_OS_LINUX
/*
 * On some of the older OS's that we build on, these constants aren't present
 * in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
 * all systems of the same arch where they are defined, and they cannot
 * change: they are set in stone forever.
 */
#include <sys/syscall.h>
# if KMP_ARCH_X86 || KMP_ARCH_ARM
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity 241
#  elif __NR_sched_setaffinity != 241
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity 242
#  elif __NR_sched_getaffinity != 242
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# elif KMP_ARCH_AARCH64
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity 122
#  elif __NR_sched_setaffinity != 122
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity 123
#  elif __NR_sched_getaffinity != 123
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# elif KMP_ARCH_X86_64
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity 203
#  elif __NR_sched_setaffinity != 203
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity 204
#  elif __NR_sched_getaffinity != 204
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# elif KMP_ARCH_PPC64
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity 222
#  elif __NR_sched_setaffinity != 222
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity 223
#  elif __NR_sched_getaffinity != 223
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# elif KMP_ARCH_MIPS
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity 4239
#  elif __NR_sched_setaffinity != 4239
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity 4240
#  elif __NR_sched_getaffinity != 4240
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# elif KMP_ARCH_MIPS64
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity 5195
#  elif __NR_sched_setaffinity != 5195
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity 5196
#  elif __NR_sched_getaffinity != 5196
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# else
#  error Unknown or unsupported architecture
# endif /* KMP_ARCH_* */
class KMPNativeAffinity : public KMPAffinity {
    class Mask : public KMPAffinity::Mask {
        typedef unsigned char mask_t;
        static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    public:
        mask_t* mask;
        Mask() { mask = (mask_t*)__kmp_allocate(__kmp_affin_mask_size); }
        ~Mask() { if (mask) __kmp_free(mask); }
        void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); }
        bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); }
        void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); }
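        // Worked example of the index arithmetic above (illustrative): with
        // mask_t = unsigned char, BITS_PER_MASK_T is 8, so set(10) updates
        // mask[10 / 8] == mask[1], ORing in (mask_t)1 << (10 % 8), i.e. bit 2
        // of the second byte. is_set() and clear() address the same bit.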
        void zero() override {
            for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
                mask[i] = 0;
        }
        void copy(const KMPAffinity::Mask* src) override {
            const Mask* convert = static_cast<const Mask*>(src);
            for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
                mask[i] = convert->mask[i];
        }
        void bitwise_and(const KMPAffinity::Mask* rhs) override {
            const Mask* convert = static_cast<const Mask*>(rhs);
            for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
                mask[i] &= convert->mask[i];
        }
        void bitwise_or(const KMPAffinity::Mask* rhs) override {
            const Mask* convert = static_cast<const Mask*>(rhs);
            for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
                mask[i] |= convert->mask[i];
        }
        void bitwise_not() override {
            for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
                mask[i] = ~(mask[i]);
        }
        int begin() const override {
            int retval = 0;
            while (retval < end() && !is_set(retval))
                ++retval;
            return retval;
        }
        int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
        int next(int previous) const override {
            int retval = previous + 1;
            while (retval < end() && !is_set(retval))
                ++retval;
            return retval;
        }
        int get_system_affinity(bool abort_on_error) override {
            KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                        "Illegal get affinity operation when not capable");
            int retval = syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
            if (retval >= 0) {
                return 0;
            }
            int error = errno;
            if (abort_on_error) {
                __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
            }
            return error;
        }
        int set_system_affinity(bool abort_on_error) const override {
            KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                        "Illegal set affinity operation when not capable");
            int retval = syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
            if (retval >= 0) {
                return 0;
            }
            int error = errno;
            if (abort_on_error) {
                __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
            }
            return error;
        }
    };
    void determine_capable(const char* env_var) override {
        __kmp_affinity_determine_capable(env_var);
    }
    void bind_thread(int which) override {
        __kmp_affinity_bind_thread(which);
    }
    KMPAffinity::Mask* allocate_mask() override {
        KMPNativeAffinity::Mask* retval = new Mask();
        return retval;
    }
    void deallocate_mask(KMPAffinity::Mask* m) override {
        KMPNativeAffinity::Mask* native_mask = static_cast<KMPNativeAffinity::Mask*>(m);
        delete native_mask;
    }
    KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
    void deallocate_mask_array(KMPAffinity::Mask* array) override {
        Mask* linux_array = static_cast<Mask*>(array);
        delete[] linux_array;
    }
    KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
        Mask* linux_array = static_cast<Mask*>(array);
        return &(linux_array[index]);
    }
    api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX */

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
    class Mask : public KMPAffinity::Mask {
        typedef ULONG_PTR mask_t;
        static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
        mask_t* mask;
    public:
        Mask() { mask = (mask_t*)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups); }
        ~Mask() { if (mask) __kmp_free(mask); }
        void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); }
        bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); }
        void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); }
        void zero() override {
            for (int i = 0; i < __kmp_num_proc_groups; ++i)
                mask[i] = 0;
        }
        void copy(const KMPAffinity::Mask* src) override {
            const Mask* convert = static_cast<const Mask*>(src);
            for (int i = 0; i < __kmp_num_proc_groups; ++i)
                mask[i] = convert->mask[i];
        }
        void bitwise_and(const KMPAffinity::Mask* rhs) override {
            const Mask* convert = static_cast<const Mask*>(rhs);
            for (int i = 0; i < __kmp_num_proc_groups; ++i)
                mask[i] &= convert->mask[i];
        }
        void bitwise_or(const KMPAffinity::Mask* rhs) override {
            const Mask* convert = static_cast<const Mask*>(rhs);
            for (int i = 0; i < __kmp_num_proc_groups; ++i)
                mask[i] |= convert->mask[i];
        }
        void bitwise_not() override {
            for (int i = 0; i < __kmp_num_proc_groups; ++i)
                mask[i] = ~(mask[i]);
        }
        int begin() const override {
            int retval = 0;
            while (retval < end() && !is_set(retval))
                ++retval;
            return retval;
        }
        int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
        int next(int previous) const override {
            int retval = previous + 1;
            while (retval < end() && !is_set(retval))
                ++retval;
            return retval;
        }
        int set_system_affinity(bool abort_on_error) const override {
            if (__kmp_num_proc_groups > 1) {
                // Check for a valid mask.
                GROUP_AFFINITY ga;
                int group = get_proc_group();
                if (group < 0) {
                    if (abort_on_error) {
                        KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
                    }
                    return -1;
                }
                // Transform the bit vector into a GROUP_AFFINITY struct
                // and make the system call to set affinity.
                ga.Group = group;
                ga.Mask = mask[group];
                ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

                KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
                if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetThreadAffMask),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
            } else {
                if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetThreadAffMask),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
            }
            return 0;
        }
        int get_system_affinity(bool abort_on_error) override {
            if (__kmp_num_proc_groups > 1) {
                this->zero();
                GROUP_AFFINITY ga;
                KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
                if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
                if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) || (ga.Mask == 0)) {
                    return -1;
                }
                mask[ga.Group] = ga.Mask;
            } else {
                mask_t newMask, sysMask, retval;
                if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
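                // Windows exposes no GetThreadAffinityMask(), so recover the
                // current thread mask indirectly: set the thread's mask to the
                // process mask, capture the previous mask that
                // SetThreadAffinityMask() returns, then restore it below.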
                retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
                if (!retval) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
                newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
                if (!newMask) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                }
                *mask = retval;
            }
            return 0;
        }
        int get_proc_group() const override {
            int group = -1;
            if (__kmp_num_proc_groups == 1) {
                return 1;
            }
            for (int i = 0; i < __kmp_num_proc_groups; i++) {
                if (mask[i] == 0)
                    continue;
                if (group >= 0)
                    return -1;
                group = i;
            }
            return group;
        }
    };
    void determine_capable(const char* env_var) override {
        __kmp_affinity_determine_capable(env_var);
    }
    void bind_thread(int which) override {
        __kmp_affinity_bind_thread(which);
    }
    KMPAffinity::Mask* allocate_mask() override { return new Mask(); }
    void deallocate_mask(KMPAffinity::Mask* m) override { delete m; }
    KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
    void deallocate_mask_array(KMPAffinity::Mask* array) override {
        Mask* windows_array = static_cast<Mask*>(array);
        delete[] windows_array;
    }
    KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
        Mask* windows_array = static_cast<Mask*>(array);
        return &(windows_array[index]);
    }
    api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */
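
// Usage sketch (illustrative only; inside the runtime, callers normally go
// through the KMP_CPU_* macros rather than using these classes directly):
//
//   KMPAffinity* affinity = new KMPNativeAffinity();
//   affinity->determine_capable("KMP_AFFINITY");  // probe OS support
//   KMPAffinity::Mask* mask = affinity->allocate_mask();
//   mask->zero();
//   mask->set(0);                       // restrict to logical processor 0
//   mask->set_system_affinity(FALSE);   // bind the calling thread
//   affinity->deallocate_mask(mask);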

class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth)
        : depth(_depth), leader(FALSE) {
    }
    Address &operator=(const Address &b) {
        depth = b.depth;
        for (unsigned i = 0; i < depth; i++) {
            labels[i] = b.labels[i];
            childNums[i] = b.childNums[i];
        }
        leader = FALSE;
        return *this;
    }
    bool operator==(const Address &b) const {
        if (depth != b.depth)
            return false;
        for (unsigned i = 0; i < depth; i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool isClose(const Address &b, int level) const {
        if (depth != b.depth)
            return false;
        if ((unsigned)level >= depth)
            return true;
        for (unsigned i = 0; i < (depth - level); i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool operator!=(const Address &b) const {
        return !operator==(b);
    }
    void print() const {
        unsigned i;
        printf("Depth: %u --- ", depth);
        for (i = 0; i < depth; i++) {
            printf("%u ", labels[i]);
        }
    }
};
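// Worked example (illustrative): on a machine with 2 packages, 2 cores per
// package, and 2 hardware threads per core, the second thread of the first
// core of package 1 has labels = {1, 0, 1} (package, core, thread) and
// depth = 3; isClose(b, 1) then compares only the first two labels, i.e.
// "b is on the same core".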

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second)
        : first(_first), second(_second) {
    }
    AddrUnsPair &operator=(const AddrUnsPair &b) {
        first = b.first;
        second = b.second;
        return *this;
    }
    void print() const {
        printf("first = "); first.print();
        printf(" --- second = %u", second);
    }
    bool operator==(const AddrUnsPair &b) const {
        if (first != b.first) return false;
        if (second != b.second) return false;
        return true;
    }
    bool operator!=(const AddrUnsPair &b) const {
        return !operator==(b);
    }
};


static int
__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    for (i = 0; i < depth; i++) {
        if (aa->labels[i] < bb->labels[i]) return -1;
        if (aa->labels[i] > bb->labels[i]) return 1;
    }
    return 0;
}
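// Shaped for qsort(); hierarchy_info::init() below sorts the address table by
// physical ID with it:
//   qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);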


/** A structure for holding machine-specific hierarchy info to be computed once
    at init. This structure represents a mapping of threads to the actual
    machine hierarchy, or to our best guess at what the hierarchy might be, for
    the purpose of performing an efficient barrier. In the worst case, when
    there is no machine hierarchy information, it produces a tree suitable for
    a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
    /** Good default values for number of leaves and branching factor, given no
        affinity information. Behaves a bit like hyper barrier. */
    static const kmp_uint32 maxLeaves = 4;
    static const kmp_uint32 minBranch = 4;
    /** Number of levels in the hierarchy. Typical levels are threads/core,
        cores/package or socket, packages/node, nodes/machine, etc. We don't
        want to get specific with nomenclature. When the machine is
        oversubscribed we add levels to duplicate the hierarchy, doubling the
        thread capacity of the hierarchy each time we add a level. */
    kmp_uint32 maxLevels;

    /** This is specifically the depth of the machine configuration hierarchy,
        in terms of the number of levels along the longest path from root to
        any leaf. It corresponds to the number of entries in numPerLevel if we
        exclude all but one trailing 1. */
    kmp_uint32 depth;
    kmp_uint32 base_num_threads;
    enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
    volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 2=initialization in progress
    volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

    /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
        the parent of a node at level i has. For example, if we have a machine
        with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
        {2, 4, 4, 1, 1}. All empty levels are set to 1. */
    kmp_uint32 *numPerLevel;
    kmp_uint32 *skipPerLevel;
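    /** Worked example for skipPerLevel (illustrative, following init() below):
        with numPerLevel = {2, 4, 4, 1, ...} as above, skipPerLevel =
        {1, 2, 8, 32, 64, ...}. skipPerLevel[i] is the number of leaves
        (threads) spanned by one node at level i; entries past the machine's
        real depth keep doubling so the tree can absorb oversubscription. */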

    void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
        int hier_depth = adr2os[0].first.depth;
        int level = 0;
        for (int i = hier_depth - 1; i >= 0; --i) {
            int max = -1;
            for (int j = 0; j < num_addrs; ++j) {
                int next = adr2os[j].first.childNums[i];
                if (next > max) max = next;
            }
            numPerLevel[level] = max + 1;
            ++level;
        }
    }
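    // Worked example (illustrative): deriveLevels() scans the address labels
    // from deepest to shallowest, so numPerLevel fills leaves-first. For the
    // 4-package/4-core/2-HT machine above, childNums run 0..1 (threads),
    // 0..3 (cores), and 0..3 (packages), producing numPerLevel = {2, 4, 4}.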

    hierarchy_info() : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

    void fini() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); }

    void init(AddrUnsPair *adr2os, int num_addrs)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, not_initialized, initializing);
        if (bool_result == 0) { // Wait for initialization
            while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE();
            return;
        }
        KMP_DEBUG_ASSERT(bool_result == 1);

        /* Explicitly initialize the data fields here to prevent use of dirty
           values observed when a static library is re-initialized multiple
           times (e.g. when a non-OpenMP thread repeatedly launches/joins a
           thread that uses OpenMP). */
        depth = 1;
        resizing = 0;
        maxLevels = 7;
        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
        skipPerLevel = &(numPerLevel[maxLevels]);
        for (kmp_uint32 i = 0; i < maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        // Sort table by physical ID
        if (adr2os) {
            qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
            deriveLevels(adr2os, num_addrs);
        } else {
            numPerLevel[0] = maxLeaves;
            numPerLevel[1] = num_addrs / maxLeaves;
            if (num_addrs % maxLeaves) numPerLevel[1]++;
        }

        base_num_threads = num_addrs;
        for (int i = maxLevels - 1; i >= 0; --i) // count non-empty levels to get depth
            if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
                depth++;

        kmp_uint32 branch = minBranch;
        if (numPerLevel[0] == 1) branch = num_addrs / maxLeaves;
        if (branch < minBranch) branch = minBranch;
        for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
            while (numPerLevel[d] > branch || (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
                if (numPerLevel[d] & 1) numPerLevel[d]++;
                numPerLevel[d] = numPerLevel[d] >> 1;
                if (numPerLevel[d+1] == 1) depth++;
                numPerLevel[d+1] = numPerLevel[d+1] << 1;
            }
            if (numPerLevel[0] == 1) {
                branch = branch >> 1;
                if (branch < 4) branch = minBranch;
            }
        }

        for (kmp_uint32 i = 1; i < depth; ++i)
            skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
        // Fill in hierarchy in the case of oversubscription
        for (kmp_uint32 i = depth; i < maxLevels; ++i)
            skipPerLevel[i] = 2 * skipPerLevel[i-1];

        uninitialized = initialized; // One writer
    }
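    // Worked example (illustrative): init(NULL, 16), i.e. 16 threads with no
    // topology info, yields numPerLevel = {4, 4, 1, ...} (maxLeaves threads
    // per leaf group, 16/4 groups under the root), depth 3, and skipPerLevel
    // = {1, 4, 16, 32, 64, ...}: a radix-4 barrier tree over 16 threads whose
    // capacity doubles with each oversubscription level.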

    // Resize the hierarchy if nproc changes to something larger than before
    void resize(kmp_uint32 nproc)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
        while (bool_result == 0) { // someone else is trying to resize
            KMP_CPU_PAUSE();
            if (nproc <= base_num_threads) // happy with other thread's resize
                return;
            else // try to resize
                bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
        }
        KMP_DEBUG_ASSERT(bool_result != 0);
        if (nproc <= base_num_threads) return; // happy with other thread's resize

        // Calculate new maxLevels
        kmp_uint32 old_sz = skipPerLevel[depth-1];
        kmp_uint32 incs = 0, old_maxLevels = maxLevels;
        // First see if old maxLevels is enough to contain new size
        for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
            skipPerLevel[i] = 2 * skipPerLevel[i-1];
            numPerLevel[i-1] *= 2;
            old_sz *= 2;
            depth++;
        }
        if (nproc > old_sz) { // Not enough space, need to expand hierarchy
            while (nproc > old_sz) {
                old_sz *= 2;
                incs++;
                depth++;
            }
            maxLevels += incs;

            // Resize arrays
            kmp_uint32 *old_numPerLevel = numPerLevel;
            kmp_uint32 *old_skipPerLevel = skipPerLevel;
            numPerLevel = skipPerLevel = NULL;
            numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
            skipPerLevel = &(numPerLevel[maxLevels]);

            // Copy old elements from old arrays
            for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
                numPerLevel[i] = old_numPerLevel[i];
                skipPerLevel[i] = old_skipPerLevel[i];
            }

            // Init new elements in arrays to 1
            for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
                numPerLevel[i] = 1;
                skipPerLevel[i] = 1;
            }

            // Free old arrays
            __kmp_free(old_numPerLevel);
        }

        // Fill in oversubscription levels of hierarchy
        for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
            skipPerLevel[i] = 2 * skipPerLevel[i-1];

        base_num_threads = nproc;
        resizing = 0; // One writer
    }
};
#endif // KMP_AFFINITY_H