/*
 * kmp_affinity.h -- header for affinity management
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp_os.h"
#include "kmp.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity: public KMPAffinity {
public:
    class Mask : public KMPAffinity::Mask {
        hwloc_cpuset_t mask;
    public:
        Mask() { mask = hwloc_bitmap_alloc(); this->zero(); }
        ~Mask() { hwloc_bitmap_free(mask); }
        void set(int i) override { hwloc_bitmap_set(mask, i); }
        bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
        void clear(int i) override { hwloc_bitmap_clr(mask, i); }
        void zero() override { hwloc_bitmap_zero(mask); }
        void copy(const KMPAffinity::Mask* src) override {
            const Mask* convert = static_cast<const Mask*>(src);
            hwloc_bitmap_copy(mask, convert->mask);
        }
        void bitwise_and(const KMPAffinity::Mask* rhs) override {
            const Mask* convert = static_cast<const Mask*>(rhs);
            hwloc_bitmap_and(mask, mask, convert->mask);
        }
        void bitwise_or(const KMPAffinity::Mask* rhs) override {
            const Mask* convert = static_cast<const Mask*>(rhs);
            hwloc_bitmap_or(mask, mask, convert->mask);
        }
        void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
        int begin() const override { return hwloc_bitmap_first(mask); }
        int end() const override { return -1; }
        int next(int previous) const override { return hwloc_bitmap_next(mask, previous); }
        int get_system_affinity(bool abort_on_error) override {
            KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                        "Illegal get affinity operation when not capable");
            int retval = hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
            if (retval >= 0) {
                return 0;
            }
            int error = errno;
            if (abort_on_error) {
                __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
            }
            return error;
        }
        int set_system_affinity(bool abort_on_error) const override {
            KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                        "Illegal set affinity operation when not capable");
            int retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
            if (retval >= 0) {
                return 0;
            }
            int error = errno;
            if (abort_on_error) {
                __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
            }
            return error;
        }
        int get_proc_group() const override {
            int i;
            int group = -1;
# if KMP_OS_WINDOWS
            if (__kmp_num_proc_groups == 1) {
                return 1;
            }
            for (i = 0; i < __kmp_num_proc_groups; i++) {
                // On Windows, the 'unsigned long' type is always 32 bits
                unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2);
                unsigned long second_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2+1);
                if (first_32_bits == 0 && second_32_bits == 0) {
                    continue;
                }
                if (group >= 0) {
                    return -1;
                }
                group = i;
            }
# endif /* KMP_OS_WINDOWS */
            return group;
        }
    };
    void determine_capable(const char* var) override {
        const hwloc_topology_support* topology_support;
        if (__kmp_hwloc_topology == NULL) {
            if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
                __kmp_hwloc_error = TRUE;
                if (__kmp_affinity_verbose)
                    KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
            }
            if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
                __kmp_hwloc_error = TRUE;
                if (__kmp_affinity_verbose)
                    KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
            }
        }
        topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
        // Is the system capable of setting/getting this thread's affinity?
        // Also, is topology discovery possible? ('pu' indicates the ability to
        // discover processing units.)  And finally, were there no errors when
        // calling any hwloc_* API functions?
        if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
            topology_support->cpubind->get_thisthread_cpubind &&
            topology_support->discovery->pu &&
            !__kmp_hwloc_error)
        {
            // enables affinity according to KMP_AFFINITY_CAPABLE() macro
            KMP_AFFINITY_ENABLE(TRUE);
        } else {
            // indicate that hwloc didn't work and disable affinity
            __kmp_hwloc_error = TRUE;
            KMP_AFFINITY_DISABLE();
        }
    }
    void bind_thread(int which) override {
        KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                    "Illegal set affinity operation when not capable");
        KMPAffinity::Mask *mask;
        KMP_CPU_ALLOC_ON_STACK(mask);
        KMP_CPU_ZERO(mask);
        KMP_CPU_SET(which, mask);
        __kmp_set_system_affinity(mask, TRUE);
        KMP_CPU_FREE_FROM_STACK(mask);
    }
    KMPAffinity::Mask* allocate_mask() override { return new Mask(); }
    void deallocate_mask(KMPAffinity::Mask* m) override { delete m; }
    KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
    void deallocate_mask_array(KMPAffinity::Mask* array) override {
        Mask* hwloc_array = static_cast<Mask*>(array);
        delete[] hwloc_array;
    }
    KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
        Mask* hwloc_array = static_cast<Mask*>(array);
        return &(hwloc_array[index]);
    }
    api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */
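
// Usage sketch (illustrative only, not part of this header).  Assuming one of
// the KMPAffinity implementations in this file has been selected, a
// hypothetical caller could pin the current thread to processor 3 roughly as
// follows; the variable names below are made up for the example.
//
//   KMPAffinity *api = new KMPHwlocAffinity();        // or KMPNativeAffinity()
//   api->determine_capable("KMP_AFFINITY");           // probe OS/hwloc support
//   KMPAffinity::Mask *m = api->allocate_mask();
//   m->zero();
//   m->set(3);                                        // request processor 3
//   m->set_system_affinity(/*abort_on_error=*/false);
//   api->deallocate_mask(m);
//   delete api;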

#if KMP_OS_LINUX
/*
 * On some of the older OSes that we build on, these constants aren't present
 * in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
 * all systems of the same arch where they are defined, and they cannot
 * change: they are set in stone forever.
 */
#include <sys/syscall.h>
# if KMP_ARCH_X86 || KMP_ARCH_ARM
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity  241
#  elif __NR_sched_setaffinity != 241
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity  242
#  elif __NR_sched_getaffinity != 242
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# elif KMP_ARCH_AARCH64
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity  122
#  elif __NR_sched_setaffinity != 122
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity  123
#  elif __NR_sched_getaffinity != 123
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# elif KMP_ARCH_X86_64
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity  203
#  elif __NR_sched_setaffinity != 203
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity  204
#  elif __NR_sched_getaffinity != 204
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# elif KMP_ARCH_PPC64
#  ifndef __NR_sched_setaffinity
#   define __NR_sched_setaffinity  222
#  elif __NR_sched_setaffinity != 222
#   error Wrong code for setaffinity system call.
#  endif /* __NR_sched_setaffinity */
#  ifndef __NR_sched_getaffinity
#   define __NR_sched_getaffinity  223
#  elif __NR_sched_getaffinity != 223
#   error Wrong code for getaffinity system call.
#  endif /* __NR_sched_getaffinity */
# else
#  error Unknown or unsupported architecture
# endif /* KMP_ARCH_* */
class KMPNativeAffinity : public KMPAffinity {
    class Mask : public KMPAffinity::Mask {
        typedef unsigned char mask_t;
        static const int BITS_PER_MASK_T = sizeof(mask_t)*CHAR_BIT;
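        // The mask is a flat array of __kmp_affin_mask_size bytes with one bit
        // per logical processor: bit i lives in byte i/BITS_PER_MASK_T at bit
        // position i%BITS_PER_MASK_T.  For example (assuming CHAR_BIT == 8),
        // set(11) turns on bit 3 of mask[1].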
    public:
        mask_t* mask;
        Mask() { mask = (mask_t*)__kmp_allocate(__kmp_affin_mask_size); }
        ~Mask() { if (mask) __kmp_free(mask); }
        void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); }
        bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); }
        void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); }
        void zero() override {
            for (size_t i=0; i<__kmp_affin_mask_size; ++i)
                mask[i] = 0;
        }
        void copy(const KMPAffinity::Mask* src) override {
            const Mask * convert = static_cast<const Mask*>(src);
            for (size_t i=0; i<__kmp_affin_mask_size; ++i)
                mask[i] = convert->mask[i];
        }
        void bitwise_and(const KMPAffinity::Mask* rhs) override {
            const Mask * convert = static_cast<const Mask*>(rhs);
            for (size_t i=0; i<__kmp_affin_mask_size; ++i)
                mask[i] &= convert->mask[i];
        }
        void bitwise_or(const KMPAffinity::Mask* rhs) override {
            const Mask * convert = static_cast<const Mask*>(rhs);
            for (size_t i=0; i<__kmp_affin_mask_size; ++i)
                mask[i] |= convert->mask[i];
        }
        void bitwise_not() override {
            for (size_t i=0; i<__kmp_affin_mask_size; ++i)
                mask[i] = ~(mask[i]);
        }
        int begin() const override {
            int retval = 0;
            while (retval < end() && !is_set(retval))
                ++retval;
            return retval;
        }
        int end() const override { return __kmp_affin_mask_size*BITS_PER_MASK_T; }
        int next(int previous) const override {
            int retval = previous+1;
            while (retval < end() && !is_set(retval))
                ++retval;
            return retval;
        }
        int get_system_affinity(bool abort_on_error) override {
            KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                        "Illegal get affinity operation when not capable");
            int retval = syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
            if (retval >= 0) {
                return 0;
            }
            int error = errno;
            if (abort_on_error) {
                __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
            }
            return error;
        }
        int set_system_affinity(bool abort_on_error) const override {
            KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                        "Illegal set affinity operation when not capable");
            int retval = syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
            if (retval >= 0) {
                return 0;
            }
            int error = errno;
            if (abort_on_error) {
                __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
            }
            return error;
        }
    };
    void determine_capable(const char* env_var) override {
        __kmp_affinity_determine_capable(env_var);
    }
    void bind_thread(int which) override {
        __kmp_affinity_bind_thread(which);
    }
    KMPAffinity::Mask* allocate_mask() override {
        KMPNativeAffinity::Mask* retval = new Mask();
        return retval;
    }
    void deallocate_mask(KMPAffinity::Mask* m) override {
        KMPNativeAffinity::Mask* native_mask = static_cast<KMPNativeAffinity::Mask*>(m);
        delete native_mask;
    }
    KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
    void deallocate_mask_array(KMPAffinity::Mask* array) override {
        Mask* linux_array = static_cast<Mask*>(array);
        delete[] linux_array;
    }
    KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
        Mask* linux_array = static_cast<Mask*>(array);
        return &(linux_array[index]);
    }
    api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX */

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
    class Mask : public KMPAffinity::Mask {
        typedef ULONG_PTR mask_t;
        static const int BITS_PER_MASK_T = sizeof(mask_t)*CHAR_BIT;
        mask_t* mask;
    public:
        Mask() { mask = (mask_t*)__kmp_allocate(sizeof(mask_t)*__kmp_num_proc_groups); }
        ~Mask() { if (mask) __kmp_free(mask); }
        void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); }
        bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); }
        void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); }
        void zero() override {
            for (size_t i=0; i<__kmp_num_proc_groups; ++i)
                mask[i] = 0;
        }
        void copy(const KMPAffinity::Mask* src) override {
            const Mask * convert = static_cast<const Mask*>(src);
            for (size_t i=0; i<__kmp_num_proc_groups; ++i)
                mask[i] = convert->mask[i];
        }
        void bitwise_and(const KMPAffinity::Mask* rhs) override {
            const Mask * convert = static_cast<const Mask*>(rhs);
            for (size_t i=0; i<__kmp_num_proc_groups; ++i)
                mask[i] &= convert->mask[i];
        }
        void bitwise_or(const KMPAffinity::Mask* rhs) override {
            const Mask * convert = static_cast<const Mask*>(rhs);
            for (size_t i=0; i<__kmp_num_proc_groups; ++i)
                mask[i] |= convert->mask[i];
        }
        void bitwise_not() override {
            for (size_t i=0; i<__kmp_num_proc_groups; ++i)
                mask[i] = ~(mask[i]);
        }
        int begin() const override {
            int retval = 0;
            while (retval < end() && !is_set(retval))
                ++retval;
            return retval;
        }
        int end() const override { return __kmp_num_proc_groups*BITS_PER_MASK_T; }
        int next(int previous) const override {
            int retval = previous+1;
            while (retval < end() && !is_set(retval))
                ++retval;
            return retval;
        }
        int set_system_affinity(bool abort_on_error) const override {
            if (__kmp_num_proc_groups > 1) {
                // Check for a valid mask.
                GROUP_AFFINITY ga;
                int group = get_proc_group();
                if (group < 0) {
                    if (abort_on_error) {
                        KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
                    }
                    return -1;
                }
                // Transform the bit vector into a GROUP_AFFINITY struct
                // and make the system call to set affinity.
                ga.Group = group;
                ga.Mask = mask[group];
                ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

                KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
                if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetThreadAffMask),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
            } else {
                if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetThreadAffMask),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
            }
            return 0;
        }
        int get_system_affinity(bool abort_on_error) override {
            if (__kmp_num_proc_groups > 1) {
                this->zero();
                GROUP_AFFINITY ga;
                KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
                if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
                if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) || (ga.Mask == 0)) {
                    return -1;
                }
                mask[ga.Group] = ga.Mask;
            } else {
                mask_t newMask, sysMask, retval;
                if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
                retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
                if (!retval) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                    return error;
                }
                newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
                if (!newMask) {
                    DWORD error = GetLastError();
                    if (abort_on_error) {
                        __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                                  KMP_ERR(error), __kmp_msg_null);
                    }
                }
                *mask = retval;
            }
            return 0;
        }
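        // Report which Windows processor group this mask uses.  Returns the
        // group index when exactly one group has bits set, 1 when processor
        // groups are not in use, and -1 when the mask is empty or spans more
        // than one group (it then cannot be expressed as a single
        // GROUP_AFFINITY).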
        int get_proc_group() const override {
            int group = -1;
            if (__kmp_num_proc_groups == 1) {
                return 1;
            }
            for (int i = 0; i < __kmp_num_proc_groups; i++) {
                if (mask[i] == 0)
                    continue;
                if (group >= 0)
                    return -1;
                group = i;
            }
            return group;
        }
    };
    void determine_capable(const char* env_var) override {
        __kmp_affinity_determine_capable(env_var);
    }
    void bind_thread(int which) override {
        __kmp_affinity_bind_thread(which);
    }
    KMPAffinity::Mask* allocate_mask() override { return new Mask(); }
    void deallocate_mask(KMPAffinity::Mask* m) override { delete m; }
    KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
    void deallocate_mask_array(KMPAffinity::Mask* array) override {
        Mask* windows_array = static_cast<Mask*>(array);
        delete[] windows_array;
    }
    KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
        Mask* windows_array = static_cast<Mask*>(array);
        return &(windows_array[index]);
    }
    api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */

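// An Address records the position of one OS processor within the machine
// topology as a path of labels from the root of the hierarchy down to the
// leaf (e.g. {package, core, hardware thread}).  As an illustration (the
// concrete numbers are hypothetical), on a machine with 2 packages, 4 cores
// per package and 2 threads per core, the second thread of core 3 on
// package 1 could be described as labels = {1, 3, 1} with depth = 3.
// childNums[i] is roughly the index of the node at level i among its
// siblings, and 'leader' is a scratch flag (initialized to FALSE) used by the
// topology-mapping code.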
class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth)
      : depth(_depth), leader(FALSE) {
    }
    Address &operator=(const Address &b) {
        depth = b.depth;
        for (unsigned i = 0; i < depth; i++) {
            labels[i] = b.labels[i];
            childNums[i] = b.childNums[i];
        }
        leader = FALSE;
        return *this;
    }
    bool operator==(const Address &b) const {
        if (depth != b.depth)
            return false;
        for (unsigned i = 0; i < depth; i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool isClose(const Address &b, int level) const {
        if (depth != b.depth)
            return false;
        if ((unsigned)level >= depth)
            return true;
        for (unsigned i = 0; i < (depth - level); i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool operator!=(const Address &b) const {
        return !operator==(b);
    }
    void print() const {
        unsigned i;
        printf("Depth: %u --- ", depth);
        for (i = 0; i < depth; i++) {
            printf("%u ", labels[i]);
        }
    }
};

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {
    }
    AddrUnsPair &operator=(const AddrUnsPair &b)
    {
        first = b.first;
        second = b.second;
        return *this;
    }
    void print() const {
        printf("first = "); first.print();
        printf(" --- second = %u", second);
    }
    bool operator==(const AddrUnsPair &b) const {
        if (first != b.first) return false;
        if (second != b.second) return false;
        return true;
    }
    bool operator!=(const AddrUnsPair &b) const {
        return !operator==(b);
    }
};


static int
__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    for (i = 0; i < depth; i++) {
        if (aa->labels[i] < bb->labels[i]) return -1;
        if (aa->labels[i] > bb->labels[i]) return 1;
    }
    return 0;
}


/** A structure for holding machine-specific hierarchy info to be computed once at init.
    This structure represents a mapping of threads to the actual machine hierarchy, or to
    our best guess at what the hierarchy might be, for the purpose of performing an
    efficient barrier. In the worst case, when there is no machine hierarchy information,
    it produces a tree suitable for a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
    /** Good default values for number of leaves and branching factor, given no affinity information.
        Behaves a bit like hyper barrier. */
    static const kmp_uint32 maxLeaves=4;
    static const kmp_uint32 minBranch=4;
    /** Number of levels in the hierarchy. Typical levels are threads/core, cores/package
        or socket, packages/node, nodes/machine, etc. We don't want to get specific with
        nomenclature. When the machine is oversubscribed we add levels to duplicate the
        hierarchy, doubling the thread capacity of the hierarchy each time we add a level. */
    kmp_uint32 maxLevels;

    /** This is specifically the depth of the machine configuration hierarchy, in terms of the
        number of levels along the longest path from root to any leaf. It corresponds to the
        number of entries in numPerLevel if we exclude all but one trailing 1. */
    kmp_uint32 depth;
    kmp_uint32 base_num_threads;
    enum init_status { initialized=0, not_initialized=1, initializing=2 };
    volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 2=initialization in progress
    volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

    /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
        node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
        and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
    kmp_uint32 *numPerLevel;
    kmp_uint32 *skipPerLevel;
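    // skipPerLevel[i] is the number of leaf threads spanned by one subtree rooted
    // at level i: skipPerLevel[0] = 1 and skipPerLevel[i] =
    // numPerLevel[i-1] * skipPerLevel[i-1] (see init() below).  For the example
    // above, numPerLevel = {2, 4, 4, 1, ...} gives skipPerLevel = {1, 2, 8, 32, ...};
    // entries past the natural depth are doubled to absorb oversubscription.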

    void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
        int hier_depth = adr2os[0].first.depth;
        int level = 0;
        for (int i=hier_depth-1; i>=0; --i) {
            int max = -1;
            for (int j=0; j<num_addrs; ++j) {
                int next = adr2os[j].first.childNums[i];
                if (next > max) max = next;
            }
            numPerLevel[level] = max+1;
            ++level;
        }
    }

    hierarchy_info() : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

    void fini() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); }

    void init(AddrUnsPair *adr2os, int num_addrs)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, not_initialized, initializing);
        if (bool_result == 0) { // Wait for initialization
            while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE();
            return;
        }
        KMP_DEBUG_ASSERT(bool_result==1);

        /* Added explicit initialization of the data fields here to prevent usage of dirty value
           observed when static library is re-initialized multiple times (e.g. when
           non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
        depth = 1;
        resizing = 0;
        maxLevels = 7;
        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
        skipPerLevel = &(numPerLevel[maxLevels]);
        for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        // Sort table by physical ID
        if (adr2os) {
            qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
            deriveLevels(adr2os, num_addrs);
        }
        else {
            numPerLevel[0] = maxLeaves;
            numPerLevel[1] = num_addrs/maxLeaves;
            if (num_addrs%maxLeaves) numPerLevel[1]++;
        }

        base_num_threads = num_addrs;
        for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
            if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
                depth++;

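        // Optimize the width of the hierarchy: repeatedly halve any level that
        // is wider than 'branch' (or wider than maxLeaves at level 0), pushing
        // the factor of two up into the next level so the total thread
        // capacity of the tree is preserved.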
        kmp_uint32 branch = minBranch;
        if (numPerLevel[0] == 1) branch = num_addrs/maxLeaves;
        if (branch < minBranch) branch = minBranch;
        for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
            while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>maxLeaves)) { // max 4 on level 0!
                if (numPerLevel[d] & 1) numPerLevel[d]++;
                numPerLevel[d] = numPerLevel[d] >> 1;
                if (numPerLevel[d+1] == 1) depth++;
                numPerLevel[d+1] = numPerLevel[d+1] << 1;
            }
            if (numPerLevel[0] == 1) {
                branch = branch >> 1;
                if (branch < 4) branch = minBranch;
            }
        }

        for (kmp_uint32 i=1; i<depth; ++i)
            skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
        // Fill in hierarchy in the case of oversubscription
        for (kmp_uint32 i=depth; i<maxLevels; ++i)
            skipPerLevel[i] = 2*skipPerLevel[i-1];

        uninitialized = initialized; // One writer

    }

    // Resize the hierarchy if nproc changes to something larger than before
    void resize(kmp_uint32 nproc)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
        while (bool_result == 0) { // someone else is trying to resize
            KMP_CPU_PAUSE();
            if (nproc <= base_num_threads) // happy with other thread's resize
                return;
            else // try to resize
                bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
        }
        KMP_DEBUG_ASSERT(bool_result!=0);
        if (nproc <= base_num_threads) return; // happy with other thread's resize

        // Calculate new maxLevels
        kmp_uint32 old_sz = skipPerLevel[depth-1];
        kmp_uint32 incs = 0, old_maxLevels = maxLevels;
        // First see if old maxLevels is enough to contain new size
        for (kmp_uint32 i=depth; i<maxLevels && nproc>old_sz; ++i) {
            skipPerLevel[i] = 2*skipPerLevel[i-1];
            numPerLevel[i-1] *= 2;
            old_sz *= 2;
            depth++;
        }
        if (nproc > old_sz) { // Not enough space, need to expand hierarchy
            while (nproc > old_sz) {
                old_sz *= 2;
                incs++;
                depth++;
            }
            maxLevels += incs;

            // Resize arrays
            kmp_uint32 *old_numPerLevel = numPerLevel;
            kmp_uint32 *old_skipPerLevel = skipPerLevel;
            numPerLevel = skipPerLevel = NULL;
            numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
            skipPerLevel = &(numPerLevel[maxLevels]);

            // Copy old elements from old arrays
            for (kmp_uint32 i=0; i<old_maxLevels; ++i) {
                numPerLevel[i] = old_numPerLevel[i];
                skipPerLevel[i] = old_skipPerLevel[i];
            }

            // Init new elements in arrays to 1
            for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) {
                numPerLevel[i] = 1;
                skipPerLevel[i] = 1;
            }

            // Free old arrays
            __kmp_free(old_numPerLevel);
        }

        // Fill in oversubscription levels of hierarchy
        for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
            skipPerLevel[i] = 2*skipPerLevel[i-1];

        base_num_threads = nproc;
        resizing = 0; // One writer

    }
};
#endif // KMP_AFFINITY_H