Blame - clang/lib/Headers/cuda_builtin_vars.h - toolchain/llvm-project

blob: 901356b3d5ce1039e26fc543924845938e5081be [file] [log] [blame]

Artem Belevich	4e192df	2015-04-21 22:14:13 +0000	[diff] [blame]	1	/*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------===
				2	*
				3	* Permission is hereby granted, free of charge, to any person obtaining a copy
				4	* of this software and associated documentation files (the "Software"), to deal
				5	* in the Software without restriction, including without limitation the rights
				6	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
				7	* copies of the Software, and to permit persons to whom the Software is
				8	* furnished to do so, subject to the following conditions:
				9	*
				10	* The above copyright notice and this permission notice shall be included in
				11	* all copies or substantial portions of the Software.
				12	*
				13	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				14	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				15	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				16	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				17	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				18	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
				19	* THE SOFTWARE.
				20	*
				21	*===-----------------------------------------------------------------------===
				22	*/
				23
				24	#ifndef __CUDA_BUILTIN_VARS_H
				25	#define __CUDA_BUILTIN_VARS_H
				26
				27	// The file implements built-in CUDA variables using __declspec(property).
				28	// https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx
				29	// All read accesses of built-in variable fields get converted into calls to a
				30	// getter function, which in turn calls the appropriate builtin to fetch the
				31	// value.
				32	//
				33	// Example:
				34	// int x = threadIdx.x;
				35	// IR output:
				36	// %0 = call i32 @llvm.ptx.read.tid.x() #3
				37	// PTX output:
				38	// mov.u32 %r2, %tid.x;
				39
				40	#define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC) /* declares property FIELD and its getter inside a struct */ \
				41	__declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD; /* reads of FIELD are routed to the getter */ \
				42	static inline __attribute__((always_inline)) \
				43	__attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) { /* device-side getter named after FIELD */ \
				44	return INTRINSIC; /* INTRINSIC is the expression passed in at the use site */ \
				45	}
				46
				47	#if __cplusplus >= 201103L // C++11 or newer: mark the declarations as deleted
				48	#define __DELETE =delete
				49	#else // older standards: expands to nothing, leaving a plain declaration
				50	#define __DELETE
				51	#endif
				52
				53	// Make sure nobody can create instances of the special variable types. nvcc
				54	// also disallows taking the address of special variables, so we disable the
				55	// address-of operator as well.
				56	#define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName) /* declares the special members of TypeName */ \
				57	__attribute__((device)) TypeName() __DELETE; /* default constructor */ \
				58	__attribute__((device)) TypeName(const TypeName &) __DELETE; /* copy constructor */ \
				59	__attribute__((device)) void operator=(const TypeName &) const __DELETE; /* assignment */ \
				60	__attribute__((device)) TypeName *operator&() const __DELETE /* address-of; the use site supplies the ';' */
				61
				62	struct __cuda_builtin_threadIdx_t { // type of the threadIdx variable declared later in this header
				63	__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_tid_x()); // x, y, z: unsigned int properties, each backed by its tid builtin
				64	__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_tid_y());
				65	__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_tid_z());
				66	private: // the special members declared below are not accessible to user code
				67	__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);
				68	};
				69
				70	struct __cuda_builtin_blockIdx_t { // type of the blockIdx variable declared later in this header
				71	__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ctaid_x()); // x, y, z: unsigned int properties, each backed by its ctaid builtin
				72	__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ctaid_y());
				73	__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ctaid_z());
				74	private: // the special members declared below are not accessible to user code
				75	__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);
				76	};
				77
				78	struct __cuda_builtin_blockDim_t { // type of the blockDim variable declared later in this header
				79	__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ntid_x()); // x, y, z: unsigned int properties, each backed by its ntid builtin
				80	__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ntid_y());
				81	__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ntid_z());
				82	private: // the special members declared below are not accessible to user code
				83	__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);
				84	};
				85
				86	struct __cuda_builtin_gridDim_t { // type of the gridDim variable declared later in this header
				87	__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_nctaid_x()); // x, y, z: unsigned int properties, each backed by its nctaid builtin
				88	__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_nctaid_y());
				89	__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_nctaid_z());
				90	private: // the special members declared below are not accessible to user code
				91	__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);
				92	};
				93
				94	#define __CUDA_BUILTIN_VAR /* declaration prefix shared by the four variables below */ \
				95	extern const __attribute__((device)) __attribute__((weak))
				96	__CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx; // declared extern only; no definition appears in this header
				97	__CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx;
				98	__CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim;
				99	__CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim;
				100
				101	// warpSize should translate to a read of %WARP_SZ, but there is currently no
				102	// builtin to do so. According to the PTX v4.2 docs, 'to date, all target
				103	// architectures have a WARP_SZ value of 32'.
				104	__attribute__((device)) const int warpSize = 32; // hard-coded constant, not a register read
				105
				106	#undef __CUDA_DEVICE_BUILTIN
				107	#undef __CUDA_BUILTIN_VAR
				108	#undef __CUDA_DISALLOW_BUILTINVAR_ACCESS
				109	#undef __DELETE // also drop the =delete helper so no internal macro leaks to includers
				110	#endif /* __CUDA_BUILTIN_VARS_H */