Blame - llvm/test/CodeGen/AMDGPU/clamp.ll - toolchain/llvm-project

blob: a0075066f68f6f6db4bc6731d5a9a264e7fe4b4a [file] [log] [blame]

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
				3
				4	; GCN-LABEL: {{^}}v_clamp_f32:
				5	; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
				6	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				7	define amdgpu_kernel void @v_clamp_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				8	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				9	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				10	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				11	%a = load float, float addrspace(1)* %gep0
				12	%max = call float @llvm.maxnum.f32(float %a, float 0.0)
				13	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				14
				15	store float %med, float addrspace(1)* %out.gep
				16	ret void
				17	}
				18
				19	; GCN-LABEL: {{^}}v_clamp_neg_f32:
				20	; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
				21	; GCN: v_max_f32_e64 v{{[0-9]+}}, -[[A]], -[[A]] clamp{{$}}
				22	define amdgpu_kernel void @v_clamp_neg_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				23	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				24	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				25	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				26	%a = load float, float addrspace(1)* %gep0
				27	%fneg.a = fsub float -0.0, %a
				28	%max = call float @llvm.maxnum.f32(float %fneg.a, float 0.0)
				29	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				30
				31	store float %med, float addrspace(1)* %out.gep
				32	ret void
				33	}
				34
				35	; GCN-LABEL: {{^}}v_clamp_negabs_f32:
				36	; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
				37	; GCN: v_max_f32_e64 v{{[0-9]+}}, -\|[[A]]\|, -\|[[A]]\| clamp{{$}}
				38	define amdgpu_kernel void @v_clamp_negabs_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				39	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				40	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				41	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				42	%a = load float, float addrspace(1)* %gep0
				43	%fabs.a = call float @llvm.fabs.f32(float %a)
				44	%fneg.fabs.a = fsub float -0.0, %fabs.a
				45
				46	%max = call float @llvm.maxnum.f32(float %fneg.fabs.a, float 0.0)
				47	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				48
				49	store float %med, float addrspace(1)* %out.gep
				50	ret void
				51	}
				52
				53	; GCN-LABEL: {{^}}v_clamp_negzero_f32:
				54	; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
				55	; GCN: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1
				56	; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[SIGNBIT]], 1.0
				57	define amdgpu_kernel void @v_clamp_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				58	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				59	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				60	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				61	%a = load float, float addrspace(1)* %gep0
				62	%max = call float @llvm.maxnum.f32(float %a, float -0.0)
				63	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				64
				65	store float %med, float addrspace(1)* %out.gep
				66	ret void
				67	}
				68
				69	; GCN-LABEL: {{^}}v_clamp_multi_use_max_f32:
				70	; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
				71	; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
				72	; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
				73	define amdgpu_kernel void @v_clamp_multi_use_max_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				74	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				75	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				76	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				77	%a = load float, float addrspace(1)* %gep0
				78	%max = call float @llvm.maxnum.f32(float %a, float 0.0)
				79	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				80
				81	store float %med, float addrspace(1)* %out.gep
				82	store volatile float %max, float addrspace(1)* undef
				83	ret void
				84	}
				85
				86	; GCN-LABEL: {{^}}v_clamp_f16:
				87	; GCN: {{buffer\|flat}}_load_ushort [[A:v[0-9]+]]
				88	; VI: v_max_f16_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				89
Matt Arsenault	d5c6515	2017-02-22 23:27:53 +0000	[diff] [blame^]	90	; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], [[A]] clamp{{$}}
				91	; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	92	define amdgpu_kernel void @v_clamp_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
				93	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				94	%gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
				95	%out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
				96	%a = load half, half addrspace(1)* %gep0
				97	%max = call half @llvm.maxnum.f16(half %a, half 0.0)
				98	%med = call half @llvm.minnum.f16(half %max, half 1.0)
				99
				100	store half %med, half addrspace(1)* %out.gep
				101	ret void
				102	}
				103
				104	; GCN-LABEL: {{^}}v_clamp_neg_f16:
				105	; GCN: {{buffer\|flat}}_load_ushort [[A:v[0-9]+]]
				106	; VI: v_max_f16_e64 v{{[0-9]+}}, -[[A]], -[[A]] clamp{{$}}
				107
				108	; FIXME: Better to fold neg into max
Matt Arsenault	d5c6515	2017-02-22 23:27:53 +0000	[diff] [blame^]	109	; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]] clamp{{$}}
				110	; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	111	define amdgpu_kernel void @v_clamp_neg_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
				112	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				113	%gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
				114	%out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
				115	%a = load half, half addrspace(1)* %gep0
				116	%fneg.a = fsub half -0.0, %a
				117	%max = call half @llvm.maxnum.f16(half %fneg.a, half 0.0)
				118	%med = call half @llvm.minnum.f16(half %max, half 1.0)
				119
				120	store half %med, half addrspace(1)* %out.gep
				121	ret void
				122	}
				123
				124	; GCN-LABEL: {{^}}v_clamp_negabs_f16:
				125	; GCN: {{buffer\|flat}}_load_ushort [[A:v[0-9]+]]
				126	; VI: v_max_f16_e64 v{{[0-9]+}}, -\|[[A]]\|, -\|[[A]]\| clamp{{$}}
				127
				128	; FIXME: Better to fold neg/abs into max
				129
Matt Arsenault	d5c6515	2017-02-22 23:27:53 +0000	[diff] [blame^]	130	; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -\|[[A]]\| clamp{{$}}
				131	; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	132	define amdgpu_kernel void @v_clamp_negabs_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
				133	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				134	%gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
				135	%out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
				136	%a = load half, half addrspace(1)* %gep0
				137	%fabs.a = call half @llvm.fabs.f16(half %a)
				138	%fneg.fabs.a = fsub half -0.0, %fabs.a
				139
				140	%max = call half @llvm.maxnum.f16(half %fneg.fabs.a, half 0.0)
				141	%med = call half @llvm.minnum.f16(half %max, half 1.0)
				142
				143	store half %med, half addrspace(1)* %out.gep
				144	ret void
				145	}
				146
				147	; FIXME: Do f64 instructions support clamp?
				148	; GCN-LABEL: {{^}}v_clamp_f64:
				149	; GCN: {{buffer\|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
				150	; GCN: v_max_f64
				151	; GCN: v_min_f64
				152	define amdgpu_kernel void @v_clamp_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
				153	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				154	%gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
				155	%out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
				156	%a = load double, double addrspace(1)* %gep0
				157	%max = call double @llvm.maxnum.f64(double %a, double 0.0)
				158	%med = call double @llvm.minnum.f64(double %max, double 1.0)
				159
				160	store double %med, double addrspace(1)* %out.gep
				161	ret void
				162	}
				163
				164	; GCN-LABEL: {{^}}v_clamp_neg_f64:
				165	; GCN: {{buffer\|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
				166	; GCN: v_max_f64
				167	; GCN: v_min_f64
				168	define amdgpu_kernel void @v_clamp_neg_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
				169	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				170	%gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
				171	%out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
				172	%a = load double, double addrspace(1)* %gep0
				173	%fneg.a = fsub double -0.0, %a
				174	%max = call double @llvm.maxnum.f64(double %fneg.a, double 0.0)
				175	%med = call double @llvm.minnum.f64(double %max, double 1.0)
				176
				177	store double %med, double addrspace(1)* %out.gep
				178	ret void
				179	}
				180
				181	; GCN-LABEL: {{^}}v_clamp_negabs_f64:
				182	; GCN: {{buffer\|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
				183	; GCN: v_max_f64
				184	; GCN: v_min_f64
				185	define amdgpu_kernel void @v_clamp_negabs_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
				186	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				187	%gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
				188	%out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
				189	%a = load double, double addrspace(1)* %gep0
				190	%fabs.a = call double @llvm.fabs.f64(double %a)
				191	%fneg.fabs.a = fsub double -0.0, %fabs.a
				192
				193	%max = call double @llvm.maxnum.f64(double %fneg.fabs.a, double 0.0)
				194	%med = call double @llvm.minnum.f64(double %max, double 1.0)
				195
				196	store double %med, double addrspace(1)* %out.gep
				197	ret void
				198	}
				199
				200	; GCN-LABEL: {{^}}v_clamp_med3_aby_negzero_f32:
				201	; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
				202	; GCN: v_med3_f32
				203	define amdgpu_kernel void @v_clamp_med3_aby_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				204	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				205	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				206	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				207	%a = load float, float addrspace(1)* %gep0
				208	%med = call float @llvm.amdgcn.fmed3.f32(float -0.0, float 1.0, float %a)
				209	store float %med, float addrspace(1)* %out.gep
				210	ret void
				211	}
				212
				213	; GCN-LABEL: {{^}}v_clamp_med3_aby_f32:
				214	; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
				215	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				216	define amdgpu_kernel void @v_clamp_med3_aby_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				217	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				218	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				219	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				220	%a = load float, float addrspace(1)* %gep0
				221	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a)
				222	store float %med, float addrspace(1)* %out.gep
				223	ret void
				224	}
				225
				226	; GCN-LABEL: {{^}}v_clamp_med3_bay_f32:
				227	; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
				228	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				229	define amdgpu_kernel void @v_clamp_med3_bay_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				230	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				231	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				232	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				233	%a = load float, float addrspace(1)* %gep0
				234	%med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a)
				235	store float %med, float addrspace(1)* %out.gep
				236	ret void
				237	}
				238
				239	; GCN-LABEL: {{^}}v_clamp_med3_yab_f32:
				240	; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
				241	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				242	define amdgpu_kernel void @v_clamp_med3_yab_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				243	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				244	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				245	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				246	%a = load float, float addrspace(1)* %gep0
				247	%med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0)
				248	store float %med, float addrspace(1)* %out.gep
				249	ret void
				250	}
				251
				252	; GCN-LABEL: {{^}}v_clamp_med3_yba_f32:
				253	; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
				254	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				255	define amdgpu_kernel void @v_clamp_med3_yba_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				256	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				257	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				258	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				259	%a = load float, float addrspace(1)* %gep0
				260	%med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0)
				261	store float %med, float addrspace(1)* %out.gep
				262	ret void
				263	}
				264
				265	; GCN-LABEL: {{^}}v_clamp_med3_ayb_f32:
				266	; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
				267	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				268	define amdgpu_kernel void @v_clamp_med3_ayb_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				269	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				270	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				271	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				272	%a = load float, float addrspace(1)* %gep0
				273	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0)
				274	store float %med, float addrspace(1)* %out.gep
				275	ret void
				276	}
				277
				278	; GCN-LABEL: {{^}}v_clamp_med3_bya_f32:
				279	; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
				280	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				281	define amdgpu_kernel void @v_clamp_med3_bya_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				282	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				283	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				284	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				285	%a = load float, float addrspace(1)* %gep0
				286	%med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0)
				287	store float %med, float addrspace(1)* %out.gep
				288	ret void
				289	}
				290
				291	; GCN-LABEL: {{^}}v_clamp_constants_to_one_f32:
				292	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 1.0
				293	define amdgpu_kernel void @v_clamp_constants_to_one_f32(float addrspace(1)* %out) #0 {
				294	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				295	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				296	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 4.0)
				297	store float %med, float addrspace(1)* %out.gep
				298	ret void
				299	}
				300
				301	; GCN-LABEL: {{^}}v_clamp_constants_to_zero_f32:
				302	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
				303	define amdgpu_kernel void @v_clamp_constants_to_zero_f32(float addrspace(1)* %out) #0 {
				304	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				305	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				306	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float -4.0)
				307	store float %med, float addrspace(1)* %out.gep
				308	ret void
				309	}
				310
				311	; GCN-LABEL: {{^}}v_clamp_constant_preserve_f32:
				312	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0.5
				313	define amdgpu_kernel void @v_clamp_constant_preserve_f32(float addrspace(1)* %out) #0 {
				314	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				315	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				316	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0.5)
				317	store float %med, float addrspace(1)* %out.gep
				318	ret void
				319	}
				320
				321	; GCN-LABEL: {{^}}v_clamp_constant_preserve_denorm_f32:
				322	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7fffff{{$}}
				323	define amdgpu_kernel void @v_clamp_constant_preserve_denorm_f32(float addrspace(1)* %out) #0 {
				324	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				325	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				326	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 8388607 to float))
				327	store float %med, float addrspace(1)* %out.gep
				328	ret void
				329	}
				330
				331	; GCN-LABEL: {{^}}v_clamp_constant_qnan_f32:
				332	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
				333	define amdgpu_kernel void @v_clamp_constant_qnan_f32(float addrspace(1)* %out) #0 {
				334	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				335	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				336	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000)
				337	store float %med, float addrspace(1)* %out.gep
				338	ret void
				339	}
				340
				341	; GCN-LABEL: {{^}}v_clamp_constant_snan_f32:
				342	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
				343	define amdgpu_kernel void @v_clamp_constant_snan_f32(float addrspace(1)* %out) #0 {
				344	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				345	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				346	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float))
				347	store float %med, float addrspace(1)* %out.gep
				348	ret void
				349	}
				350
; ---------------------------------------------------------------------
; Test non-default behaviors enabling snans and disabling dx10_clamp
; ---------------------------------------------------------------------
				354
				355	; GCN-LABEL: {{^}}v_clamp_f32_no_dx10_clamp:
				356	; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
				357	; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
				358	define amdgpu_kernel void @v_clamp_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				359	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				360	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				361	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				362	%a = load float, float addrspace(1)* %gep0
				363	%max = call float @llvm.maxnum.f32(float %a, float 0.0)
				364	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				365
				366	store float %med, float addrspace(1)* %out.gep
				367	ret void
				368	}
				369
				370	; GCN-LABEL: {{^}}v_clamp_f32_snan_dx10clamp:
				371	; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
				372	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				373	define amdgpu_kernel void @v_clamp_f32_snan_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #3 {
				374	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				375	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				376	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				377	%a = load float, float addrspace(1)* %gep0
				378	%max = call float @llvm.maxnum.f32(float %a, float 0.0)
				379	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				380
				381	store float %med, float addrspace(1)* %out.gep
				382	ret void
				383	}
				384
				385	; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp:
				386	; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
				387	; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
				388	; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
				389	define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
				390	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				391	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				392	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				393	%a = load float, float addrspace(1)* %gep0
				394	%max = call float @llvm.maxnum.f32(float %a, float 0.0)
				395	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				396
				397	store float %med, float addrspace(1)* %out.gep
				398	ret void
				399	}
				400
				401	; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp_nnan_src:
				402	; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
				403	; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
				404	define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp_nnan_src(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
				405	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				406	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				407	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				408	%a = load float, float addrspace(1)* %gep0
				409	%add = fadd nnan float %a, 1.0
				410	%max = call float @llvm.maxnum.f32(float %add, float 0.0)
				411	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				412
				413	store float %med, float addrspace(1)* %out.gep
				414	ret void
				415	}
				416
				417	; GCN-LABEL: {{^}}v_clamp_med3_aby_f32_no_dx10_clamp:
				418	; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
				419	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				420	define amdgpu_kernel void @v_clamp_med3_aby_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				421	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				422	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				423	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				424	%a = load float, float addrspace(1)* %gep0
				425	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a)
				426	store float %med, float addrspace(1)* %out.gep
				427	ret void
				428	}
				429
				430	; GCN-LABEL: {{^}}v_clamp_med3_bay_f32_no_dx10_clamp:
				431	; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
				432	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				433	define amdgpu_kernel void @v_clamp_med3_bay_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				434	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				435	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				436	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				437	%a = load float, float addrspace(1)* %gep0
				438	%med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a)
				439	store float %med, float addrspace(1)* %out.gep
				440	ret void
				441	}
				442
				443	; GCN-LABEL: {{^}}v_clamp_med3_yab_f32_no_dx10_clamp:
				444	; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
				445	; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
				446	define amdgpu_kernel void @v_clamp_med3_yab_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				447	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				448	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				449	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				450	%a = load float, float addrspace(1)* %gep0
				451	%med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0)
				452	store float %med, float addrspace(1)* %out.gep
				453	ret void
				454	}
				455
				456	; GCN-LABEL: {{^}}v_clamp_med3_yba_f32_no_dx10_clamp:
				457	; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
				458	; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 1.0, 0
				459	define amdgpu_kernel void @v_clamp_med3_yba_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				460	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				461	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				462	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				463	%a = load float, float addrspace(1)* %gep0
				464	%med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0)
				465	store float %med, float addrspace(1)* %out.gep
				466	ret void
				467	}
				468
				469	; GCN-LABEL: {{^}}v_clamp_med3_ayb_f32_no_dx10_clamp:
				470	; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
				471	; GCN: v_med3_f32 v{{[0-9]+}}, 0, [[A]], 1.0
				472	define amdgpu_kernel void @v_clamp_med3_ayb_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				473	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				474	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				475	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				476	%a = load float, float addrspace(1)* %gep0
				477	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0)
				478	store float %med, float addrspace(1)* %out.gep
				479	ret void
				480	}
				481
				482	; GCN-LABEL: {{^}}v_clamp_med3_bya_f32_no_dx10_clamp:
				483	; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]
				484	; GCN: v_med3_f32 v{{[0-9]+}}, 1.0, [[A]], 0
				485	define amdgpu_kernel void @v_clamp_med3_bya_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				486	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				487	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				488	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				489	%a = load float, float addrspace(1)* %gep0
				490	%med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0)
				491	store float %med, float addrspace(1)* %out.gep
				492	ret void
				493	}
				494
				495	; GCN-LABEL: {{^}}v_clamp_constant_qnan_f32_no_dx10_clamp:
				496	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7fc00000
				497	define amdgpu_kernel void @v_clamp_constant_qnan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
				498	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				499	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				500	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000)
				501	store float %med, float addrspace(1)* %out.gep
				502	ret void
				503	}
				504
				505	; GCN-LABEL: {{^}}v_clamp_constant_snan_f32_no_dx10_clamp:
				506	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7f800001
				507	define amdgpu_kernel void @v_clamp_constant_snan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
				508	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				509	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				510	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float))
				511	store float %med, float addrspace(1)* %out.gep
				512	ret void
				513	}
				514
				515	declare i32 @llvm.amdgcn.workitem.id.x() #1
				516	declare float @llvm.fabs.f32(float) #1
				517	declare float @llvm.minnum.f32(float, float) #1
				518	declare float @llvm.maxnum.f32(float, float) #1
				519	declare float @llvm.amdgcn.fmed3.f32(float, float, float) #1
				520	declare double @llvm.fabs.f64(double) #1
				521	declare double @llvm.minnum.f64(double, double) #1
				522	declare double @llvm.maxnum.f64(double, double) #1
				523	declare half @llvm.fabs.f16(half) #1
				524	declare half @llvm.minnum.f16(half, half) #1
				525	declare half @llvm.maxnum.f16(half, half) #1
				526
				527
				528	attributes #0 = { nounwind }
				529	attributes #1 = { nounwind readnone }
				530	attributes #2 = { nounwind "target-features"="-dx10-clamp,-fp-exceptions" "no-nans-fp-math"="false" }
				531	attributes #3 = { nounwind "target-features"="+dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" }
				532	attributes #4 = { nounwind "target-features"="-dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" }