Blame - llvm/test/CodeGen/AMDGPU/clamp.ll - toolchain/llvm-project

blob: 6a78290f9a82d91bd5d25dfb12b629241e20e26b [file] [log] [blame]

Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	1	; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
				2	; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
				3
				4	; GCN-LABEL: {{^}}v_clamp_f32:
				5	; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
				6	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				7	define amdgpu_kernel void @v_clamp_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				8	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				9	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				10	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				11	%a = load float, float addrspace(1)* %gep0
				12	%max = call float @llvm.maxnum.f32(float %a, float 0.0)
				13	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				14	; Baseline: minnum(maxnum(a, 0.0), 1.0) is expected to select as one v_max_f32 with the clamp modifier.
				15	store float %med, float addrspace(1)* %out.gep
				16	ret void
				17	}
				18
				19	; GCN-LABEL: {{^}}v_clamp_neg_f32:
				20	; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
				21	; GCN: v_max_f32_e64 v{{[0-9]+}}, -[[A]], -[[A]] clamp{{$}}
				22	define amdgpu_kernel void @v_clamp_neg_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				23	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				24	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				25	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				26	%a = load float, float addrspace(1)* %gep0
				27	%fneg.a = fsub float -0.0, %a ; fneg spelled as (-0.0 - a)
				28	%max = call float @llvm.maxnum.f32(float %fneg.a, float 0.0)
				29	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				30	; The negation is expected to fold into the source modifiers of the clamping v_max_f32.
				31	store float %med, float addrspace(1)* %out.gep
				32	ret void
				33	}
				34
				35	; GCN-LABEL: {{^}}v_clamp_negabs_f32:
				36	; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
				37	; GCN: v_max_f32_e64 v{{[0-9]+}}, -|[[A]]|, -|[[A]]| clamp{{$}}
				38	define amdgpu_kernel void @v_clamp_negabs_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				39	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				40	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				41	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				42	%a = load float, float addrspace(1)* %gep0
				43	%fabs.a = call float @llvm.fabs.f32(float %a)
				44	%fneg.fabs.a = fsub float -0.0, %fabs.a ; -(|a|), spelled as (-0.0 - |a|)
				45	; Both fabs and fneg are expected to fold into the source modifiers of the clamping v_max_f32.
				46	%max = call float @llvm.maxnum.f32(float %fneg.fabs.a, float 0.0)
				47	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				48
				49	store float %med, float addrspace(1)* %out.gep
				50	ret void
				51	}
				52
				53	; GCN-LABEL: {{^}}v_clamp_negzero_f32:
				54	; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
				55	; GCN: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1
				56	; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[SIGNBIT]], 1.0
				57	define amdgpu_kernel void @v_clamp_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				58	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				59	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				60	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				61	%a = load float, float addrspace(1)* %gep0
				62	%max = call float @llvm.maxnum.f32(float %a, float -0.0) ; lower bound is -0.0, not +0.0
				63	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				64	; Negative case: the checks expect v_med3_f32 against a materialized sign-bit constant, not the clamp modifier.
				65	store float %med, float addrspace(1)* %out.gep
				66	ret void
				67	}
				68
				69	; GCN-LABEL: {{^}}v_clamp_multi_use_max_f32:
				70	; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
				71	; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
				72	; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
				73	define amdgpu_kernel void @v_clamp_multi_use_max_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				74	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				75	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				76	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				77	%a = load float, float addrspace(1)* %gep0
				78	%max = call float @llvm.maxnum.f32(float %a, float 0.0)
				79	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				80	; %max has a second user below, so the checks expect separate v_max_f32 / v_min_f32 instructions.
				81	store float %med, float addrspace(1)* %out.gep
				82	store volatile float %max, float addrspace(1)* undef
				83	ret void
				84	}
				85
				86	; GCN-LABEL: {{^}}v_clamp_f16:
				87	; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
				88	; VI: v_max_f16_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				89
Matt Arsenault	d5c6515	2017-02-22 23:27:53 +0000	[diff] [blame]	90	; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], [[A]] clamp{{$}}
				91	; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	92	define amdgpu_kernel void @v_clamp_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
				93	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				94	%gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
				95	%out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
				96	%a = load half, half addrspace(1)* %gep0
				97	%max = call half @llvm.maxnum.f16(half %a, half 0.0)
				98	%med = call half @llvm.minnum.f16(half %max, half 1.0)
				99	; Half clamp: the VI run expects v_max_f16 with clamp; the SI run expects the clamp on the f16-to-f32 convert.
				100	store half %med, half addrspace(1)* %out.gep
				101	ret void
				102	}
				103
				104	; GCN-LABEL: {{^}}v_clamp_neg_f16:
				105	; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
				106	; VI: v_max_f16_e64 v{{[0-9]+}}, -[[A]], -[[A]] clamp{{$}}
				107
				108	; FIXME: Better to fold neg into max
Matt Arsenault	d5c6515	2017-02-22 23:27:53 +0000	[diff] [blame]	109	; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]] clamp{{$}}
				110	; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	111	define amdgpu_kernel void @v_clamp_neg_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
				112	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				113	%gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
				114	%out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
				115	%a = load half, half addrspace(1)* %gep0
				116	%fneg.a = fsub half -0.0, %a ; fneg spelled as (-0.0 - a)
				117	%max = call half @llvm.maxnum.f16(half %fneg.a, half 0.0)
				118	%med = call half @llvm.minnum.f16(half %max, half 1.0)
				119	; Half clamp of a negated input: the neg is expected as a source modifier on the clamping instruction in both runs.
				120	store half %med, half addrspace(1)* %out.gep
				121	ret void
				122	}
				123
				124	; GCN-LABEL: {{^}}v_clamp_negabs_f16:
				125	; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
				126	; VI: v_max_f16_e64 v{{[0-9]+}}, -|[[A]]|, -|[[A]]| clamp{{$}}
				127
				128	; FIXME: Better to fold neg/abs into max
				129
Matt Arsenault	d5c6515	2017-02-22 23:27:53 +0000	[diff] [blame]	130	; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -|[[A]]| clamp{{$}}
				131	; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	132	define amdgpu_kernel void @v_clamp_negabs_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
				133	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				134	%gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
				135	%out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
				136	%a = load half, half addrspace(1)* %gep0
				137	%fabs.a = call half @llvm.fabs.f16(half %a)
				138	%fneg.fabs.a = fsub half -0.0, %fabs.a ; -(|a|), spelled as (-0.0 - |a|)
				139	; Half clamp of -|a|: neg and abs are expected as source modifiers on the clamping instruction in both runs.
				140	%max = call half @llvm.maxnum.f16(half %fneg.fabs.a, half 0.0)
				141	%med = call half @llvm.minnum.f16(half %max, half 1.0)
				142
				143	store half %med, half addrspace(1)* %out.gep
				144	ret void
				145	}
				146
				147	; FIXME: Do f64 instructions support clamp?
				148	; GCN-LABEL: {{^}}v_clamp_f64:
				149	; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
Matt Arsenault	79a45db	2017-02-22 23:53:37 +0000	[diff] [blame]	150	; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, [[A]], [[A]] clamp{{$}}
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	151	define amdgpu_kernel void @v_clamp_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
				152	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				153	%gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
				154	%out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
				155	%a = load double, double addrspace(1)* %gep0
				156	%max = call double @llvm.maxnum.f64(double %a, double 0.0)
				157	%med = call double @llvm.minnum.f64(double %max, double 1.0)
				158	; Double clamp: the checks expect a single v_max_f64 on the loaded register pair with the clamp modifier.
				159	store double %med, double addrspace(1)* %out.gep
				160	ret void
				161	}
				162
				163	; GCN-LABEL: {{^}}v_clamp_neg_f64:
				164	; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
Matt Arsenault	79a45db	2017-02-22 23:53:37 +0000	[diff] [blame]	165	; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, -[[A]], -[[A]] clamp{{$}}
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	166	define amdgpu_kernel void @v_clamp_neg_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
				167	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				168	%gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
				169	%out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
				170	%a = load double, double addrspace(1)* %gep0
				171	%fneg.a = fsub double -0.0, %a ; fneg spelled as (-0.0 - a)
				172	%max = call double @llvm.maxnum.f64(double %fneg.a, double 0.0)
				173	%med = call double @llvm.minnum.f64(double %max, double 1.0)
				174	; The negation is expected to fold into the source modifiers of the clamping v_max_f64.
				175	store double %med, double addrspace(1)* %out.gep
				176	ret void
				177	}
				178
				179	; GCN-LABEL: {{^}}v_clamp_negabs_f64:
				180	; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
Matt Arsenault	79a45db	2017-02-22 23:53:37 +0000	[diff] [blame]	181	; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, -|[[A]]|, -|[[A]]| clamp{{$}}
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	182	define amdgpu_kernel void @v_clamp_negabs_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
				183	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				184	%gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
				185	%out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
				186	%a = load double, double addrspace(1)* %gep0
				187	%fabs.a = call double @llvm.fabs.f64(double %a)
				188	%fneg.fabs.a = fsub double -0.0, %fabs.a ; -(|a|), spelled as (-0.0 - |a|)
				189	; Both fabs and fneg are expected to fold into the source modifiers of the clamping v_max_f64.
				190	%max = call double @llvm.maxnum.f64(double %fneg.fabs.a, double 0.0)
				191	%med = call double @llvm.minnum.f64(double %max, double 1.0)
				192
				193	store double %med, double addrspace(1)* %out.gep
				194	ret void
				195	}
				196
				197	; GCN-LABEL: {{^}}v_clamp_med3_aby_negzero_f32:
				198	; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
				199	; GCN: v_med3_f32
				200	define amdgpu_kernel void @v_clamp_med3_aby_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				201	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				202	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				203	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				204	%a = load float, float addrspace(1)* %gep0
				205	%med = call float @llvm.amdgcn.fmed3.f32(float -0.0, float 1.0, float %a) ; -0.0 bound: the checks expect v_med3_f32 to remain
				206	store float %med, float addrspace(1)* %out.gep
				207	ret void
				208	}
				209
				210	; GCN-LABEL: {{^}}v_clamp_med3_aby_f32:
				211	; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
				212	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				213	define amdgpu_kernel void @v_clamp_med3_aby_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				214	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				215	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				216	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				217	%a = load float, float addrspace(1)* %gep0
				218	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a) ; operand order (0.0, 1.0, a): expected to select as v_max_f32 with clamp
				219	store float %med, float addrspace(1)* %out.gep
				220	ret void
				221	}
				222
				223	; GCN-LABEL: {{^}}v_clamp_med3_bay_f32:
				224	; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
				225	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				226	define amdgpu_kernel void @v_clamp_med3_bay_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				227	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				228	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				229	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				230	%a = load float, float addrspace(1)* %gep0
				231	%med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a) ; operand order (1.0, 0.0, a): expected to select as v_max_f32 with clamp
				232	store float %med, float addrspace(1)* %out.gep
				233	ret void
				234	}
				235
				236	; GCN-LABEL: {{^}}v_clamp_med3_yab_f32:
				237	; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
				238	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				239	define amdgpu_kernel void @v_clamp_med3_yab_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				240	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				241	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				242	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				243	%a = load float, float addrspace(1)* %gep0
				244	%med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0) ; operand order (a, 0.0, 1.0): expected to select as v_max_f32 with clamp
				245	store float %med, float addrspace(1)* %out.gep
				246	ret void
				247	}
				248
				249	; GCN-LABEL: {{^}}v_clamp_med3_yba_f32:
				250	; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
				251	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				252	define amdgpu_kernel void @v_clamp_med3_yba_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				253	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				254	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				255	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				256	%a = load float, float addrspace(1)* %gep0
				257	%med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0) ; operand order (a, 1.0, 0.0): expected to select as v_max_f32 with clamp
				258	store float %med, float addrspace(1)* %out.gep
				259	ret void
				260	}
				261
				262	; GCN-LABEL: {{^}}v_clamp_med3_ayb_f32:
				263	; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
				264	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				265	define amdgpu_kernel void @v_clamp_med3_ayb_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				266	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				267	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				268	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				269	%a = load float, float addrspace(1)* %gep0
				270	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0) ; operand order (0.0, a, 1.0): expected to select as v_max_f32 with clamp
				271	store float %med, float addrspace(1)* %out.gep
				272	ret void
				273	}
				274
				275	; GCN-LABEL: {{^}}v_clamp_med3_bya_f32:
				276	; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
				277	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				278	define amdgpu_kernel void @v_clamp_med3_bya_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				279	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				280	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				281	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				282	%a = load float, float addrspace(1)* %gep0
				283	%med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0) ; operand order (1.0, a, 0.0): expected to select as v_max_f32 with clamp
				284	store float %med, float addrspace(1)* %out.gep
				285	ret void
				286	}
				287
				288	; GCN-LABEL: {{^}}v_clamp_constants_to_one_f32:
				289	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 1.0
				290	define amdgpu_kernel void @v_clamp_constants_to_one_f32(float addrspace(1)* %out) #0 {
				291	%lane = call i32 @llvm.amdgcn.workitem.id.x()
				292	%dst = getelementptr float, float addrspace(1)* %out, i32 %lane
				293	%folded = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 4.0) ; all-constant median of {0.0, 1.0, 4.0}; the checks expect the constant 1.0
				294	store float %folded, float addrspace(1)* %dst
				295	ret void
				296	}
				297
				298	; GCN-LABEL: {{^}}v_clamp_constants_to_zero_f32:
				299	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
				300	define amdgpu_kernel void @v_clamp_constants_to_zero_f32(float addrspace(1)* %out) #0 {
				301	%lane = call i32 @llvm.amdgcn.workitem.id.x()
				302	%dst = getelementptr float, float addrspace(1)* %out, i32 %lane
				303	%folded = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float -4.0) ; all-constant median of {0.0, 1.0, -4.0}; the checks expect the constant 0
				304	store float %folded, float addrspace(1)* %dst
				305	ret void
				306	}
				307
				308	; GCN-LABEL: {{^}}v_clamp_constant_preserve_f32:
				309	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0.5
				310	define amdgpu_kernel void @v_clamp_constant_preserve_f32(float addrspace(1)* %out) #0 {
				311	%lane = call i32 @llvm.amdgcn.workitem.id.x()
				312	%dst = getelementptr float, float addrspace(1)* %out, i32 %lane
				313	%folded = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0.5) ; 0.5 already lies between the bounds; the checks expect it unchanged
				314	store float %folded, float addrspace(1)* %dst
				315	ret void
				316	}
				317
				318	; GCN-LABEL: {{^}}v_clamp_constant_preserve_denorm_f32:
				319	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7fffff{{$}}
				320	define amdgpu_kernel void @v_clamp_constant_preserve_denorm_f32(float addrspace(1)* %out) #0 {
				321	%lane = call i32 @llvm.amdgcn.workitem.id.x()
				322	%dst = getelementptr float, float addrspace(1)* %out, i32 %lane
				323	%folded = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 8388607 to float)) ; 8388607 == 0x7fffff; the checks expect that bit pattern to survive folding
				324	store float %folded, float addrspace(1)* %dst
				325	ret void
				326	}
				327
				328	; GCN-LABEL: {{^}}v_clamp_constant_qnan_f32:
				329	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
				330	define amdgpu_kernel void @v_clamp_constant_qnan_f32(float addrspace(1)* %out) #0 {
				331	%lane = call i32 @llvm.amdgcn.workitem.id.x()
				332	%dst = getelementptr float, float addrspace(1)* %out, i32 %lane
				333	%folded = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000) ; quiet-NaN operand; with the default attributes (#0) the checks expect 0
				334	store float %folded, float addrspace(1)* %dst
				335	ret void
				336	}
				337
				338	; GCN-LABEL: {{^}}v_clamp_constant_snan_f32:
				339	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
				340	define amdgpu_kernel void @v_clamp_constant_snan_f32(float addrspace(1)* %out) #0 {
				341	%lane = call i32 @llvm.amdgcn.workitem.id.x()
				342	%dst = getelementptr float, float addrspace(1)* %out, i32 %lane
				343	%folded = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float)) ; 2139095041 == 0x7f800001 (NaN bit pattern); with #0 the checks expect 0
				344	store float %folded, float addrspace(1)* %dst
				345	ret void
				346	}
				347
				348	; ---------------------------------------------------------------------
				349	; Test non-default behaviors enabling snans and disabling dx10_clamp
				350	; ---------------------------------------------------------------------
				351
				352	; GCN-LABEL: {{^}}v_clamp_f32_no_dx10_clamp:
				353	; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
				354	; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
				355	define amdgpu_kernel void @v_clamp_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				356	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				357	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				358	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				359	%a = load float, float addrspace(1)* %gep0
				360	%max = call float @llvm.maxnum.f32(float %a, float 0.0)
				361	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				362	; Attribute group #2 turns dx10-clamp off: the checks expect v_med3_f32 instead of the clamp modifier.
				363	store float %med, float addrspace(1)* %out.gep
				364	ret void
				365	}
				366
				367	; GCN-LABEL: {{^}}v_clamp_f32_snan_dx10clamp:
				368	; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
				369	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				370	define amdgpu_kernel void @v_clamp_f32_snan_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #3 {
				371	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				372	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				373	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				374	%a = load float, float addrspace(1)* %gep0
				375	%max = call float @llvm.maxnum.f32(float %a, float 0.0)
				376	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				377	; Attribute group #3 (+dx10-clamp, +fp-exceptions): the checks still expect v_max_f32 with the clamp modifier.
				378	store float %med, float addrspace(1)* %out.gep
				379	ret void
				380	}
				381
				382	; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp:
				383	; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
				384	; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
				385	; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
				386	define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
				387	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				388	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				389	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				390	%a = load float, float addrspace(1)* %gep0
				391	%max = call float @llvm.maxnum.f32(float %a, float 0.0)
				392	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				393	; Attribute group #4 (-dx10-clamp, +fp-exceptions): the checks expect separate v_max_f32 / v_min_f32 instructions.
				394	store float %med, float addrspace(1)* %out.gep
				395	ret void
				396	}
				397
				398	; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp_nnan_src:
				399	; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
				400	; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
				401	define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp_nnan_src(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
				402	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				403	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				404	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				405	%a = load float, float addrspace(1)* %gep0
				406	%add = fadd nnan float %a, 1.0 ; clamp input carries the nnan fast-math flag
				407	%max = call float @llvm.maxnum.f32(float %add, float 0.0)
				408	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				409	; Same attribute group (#4) as the plain snan/no-dx10clamp case, but with an nnan source the checks expect v_med3_f32.
				410	store float %med, float addrspace(1)* %out.gep
				411	ret void
				412	}
				413
				414	; GCN-LABEL: {{^}}v_clamp_med3_aby_f32_no_dx10_clamp:
				415	; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
				416	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				417	define amdgpu_kernel void @v_clamp_med3_aby_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				418	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				419	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				420	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				421	%a = load float, float addrspace(1)* %gep0
				422	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a) ; dx10-clamp off (#2), order (0.0, 1.0, a): the checks still expect v_max_f32 with clamp
				423	store float %med, float addrspace(1)* %out.gep
				424	ret void
				425	}
				426
				427	; GCN-LABEL: {{^}}v_clamp_med3_bay_f32_no_dx10_clamp:
				428	; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
				429	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				430	define amdgpu_kernel void @v_clamp_med3_bay_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				431	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				432	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				433	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				434	%a = load float, float addrspace(1)* %gep0
				435	%med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a) ; dx10-clamp off (#2), order (1.0, 0.0, a): the checks still expect v_max_f32 with clamp
				436	store float %med, float addrspace(1)* %out.gep
				437	ret void
				438	}
				439
				440	; GCN-LABEL: {{^}}v_clamp_med3_yab_f32_no_dx10_clamp:
				441	; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
				442	; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
				443	define amdgpu_kernel void @v_clamp_med3_yab_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				444	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				445	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				446	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				447	%a = load float, float addrspace(1)* %gep0
				448	%med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0) ; dx10-clamp off (#2), order (a, 0.0, 1.0): v_med3_f32 expected with operands in source order
				449	store float %med, float addrspace(1)* %out.gep
				450	ret void
				451	}
				452
				453	; GCN-LABEL: {{^}}v_clamp_med3_yba_f32_no_dx10_clamp:
				454	; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
				455	; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 1.0, 0
				456	define amdgpu_kernel void @v_clamp_med3_yba_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				457	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				458	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				459	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				460	%a = load float, float addrspace(1)* %gep0
				461	%med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0) ; dx10-clamp off (#2), order (a, 1.0, 0.0): v_med3_f32 expected with operands in source order
				462	store float %med, float addrspace(1)* %out.gep
				463	ret void
				464	}
				465
				466	; GCN-LABEL: {{^}}v_clamp_med3_ayb_f32_no_dx10_clamp:
				467	; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
				468	; GCN: v_med3_f32 v{{[0-9]+}}, 0, [[A]], 1.0
				469	define amdgpu_kernel void @v_clamp_med3_ayb_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				470	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				471	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				472	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				473	%a = load float, float addrspace(1)* %gep0
				474	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0) ; dx10-clamp off (#2), order (0.0, a, 1.0): v_med3_f32 expected with operands in source order
				475	store float %med, float addrspace(1)* %out.gep
				476	ret void
				477	}
				478
				479	; GCN-LABEL: {{^}}v_clamp_med3_bya_f32_no_dx10_clamp:
				480	; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
				481	; GCN: v_med3_f32 v{{[0-9]+}}, 1.0, [[A]], 0
				482	define amdgpu_kernel void @v_clamp_med3_bya_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				483	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				484	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				485	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				486	%a = load float, float addrspace(1)* %gep0
				487	%med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0) ; dx10-clamp off (#2), order (1.0, a, 0.0): v_med3_f32 expected with operands in source order
				488	store float %med, float addrspace(1)* %out.gep
				489	ret void
				490	}
				491
				492	; GCN-LABEL: {{^}}v_clamp_constant_qnan_f32_no_dx10_clamp:
				493	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7fc00000
				494	define amdgpu_kernel void @v_clamp_constant_qnan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
				495	%lane = call i32 @llvm.amdgcn.workitem.id.x()
				496	%dst = getelementptr float, float addrspace(1)* %out, i32 %lane
				497	%folded = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000) ; quiet-NaN operand; with dx10-clamp off (#2) the checks expect the NaN bits 0x7fc00000
				498	store float %folded, float addrspace(1)* %dst
				499	ret void
				500	}
				501
				502	; GCN-LABEL: {{^}}v_clamp_constant_snan_f32_no_dx10_clamp:
				503	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7f800001
				504	define amdgpu_kernel void @v_clamp_constant_snan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
				505	%lane = call i32 @llvm.amdgcn.workitem.id.x()
				506	%dst = getelementptr float, float addrspace(1)* %out, i32 %lane
				507	%folded = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float)) ; 2139095041 == 0x7f800001; with dx10-clamp off (#2) the checks expect those bits unchanged
				508	store float %folded, float addrspace(1)* %dst
				509	ret void
				510	}
				511
				512	declare i32 @llvm.amdgcn.workitem.id.x() #1 ; per-workitem index used to address %aptr / %out
				513	declare half @llvm.maxnum.f16(half, half) #1 ; half intrinsics
				514	declare half @llvm.minnum.f16(half, half) #1
				515	declare half @llvm.fabs.f16(half) #1
				516	declare float @llvm.maxnum.f32(float, float) #1 ; float intrinsics
				517	declare float @llvm.minnum.f32(float, float) #1
				518	declare float @llvm.fabs.f32(float) #1
				519	declare float @llvm.amdgcn.fmed3.f32(float, float, float) #1
				520	declare double @llvm.maxnum.f64(double, double) #1 ; double intrinsics
				521	declare double @llvm.minnum.f64(double, double) #1
				522	declare double @llvm.fabs.f64(double) #1
				523
				524
				525	attributes #0 = { nounwind } ; default: no target-feature overrides
				526	attributes #1 = { nounwind readnone } ; applied to the intrinsic declarations
				527	attributes #2 = { nounwind "target-features"="-dx10-clamp,-fp-exceptions" "no-nans-fp-math"="false" } ; dx10-clamp off, fp-exceptions off (the *_no_dx10_clamp tests)
				528	attributes #3 = { nounwind "target-features"="+dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" } ; dx10-clamp on, fp-exceptions on (the *_snan_dx10clamp test)
				529	attributes #4 = { nounwind "target-features"="-dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" } ; dx10-clamp off, fp-exceptions on (the *_snan_no_dx10clamp tests)