Blame - llvm/test/CodeGen/AMDGPU/clamp.ll - toolchain/llvm-project

blob: e73f28604b5750ff81e0ab3060cd6a7ec51552ac [file] [log] [blame]

Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	1	; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
				2	; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s \| FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI %s
				3	; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s \| FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s
					; Three llc configurations are verified: default amdgcn (prefixes GCN,SI),
					; fiji (prefixes GCN,GFX89,VI) and gfx900 (prefixes GCN,GFX89,GFX9).
					; NOTE(review): the backslash before each pipe, here and in the check patterns below,
					; looks like an escaping artifact of this blame view - confirm against the raw file.
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	4
				5	; GCN-LABEL: {{^}}v_clamp_f32:
					; Clamps a loaded f32 to the 0.0..1.0 range as maxnum(%a, 0.0) followed by minnum(%max, 1.0).
					; Expected on every target: a single v_max_f32_e64 of the loaded register with itself,
					; carrying the clamp modifier.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	6	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	7	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				8	define amdgpu_kernel void @v_clamp_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				9	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				10	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				11	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				12	%a = load float, float addrspace(1)* %gep0
				13	%max = call float @llvm.maxnum.f32(float %a, float 0.0)
				14	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				15
				16	store float %med, float addrspace(1)* %out.gep
				17	ret void
				18	}
				19
				20	; GCN-LABEL: {{^}}v_clamp_neg_f32:
					; Same 0.0..1.0 clamp, but the input is negated first (fsub -0.0, %a).
					; The negation is expected to appear as a source modifier on both operands
					; of the clamped v_max_f32_e64.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	21	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	22	; GCN: v_max_f32_e64 v{{[0-9]+}}, -[[A]], -[[A]] clamp{{$}}
				23	define amdgpu_kernel void @v_clamp_neg_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				24	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				25	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				26	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				27	%a = load float, float addrspace(1)* %gep0
				28	%fneg.a = fsub float -0.0, %a
				29	%max = call float @llvm.maxnum.f32(float %fneg.a, float 0.0)
				30	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				31
				32	store float %med, float addrspace(1)* %out.gep
				33	ret void
				34	}
				35
				36	; GCN-LABEL: {{^}}v_clamp_negabs_f32:
					; Clamp of -fabs(%a) to the 0.0..1.0 range.
					; Both the neg and abs source modifiers are expected on both operands
					; of the clamped v_max_f32_e64.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	37	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	38	; GCN: v_max_f32_e64 v{{[0-9]+}}, -\|[[A]]\|, -\|[[A]]\| clamp{{$}}
				39	define amdgpu_kernel void @v_clamp_negabs_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				40	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				41	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				42	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				43	%a = load float, float addrspace(1)* %gep0
				44	%fabs.a = call float @llvm.fabs.f32(float %a)
				45	%fneg.fabs.a = fsub float -0.0, %fabs.a
				46
				47	%max = call float @llvm.maxnum.f32(float %fneg.fabs.a, float 0.0)
				48	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				49
				50	store float %med, float addrspace(1)* %out.gep
				51	ret void
				52	}
				53
				54	; GCN-LABEL: {{^}}v_clamp_negzero_f32:
					; The lower bound is -0.0 instead of 0.0, and the clamped value is the nnan result of %a + 0.5.
					; The checks expect no clamp modifier here: a v_max_f32_e32 against 0x80000000
					; (the f32 bit pattern of -0.0) followed by a v_min_f32_e32 against 1.0.
					; The load and the add are matched with DAG checks, so their relative order is not fixed.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	55	; GCN-DAG: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	c3dc8e6	2018-08-03 18:27:52 +0000	[diff] [blame]	56	; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], 0.5, [[A]]
Matt Arsenault	ebf4614	2018-09-18 02:34:54 +0000	[diff] [blame^]	57	; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0x80000000, [[ADD]]
				58	; GCN: v_min_f32_e32 v{{[0-9]+}}, 1.0, [[MAX]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	59	define amdgpu_kernel void @v_clamp_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				60	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				61	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				62	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				63	%a = load float, float addrspace(1)* %gep0
Matt Arsenault	c3dc8e6	2018-08-03 18:27:52 +0000	[diff] [blame]	64	%add = fadd nnan float %a, 0.5
				65	%max = call float @llvm.maxnum.f32(float %add, float -0.0)
				66	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				67
				68	store float %med, float addrspace(1)* %out.gep
				69	ret void
				70	}
				71
				72	; FIXME: Weird inconsistency in how -0.0 is treated. It is accepted if the clamp
				73	; is matched through med3, but not if the clamp is matched directly. Is this correct?
				74
				75	; GCN-LABEL: {{^}}v_clamp_negzero_maybe_snan_f32:
					; -0.0 lower bound applied directly to the loaded value (no nnan-flagged producer in between).
					; The checks expect a separate v_max_f32_e32 against 0x80000000 (the f32 bit pattern of -0.0)
					; and a v_min_f32_e32 against 1.0.
				76	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
				77	; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0x80000000, [[A]]
				78	; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
				79	define amdgpu_kernel void @v_clamp_negzero_maybe_snan_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				80	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				81	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				82	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				83	%a = load float, float addrspace(1)* %gep0
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	84	%max = call float @llvm.maxnum.f32(float %a, float -0.0)
				85	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				86
				87	store float %med, float addrspace(1)* %out.gep
				88	ret void
				89	}
				90
				91	; GCN-LABEL: {{^}}v_clamp_multi_use_max_f32:
					; The maxnum result feeds the minnum and is also stored on its own (volatile store to undef),
					; so %max has two uses. The checks expect a separate v_max_f32_e32 against 0
					; and a v_min_f32_e32 against 1.0.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	92	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	93	; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
				94	; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
				95	define amdgpu_kernel void @v_clamp_multi_use_max_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				96	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				97	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				98	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				99	%a = load float, float addrspace(1)* %gep0
				100	%max = call float @llvm.maxnum.f32(float %a, float 0.0)
				101	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				102
				103	store float %med, float addrspace(1)* %out.gep
				104	store volatile float %max, float addrspace(1)* undef
				105	ret void
				106	}
				107
				108	; GCN-LABEL: {{^}}v_clamp_f16:
					; f16 version of the 0.0..1.0 clamp (maxnum with 0.0, then minnum with 1.0).
					; The fiji/gfx900 checks expect a clamped v_max_f16_e64 of the loaded register with itself.
					; The default-target (SI prefix) checks expect the clamp on the f16-to-f32 conversion,
					; followed by a conversion back to f16.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	109	; GCN: {{buffer\|flat\|global}}_load_ushort [[A:v[0-9]+]]
				110	; GFX89: v_max_f16_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	111
Matt Arsenault	d5c6515	2017-02-22 23:27:53 +0000	[diff] [blame]	112	; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], [[A]] clamp{{$}}
				113	; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	114	define amdgpu_kernel void @v_clamp_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
				115	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				116	%gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
				117	%out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
				118	%a = load half, half addrspace(1)* %gep0
				119	%max = call half @llvm.maxnum.f16(half %a, half 0.0)
				120	%med = call half @llvm.minnum.f16(half %max, half 1.0)
				121
				122	store half %med, half addrspace(1)* %out.gep
				123	ret void
				124	}
				125
				126	; GCN-LABEL: {{^}}v_clamp_neg_f16:
					; f16 clamp of the negated input (fsub -0.0, %a).
					; The fiji/gfx900 checks expect the neg modifier on both operands of a clamped v_max_f16_e64.
					; The default-target (SI prefix) checks expect the neg modifier and the clamp
					; on the f16-to-f32 conversion, followed by a conversion back to f16.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	127	; GCN: {{buffer\|flat\|global}}_load_ushort [[A:v[0-9]+]]
				128	; GFX89: v_max_f16_e64 v{{[0-9]+}}, -[[A]], -[[A]] clamp{{$}}
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	129
				130	; FIXME: Better to fold neg into max
Matt Arsenault	d5c6515	2017-02-22 23:27:53 +0000	[diff] [blame]	131	; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]] clamp{{$}}
				132	; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	133	define amdgpu_kernel void @v_clamp_neg_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
				134	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				135	%gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
				136	%out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
				137	%a = load half, half addrspace(1)* %gep0
				138	%fneg.a = fsub half -0.0, %a
				139	%max = call half @llvm.maxnum.f16(half %fneg.a, half 0.0)
				140	%med = call half @llvm.minnum.f16(half %max, half 1.0)
				141
				142	store half %med, half addrspace(1)* %out.gep
				143	ret void
				144	}
				145
				146	; GCN-LABEL: {{^}}v_clamp_negabs_f16:
					; f16 clamp of -fabs(%a).
					; The fiji/gfx900 checks expect neg and abs modifiers on both operands of a clamped v_max_f16_e64.
					; The default-target (SI prefix) checks expect the modifiers and the clamp
					; on the f16-to-f32 conversion, followed by a conversion back to f16.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	147	; GCN: {{buffer\|flat\|global}}_load_ushort [[A:v[0-9]+]]
				148	; GFX89: v_max_f16_e64 v{{[0-9]+}}, -\|[[A]]\|, -\|[[A]]\| clamp{{$}}
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	149
				150	; FIXME: Better to fold neg/abs into max
				151
Matt Arsenault	d5c6515	2017-02-22 23:27:53 +0000	[diff] [blame]	152	; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -\|[[A]]\| clamp{{$}}
				153	; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	154	define amdgpu_kernel void @v_clamp_negabs_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
				155	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				156	%gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
				157	%out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
				158	%a = load half, half addrspace(1)* %gep0
				159	%fabs.a = call half @llvm.fabs.f16(half %a)
				160	%fneg.fabs.a = fsub half -0.0, %fabs.a
				161
				162	%max = call half @llvm.maxnum.f16(half %fneg.fabs.a, half 0.0)
				163	%med = call half @llvm.minnum.f16(half %max, half 1.0)
				164
				165	store half %med, half addrspace(1)* %out.gep
				166	ret void
				167	}
				168
				169	; FIXME: Do f64 instructions support clamp?
				170	; GCN-LABEL: {{^}}v_clamp_f64:
					; f64 version of the 0.0..1.0 clamp (maxnum with 0.0, then minnum with 1.0).
					; Expected on every target: a clamped v_max_f64 of the loaded 64-bit register pair with itself.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	171	; GCN: {{buffer\|flat\|global}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
Matt Arsenault	79a45db	2017-02-22 23:53:37 +0000	[diff] [blame]	172	; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, [[A]], [[A]] clamp{{$}}
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	173	define amdgpu_kernel void @v_clamp_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
				174	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				175	%gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
				176	%out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
				177	%a = load double, double addrspace(1)* %gep0
				178	%max = call double @llvm.maxnum.f64(double %a, double 0.0)
				179	%med = call double @llvm.minnum.f64(double %max, double 1.0)
				180
				181	store double %med, double addrspace(1)* %out.gep
				182	ret void
				183	}
				184
				185	; GCN-LABEL: {{^}}v_clamp_neg_f64:
					; f64 clamp of the negated input (fsub -0.0, %a).
					; Expected: the neg modifier on both operands of a clamped v_max_f64.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	186	; GCN: {{buffer\|flat\|global}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
Matt Arsenault	79a45db	2017-02-22 23:53:37 +0000	[diff] [blame]	187	; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, -[[A]], -[[A]] clamp{{$}}
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	188	define amdgpu_kernel void @v_clamp_neg_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
				189	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				190	%gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
				191	%out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
				192	%a = load double, double addrspace(1)* %gep0
				193	%fneg.a = fsub double -0.0, %a
				194	%max = call double @llvm.maxnum.f64(double %fneg.a, double 0.0)
				195	%med = call double @llvm.minnum.f64(double %max, double 1.0)
				196
				197	store double %med, double addrspace(1)* %out.gep
				198	ret void
				199	}
				200
				201	; GCN-LABEL: {{^}}v_clamp_negabs_f64:
					; f64 clamp of -fabs(%a).
					; Expected: neg and abs modifiers on both operands of a clamped v_max_f64.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	202	; GCN: {{buffer\|flat\|global}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
Matt Arsenault	79a45db	2017-02-22 23:53:37 +0000	[diff] [blame]	203	; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, -\|[[A]]\|, -\|[[A]]\| clamp{{$}}
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	204	define amdgpu_kernel void @v_clamp_negabs_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
				205	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				206	%gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
				207	%out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
				208	%a = load double, double addrspace(1)* %gep0
				209	%fabs.a = call double @llvm.fabs.f64(double %a)
				210	%fneg.fabs.a = fsub double -0.0, %fabs.a
				211
				212	%max = call double @llvm.maxnum.f64(double %fneg.fabs.a, double 0.0)
				213	%med = call double @llvm.minnum.f64(double %max, double 1.0)
				214
				215	store double %med, double addrspace(1)* %out.gep
				216	ret void
				217	}
				218
				219	; GCN-LABEL: {{^}}v_clamp_med3_aby_negzero_f32:
					; fmed3 with the constants -0.0 and 1.0 and the loaded value as the third operand.
					; The check only requires that a v_med3_f32 is emitted after the load.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	220	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	221	; GCN: v_med3_f32
				222	define amdgpu_kernel void @v_clamp_med3_aby_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				223	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				224	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				225	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				226	%a = load float, float addrspace(1)* %gep0
				227	%med = call float @llvm.amdgcn.fmed3.f32(float -0.0, float 1.0, float %a)
				228	store float %med, float addrspace(1)* %out.gep
				229	ret void
				230	}
				231
				232	; GCN-LABEL: {{^}}v_clamp_med3_aby_f32:
					; fmed3 with operand order (0.0, 1.0, %a).
					; Expected: a clamped v_max_f32_e64 of the loaded register with itself.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	233	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	234	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				235	define amdgpu_kernel void @v_clamp_med3_aby_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				236	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				237	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				238	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				239	%a = load float, float addrspace(1)* %gep0
				240	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a)
				241	store float %med, float addrspace(1)* %out.gep
				242	ret void
				243	}
				244
				245	; GCN-LABEL: {{^}}v_clamp_med3_bay_f32:
					; fmed3 with operand order (1.0, 0.0, %a).
					; Expected: a clamped v_max_f32_e64 of the loaded register with itself.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	246	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	247	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				248	define amdgpu_kernel void @v_clamp_med3_bay_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				249	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				250	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				251	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				252	%a = load float, float addrspace(1)* %gep0
				253	%med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a)
				254	store float %med, float addrspace(1)* %out.gep
				255	ret void
				256	}
				257
				258	; GCN-LABEL: {{^}}v_clamp_med3_yab_f32:
					; fmed3 with operand order (%a, 0.0, 1.0).
					; Expected: a clamped v_max_f32_e64 of the loaded register with itself.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	259	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	260	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				261	define amdgpu_kernel void @v_clamp_med3_yab_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				262	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				263	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				264	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				265	%a = load float, float addrspace(1)* %gep0
				266	%med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0)
				267	store float %med, float addrspace(1)* %out.gep
				268	ret void
				269	}
				270
				271	; GCN-LABEL: {{^}}v_clamp_med3_yba_f32:
					; fmed3 with operand order (%a, 1.0, 0.0).
					; Expected: a clamped v_max_f32_e64 of the loaded register with itself.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	272	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	273	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				274	define amdgpu_kernel void @v_clamp_med3_yba_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				275	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				276	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				277	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				278	%a = load float, float addrspace(1)* %gep0
				279	%med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0)
				280	store float %med, float addrspace(1)* %out.gep
				281	ret void
				282	}
				283
				284	; GCN-LABEL: {{^}}v_clamp_med3_ayb_f32:
					; fmed3 with operand order (0.0, %a, 1.0).
					; Expected: a clamped v_max_f32_e64 of the loaded register with itself.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	285	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	286	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				287	define amdgpu_kernel void @v_clamp_med3_ayb_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				288	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				289	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				290	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				291	%a = load float, float addrspace(1)* %gep0
				292	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0)
				293	store float %med, float addrspace(1)* %out.gep
				294	ret void
				295	}
				296
				297	; GCN-LABEL: {{^}}v_clamp_med3_bya_f32:
					; fmed3 with operand order (1.0, %a, 0.0).
					; Expected: a clamped v_max_f32_e64 of the loaded register with itself.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	298	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	299	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				300	define amdgpu_kernel void @v_clamp_med3_bya_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				301	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				302	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				303	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				304	%a = load float, float addrspace(1)* %gep0
				305	%med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0)
				306	store float %med, float addrspace(1)* %out.gep
				307	ret void
				308	}
				309
				310	; GCN-LABEL: {{^}}v_clamp_constants_to_one_f32:
					; fmed3 of three constants (0.0, 1.0, 4.0).
					; Expected: the result is folded and 1.0 is materialized with a v_mov_b32.
				311	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 1.0
				312	define amdgpu_kernel void @v_clamp_constants_to_one_f32(float addrspace(1)* %out) #0 {
				313	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				314	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				315	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 4.0)
				316	store float %med, float addrspace(1)* %out.gep
				317	ret void
				318	}
				319
				320	; GCN-LABEL: {{^}}v_clamp_constants_to_zero_f32:
					; fmed3 of three constants (0.0, 1.0, -4.0).
					; Expected: the result is folded and 0 is materialized with a v_mov_b32.
				321	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
				322	define amdgpu_kernel void @v_clamp_constants_to_zero_f32(float addrspace(1)* %out) #0 {
				323	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				324	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				325	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float -4.0)
				326	store float %med, float addrspace(1)* %out.gep
				327	ret void
				328	}
				329
				330	; GCN-LABEL: {{^}}v_clamp_constant_preserve_f32:
					; fmed3 of three constants (0.0, 1.0, 0.5); 0.5 already lies between the bounds.
					; Expected: 0.5 is materialized unchanged with a v_mov_b32.
				331	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0.5
				332	define amdgpu_kernel void @v_clamp_constant_preserve_f32(float addrspace(1)* %out) #0 {
				333	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				334	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				335	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0.5)
				336	store float %med, float addrspace(1)* %out.gep
				337	ret void
				338	}
				339
				340	; GCN-LABEL: {{^}}v_clamp_constant_preserve_denorm_f32:
					; The third fmed3 operand is the f32 with bit pattern 8388607 (0x7fffff), a denormal value.
					; Expected: that bit pattern is materialized unchanged (not flushed) with a v_mov_b32.
				341	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7fffff{{$}}
				342	define amdgpu_kernel void @v_clamp_constant_preserve_denorm_f32(float addrspace(1)* %out) #0 {
				343	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				344	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				345	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 8388607 to float))
				346	store float %med, float addrspace(1)* %out.gep
				347	ret void
				348	}
				349
				350	; GCN-LABEL: {{^}}v_clamp_constant_qnan_f32:
					; The third fmed3 operand is a quiet NaN constant (0x7FF8000000000000 in LLVM's hex float form).
					; Expected under attribute group #0: the result is folded to 0.
				351	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
				352	define amdgpu_kernel void @v_clamp_constant_qnan_f32(float addrspace(1)* %out) #0 {
				353	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				354	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				355	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000)
				356	store float %med, float addrspace(1)* %out.gep
				357	ret void
				358	}
				359
				360	; GCN-LABEL: {{^}}v_clamp_constant_snan_f32:
					; The third fmed3 operand is the f32 with bit pattern 2139095041 (0x7f800001), a signaling NaN.
					; Expected under attribute group #0: the result is folded to 0.
				361	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
				362	define amdgpu_kernel void @v_clamp_constant_snan_f32(float addrspace(1)* %out) #0 {
				363	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				364	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				365	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float))
				366	store float %med, float addrspace(1)* %out.gep
				367	ret void
				368	}
				369
				370	; ---------------------------------------------------------------------
				371	; Test non-default behaviors enabling snans and disabling dx10_clamp
				372	; ---------------------------------------------------------------------
				373
				374	; GCN-LABEL: {{^}}v_clamp_f32_no_dx10_clamp:
					; maxnum/minnum clamp of the nnan result of %a + 0.5, compiled with attribute group #2.
					; NOTE(review): #2 is defined outside this view; presumably it disables dx10_clamp,
					; going by the test name - confirm.
					; Expected: a v_add_f32_e32 followed by v_med3_f32 (add, 0, 1.0), with no clamp modifier.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	375	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	c3dc8e6	2018-08-03 18:27:52 +0000	[diff] [blame]	376	; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], 0.5, [[A]]
				377	; GCN: v_med3_f32 v{{[0-9]+}}, [[ADD]], 0, 1.0
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	378	define amdgpu_kernel void @v_clamp_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				379	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				380	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				381	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				382	%a = load float, float addrspace(1)* %gep0
Matt Arsenault	c3dc8e6	2018-08-03 18:27:52 +0000	[diff] [blame]	383	%a.nnan = fadd nnan float %a, 0.5
				384	%max = call float @llvm.maxnum.f32(float %a.nnan, float 0.0)
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	385	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				386
				387	store float %med, float addrspace(1)* %out.gep
				388	ret void
				389	}
				390
				391	; GCN-LABEL: {{^}}v_clamp_f32_snan_dx10clamp:
					; maxnum/minnum clamp of %a + 0.5 (fadd without nnan), compiled with attribute group #3.
					; NOTE(review): #3 is defined outside this view; presumably it enables signaling-NaN handling
					; while keeping dx10_clamp, going by the test name - confirm.
					; Expected: the clamp modifier is folded onto the v_add_f32_e64 itself.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	392	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	c3dc8e6	2018-08-03 18:27:52 +0000	[diff] [blame]	393	; GCN: v_add_f32_e64 [[ADD:v[0-9]+]], [[A]], 0.5 clamp{{$}}
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	394	define amdgpu_kernel void @v_clamp_f32_snan_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #3 {
				395	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				396	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				397	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				398	%a = load float, float addrspace(1)* %gep0
Matt Arsenault	c3dc8e6	2018-08-03 18:27:52 +0000	[diff] [blame]	399	%add = fadd float %a, 0.5
				400	%max = call float @llvm.maxnum.f32(float %add, float 0.0)
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	401	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				402
				403	store float %med, float addrspace(1)* %out.gep
				404	ret void
				405	}
				406
				407	; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp:
					; maxnum/minnum clamp applied directly to the loaded value, compiled with attribute group #4.
					; NOTE(review): #4 is defined outside this view; presumably it enables signaling-NaN handling
					; and disables dx10_clamp, going by the test name - confirm.
					; Expected: a separate v_max_f32_e32 against 0 and a v_min_f32_e32 against 1.0.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	408	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	409	; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
				410	; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
				411	define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
				412	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				413	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				414	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				415	%a = load float, float addrspace(1)* %gep0
				416	%max = call float @llvm.maxnum.f32(float %a, float 0.0)
				417	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				418
				419	store float %med, float addrspace(1)* %out.gep
				420	ret void
				421	}
				422
				423	; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp_nnan_src:
					; Same attribute group (#4) as the snan_no_dx10clamp test, but the clamped value is
					; the nnan result of %a + 1.0.
					; NOTE(review): #4 is defined outside this view - confirm what it sets.
					; Expected: a v_add_f32_e32 followed by v_med3_f32 (add, 0, 1.0).
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	424	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	301162c	2017-11-15 21:51:43 +0000	[diff] [blame]	425	; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], 1.0, [[A]]
				426	; GCN: v_med3_f32 v{{[0-9]+}}, [[ADD]], 0, 1.0
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	427	define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp_nnan_src(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
				428	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				429	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				430	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				431	%a = load float, float addrspace(1)* %gep0
				432	%add = fadd nnan float %a, 1.0
				433	%max = call float @llvm.maxnum.f32(float %add, float 0.0)
				434	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				435
				436	store float %med, float addrspace(1)* %out.gep
				437	ret void
				438	}
				439
				440	; GCN-LABEL: {{^}}v_clamp_med3_aby_f32_no_dx10_clamp:
					; fmed3 with operand order (0.0, 1.0, %a), compiled with attribute group #2.
					; NOTE(review): #2 is defined outside this view; presumably it disables dx10_clamp - confirm.
					; Expected for this operand order: still a clamped v_max_f32_e64 of the loaded register.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	441	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	442	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				443	define amdgpu_kernel void @v_clamp_med3_aby_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				444	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				445	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				446	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				447	%a = load float, float addrspace(1)* %gep0
				448	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a)
				449	store float %med, float addrspace(1)* %out.gep
				450	ret void
				451	}
				452
				453	; GCN-LABEL: {{^}}v_clamp_med3_bay_f32_no_dx10_clamp:
					; fmed3 with operand order (1.0, 0.0, %a), compiled with attribute group #2.
					; NOTE(review): #2 is defined outside this view; presumably it disables dx10_clamp - confirm.
					; Expected for this operand order: still a clamped v_max_f32_e64 of the loaded register.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	454	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	455	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
				456	define amdgpu_kernel void @v_clamp_med3_bay_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				457	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				458	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				459	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				460	%a = load float, float addrspace(1)* %gep0
				461	%med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a)
				462	store float %med, float addrspace(1)* %out.gep
				463	ret void
				464	}
				465
				466	; GCN-LABEL: {{^}}v_clamp_med3_yab_f32_no_dx10_clamp:
					; fmed3 with operand order (%a, 0.0, 1.0), compiled with attribute group #2.
					; NOTE(review): #2 is defined outside this view; presumably it disables dx10_clamp - confirm.
					; Expected: a v_med3_f32 with the operands kept in source order, not a clamp modifier.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	467	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	468	; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
				469	define amdgpu_kernel void @v_clamp_med3_yab_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				470	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				471	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				472	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				473	%a = load float, float addrspace(1)* %gep0
				474	%med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0)
				475	store float %med, float addrspace(1)* %out.gep
				476	ret void
				477	}
				478
				479	; GCN-LABEL: {{^}}v_clamp_med3_yba_f32_no_dx10_clamp:
					; fmed3 with operand order (%a, 1.0, 0.0), compiled with attribute group #2.
					; NOTE(review): #2 is defined outside this view; presumably it disables dx10_clamp - confirm.
					; Expected: a v_med3_f32 with the operands kept in source order, not a clamp modifier.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	480	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	481	; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 1.0, 0
				482	define amdgpu_kernel void @v_clamp_med3_yba_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				483	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				484	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				485	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				486	%a = load float, float addrspace(1)* %gep0
				487	%med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0)
				488	store float %med, float addrspace(1)* %out.gep
				489	ret void
				490	}
				491
				492	; GCN-LABEL: {{^}}v_clamp_med3_ayb_f32_no_dx10_clamp:
					; fmed3 with operand order (0.0, %a, 1.0), compiled with attribute group #2.
					; NOTE(review): #2 is defined outside this view; presumably it disables dx10_clamp - confirm.
					; Expected: a v_med3_f32 with the operands kept in source order, not a clamp modifier.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	493	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	494	; GCN: v_med3_f32 v{{[0-9]+}}, 0, [[A]], 1.0
				495	define amdgpu_kernel void @v_clamp_med3_ayb_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				496	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				497	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				498	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				499	%a = load float, float addrspace(1)* %gep0
				500	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0)
				501	store float %med, float addrspace(1)* %out.gep
				502	ret void
				503	}
				504
				505	; GCN-LABEL: {{^}}v_clamp_med3_bya_f32_no_dx10_clamp:
					; fmed3 with operand order (1.0, %a, 0.0), compiled with attribute group #2.
					; NOTE(review): #2 is defined outside this view; presumably it disables dx10_clamp - confirm.
					; Expected: a v_med3_f32 with the operands kept in source order, not a clamp modifier.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	506	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	507	; GCN: v_med3_f32 v{{[0-9]+}}, 1.0, [[A]], 0
				508	define amdgpu_kernel void @v_clamp_med3_bya_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				509	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				510	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				511	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				512	%a = load float, float addrspace(1)* %gep0
				513	%med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0)
				514	store float %med, float addrspace(1)* %out.gep
				515	ret void
				516	}
				517
				518	; GCN-LABEL: {{^}}v_clamp_constant_qnan_f32_no_dx10_clamp:
					; All three fmed3 operands are constants and the third one is the NaN literal
					; 0x7FF8000000000000. With attribute group #2 (dx10-clamp disabled) the
					; expected output is a plain move of the immediate 0x7fc00000, i.e. the call
					; is expected to be folded to a constant rather than emitted as an instruction.
				519	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7fc00000
				520	define amdgpu_kernel void @v_clamp_constant_qnan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
					; No input load here: only the output pointer is indexed by the workitem id.
				521	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				522	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				523	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000)
				524	store float %med, float addrspace(1)* %out.gep
				525	ret void
				526	}
				527
				528	; GCN-LABEL: {{^}}v_clamp_constant_snan_f32_no_dx10_clamp:
					; Constant-operand fmed3 whose third operand is built by bitcasting the
					; integer 2139095041 (0x7f800001) to float. With attribute group #2
					; (dx10-clamp disabled) the expected output is a move of that same bit
					; pattern, 0x7f800001, so the NaN payload is expected to be preserved.
				529	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7f800001
				530	define amdgpu_kernel void @v_clamp_constant_snan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
					; No input load here: only the output pointer is indexed by the workitem id.
				531	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				532	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				533	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float))
				534	store float %med, float addrspace(1)* %out.gep
				535	ret void
				536	}
				537
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	538	; GCN-LABEL: {{^}}v_clamp_v2f16:
					; Packed-half clamp: maxnum against an all-zero vector followed by minnum
					; against <1.0, 1.0>. Only the gfx900 run checks the arithmetic: it expects a
					; single v_pk_max_f16 of the loaded register with itself carrying the clamp
					; modifier, and the -NOT check forbids any mention of the loaded register
					; between the load and that instruction.
					; NOTE(review): the `\|` in the load pattern below looks like an escaping
					; artifact (the RUN lines at the top of the file show `\|` where a shell pipe
					; belongs); presumably an alternation over buffer/flat/global - confirm.
				539	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
				540	; GFX9-NOT: [[A]]
				541	; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] clamp{{$}}
				542	define amdgpu_kernel void @v_clamp_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
					; Index both the input and the output pointer by the workitem id.
				543	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				544	%gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
				545	%out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
				546	%a = load <2 x half>, <2 x half> addrspace(1)* %gep0
					; Lower bound 0.0 in both lanes, then upper bound 1.0 in both lanes.
				547	%max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> zeroinitializer)
				548	%med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
				549
				550	store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
				551	ret void
				552	}
				553
				554	; GCN-LABEL: {{^}}v_clamp_v2f16_undef_elt:
					; Packed-half clamp where each limit vector has one undef lane: the lower
					; bound is <undef, 0.0> and the upper bound is <1.0, undef>. The gfx900 run
					; still expects a single v_pk_max_f16 with the clamp modifier, with no other
					; mention of the loaded register before it.
					; NOTE(review): the `\|` in the load pattern below looks like an escaping
					; artifact (the RUN lines at the top of the file show `\|` where a shell pipe
					; belongs); presumably an alternation over buffer/flat/global - confirm.
				555	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
				556	; GFX9-NOT: [[A]]
				557	; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] clamp{{$}}
				558	define amdgpu_kernel void @v_clamp_v2f16_undef_elt(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
					; Index both the input and the output pointer by the workitem id.
				559	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				560	%gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
				561	%out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
				562	%a = load <2 x half>, <2 x half> addrspace(1)* %gep0
					; Lane 0 has an undef lower bound; lane 1 has an undef upper bound.
				563	%max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half undef, half 0.0>)
				564	%med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half undef>)
				565
				566	store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
				567	ret void
				568	}
				569
				570	; GCN-LABEL: {{^}}v_clamp_v2f16_not_zero:
					; Negative test: the lower-bound vector is <2.0, 0.0>, so lane 0 is not
					; bounded below by zero. The gfx900 run expects a separate v_pk_max_f16
					; followed by a v_pk_min_f16. The other runs check only the label.
				571	; GFX9: v_pk_max_f16
				572	; GFX9: v_pk_min_f16
				573	define amdgpu_kernel void @v_clamp_v2f16_not_zero(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
					; Index both the input and the output pointer by the workitem id.
				574	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				575	%gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
				576	%out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
				577	%a = load <2 x half>, <2 x half> addrspace(1)* %gep0
					; The 2.0 in lane 0 is what distinguishes this from the plain clamp pattern.
				578	%max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 2.0, half 0.0>)
				579	%med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
				580
				581	store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
				582	ret void
				583	}
				584
				585	; GCN-LABEL: {{^}}v_clamp_v2f16_not_one:
					; Negative test: the upper-bound vector is <0.0, 1.0>, so lane 0 is not
					; bounded above by one. The gfx900 run expects a separate v_pk_max_f16
					; followed by a v_pk_min_f16. The other runs check only the label.
				586	; GFX9: v_pk_max_f16
				587	; GFX9: v_pk_min_f16
				588	define amdgpu_kernel void @v_clamp_v2f16_not_one(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
					; Index both the input and the output pointer by the workitem id.
				589	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				590	%gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
				591	%out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
				592	%a = load <2 x half>, <2 x half> addrspace(1)* %gep0
				593	%max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 0.0, half 0.0>)
					; The 0.0 in lane 0 of the upper bound is what breaks the clamp pattern.
				594	%med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 0.0, half 1.0>)
				595
				596	store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
				597	ret void
				598	}
				599
				600	; GCN-LABEL: {{^}}v_clamp_neg_v2f16:
					; Clamp of a negated packed-half value. Both lanes are negated in IR, and the
					; gfx900 run expects the negation to appear as neg_lo:[1,1] neg_hi:[1,1]
					; modifiers on a single clamped v_pk_max_f16, with no other mention of the
					; loaded register before it.
					; NOTE(review): the `\|` in the load pattern below looks like an escaping
					; artifact (the RUN lines at the top of the file show `\|` where a shell pipe
					; belongs); presumably an alternation over buffer/flat/global - confirm.
				601	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
				602	; GFX9-NOT: [[A]]
				603	; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] neg_lo:[1,1] neg_hi:[1,1] clamp{{$}}
				604	define amdgpu_kernel void @v_clamp_neg_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
					; Index both the input and the output pointer by the workitem id.
				605	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				606	%gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
				607	%out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
				608	%a = load <2 x half>, <2 x half> addrspace(1)* %gep0
					; Negation is written as a subtraction from -0.0 in each lane.
				609	%fneg.a = fsub <2 x half> <half -0.0, half -0.0>, %a
				610	%max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %fneg.a, <2 x half> zeroinitializer)
				611	%med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
				612
				613	store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
				614	ret void
				615	}
				616
				617	; GCN-LABEL: {{^}}v_clamp_negabs_v2f16:
					; Clamp of -fabs(a) on a packed-half value. The gfx900 run expects the fabs
					; as a separate v_and_b32 with the mask 0x7fff7fff, and the negation as
					; neg_lo/neg_hi modifiers on a single clamped v_pk_max_f16 that reads the
					; masked register.
					; NOTE(review): the `\|` in the load pattern below looks like an escaping
					; artifact (the RUN lines at the top of the file show `\|` where a shell pipe
					; belongs); presumably an alternation over buffer/flat/global - confirm.
				618	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
				619	; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, [[A]]
				620	; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[ABS]], [[ABS]] neg_lo:[1,1] neg_hi:[1,1] clamp{{$}}
				621	define amdgpu_kernel void @v_clamp_negabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
					; Index both the input and the output pointer by the workitem id.
				622	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				623	%gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
				624	%out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
				625	%a = load <2 x half>, <2 x half> addrspace(1)* %gep0
					; Absolute value first, then negation written as a subtraction from -0.0.
				626	%fabs.a = call <2 x half> @llvm.fabs.v2f16(<2 x half> %a)
				627	%fneg.fabs.a = fsub <2 x half> <half -0.0, half -0.0>, %fabs.a
				628
				629	%max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %fneg.fabs.a, <2 x half> zeroinitializer)
				630	%med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
				631
				632	store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
				633	ret void
				634	}
				635
				636	; GCN-LABEL: {{^}}v_clamp_neglo_v2f16:
					; Clamp of a packed-half value in which only element 0 has been negated.
					; The gfx900 run expects a single clamped v_pk_max_f16 with only the
					; neg_lo:[1,1] modifier, and no other mention of the loaded register
					; before it.
					; NOTE(review): the `\|` in the load pattern below looks like an escaping
					; artifact (the RUN lines at the top of the file show `\|` where a shell pipe
					; belongs); presumably an alternation over buffer/flat/global - confirm.
				637	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
				638	; GFX9-NOT: [[A]]
				639	; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] neg_lo:[1,1] clamp{{$}}
				640	define amdgpu_kernel void @v_clamp_neglo_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
					; Index both the input and the output pointer by the workitem id.
				641	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				642	%gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
				643	%out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
				644	%a = load <2 x half>, <2 x half> addrspace(1)* %gep0
					; Extract element 0, negate it, and put it back; element 1 is untouched.
				645	%lo = extractelement <2 x half> %a, i32 0
				646	%neg.lo = fsub half -0.0, %lo
				647	%neg.lo.vec = insertelement <2 x half> %a, half %neg.lo, i32 0
				648	%max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %neg.lo.vec, <2 x half> zeroinitializer)
				649	%med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
				650
				651	store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
				652	ret void
				653	}
				654
				655	; GCN-LABEL: {{^}}v_clamp_neghi_v2f16:
					; Clamp of a packed-half value in which only element 1 has been negated.
					; The gfx900 run expects a single clamped v_pk_max_f16 with only the
					; neg_hi:[1,1] modifier, and no other mention of the loaded register
					; before it.
					; NOTE(review): the `\|` in the load pattern below looks like an escaping
					; artifact (the RUN lines at the top of the file show `\|` where a shell pipe
					; belongs); presumably an alternation over buffer/flat/global - confirm.
				656	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
				657	; GFX9-NOT: [[A]]
				658	; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] neg_hi:[1,1] clamp{{$}}
				659	define amdgpu_kernel void @v_clamp_neghi_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
					; Index both the input and the output pointer by the workitem id.
				660	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				661	%gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
				662	%out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
				663	%a = load <2 x half>, <2 x half> addrspace(1)* %gep0
					; Extract element 1, negate it, and put it back; element 0 is untouched.
				664	%hi = extractelement <2 x half> %a, i32 1
				665	%neg.hi = fsub half -0.0, %hi
				666	%neg.hi.vec = insertelement <2 x half> %a, half %neg.hi, i32 1
				667	%max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %neg.hi.vec, <2 x half> zeroinitializer)
				668	%med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
				669
				670	store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
				671	ret void
				672	}
				673
				674	; GCN-LABEL: {{^}}v_clamp_v2f16_shuffle:
					; Clamp of a packed-half value whose two elements have been swapped by a
					; shufflevector. The gfx900 run expects the swap to appear as
					; op_sel:[1,1] op_sel_hi:[0,0] on a single clamped v_pk_max_f16, with no
					; other mention of the loaded register before it.
					; NOTE(review): the `\|` in the load pattern below looks like an escaping
					; artifact (the RUN lines at the top of the file show `\|` where a shell pipe
					; belongs); presumably an alternation over buffer/flat/global - confirm.
				675	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
				676	; GFX9-NOT: [[A]]
				677	; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] op_sel:[1,1] op_sel_hi:[0,0] clamp{{$}}
				678	define amdgpu_kernel void @v_clamp_v2f16_shuffle(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
					; Index both the input and the output pointer by the workitem id.
				679	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				680	%gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
				681	%out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
				682	%a = load <2 x half>, <2 x half> addrspace(1)* %gep0
					; Mask <1, 0> swaps the two halves of %a.
				683	%shuf = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 0>
				684	%max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %shuf, <2 x half> zeroinitializer)
				685	%med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
				686
				687	store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
				688	ret void
				689	}
				690
Matt Arsenault	b5acec1	2018-08-12 08:42:54 +0000	[diff] [blame]	691	; GCN-LABEL: {{^}}v_clamp_v2f16_undef_limit_elts0:
					; Packed-half clamp where each limit vector has one undef lane: the lower
					; bound is <0.0, undef> and the upper bound is <undef, 1.0>. The gfx900 run
					; still expects a single v_pk_max_f16 with the clamp modifier, with no other
					; mention of the loaded register before it.
					; NOTE(review): the `\|` in the load pattern below looks like an escaping
					; artifact (the RUN lines at the top of the file show `\|` where a shell pipe
					; belongs); presumably an alternation over buffer/flat/global - confirm.
				692	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
				693	; GFX9-NOT: [[A]]
				694	; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] clamp{{$}}
				695	define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts0(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
					; Index both the input and the output pointer by the workitem id.
				696	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				697	%gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
				698	%out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
				699	%a = load <2 x half>, <2 x half> addrspace(1)* %gep0
					; Lane 0 has only a defined lower bound; lane 1 has only a defined upper bound.
				700	%max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 0.0, half undef>)
				701	%med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half undef, half 1.0>)
				702
				703	store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
				704	ret void
				705	}
				706
				707	; GCN-LABEL: {{^}}v_clamp_v2f16_undef_limit_elts1:
					; Packed-half clamp where each limit vector has one undef lane: the lower
					; bound is <undef, 0.0> and the upper bound is <1.0, undef>. The gfx900 run
					; still expects a single v_pk_max_f16 with the clamp modifier, with no other
					; mention of the loaded register before it.
					; NOTE(review): the limit constants here are identical to the ones used in
					; v_clamp_v2f16_undef_elt, so the two tests appear to differ only by name.
					; NOTE(review): the `\|` in the load pattern below looks like an escaping
					; artifact (the RUN lines at the top of the file show `\|` where a shell pipe
					; belongs); presumably an alternation over buffer/flat/global - confirm.
				708	; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
				709	; GFX9-NOT: [[A]]
				710	; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] clamp{{$}}
				711	define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts1(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
					; Index both the input and the output pointer by the workitem id.
				712	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				713	%gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
				714	%out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
				715	%a = load <2 x half>, <2 x half> addrspace(1)* %gep0
					; Lane 0 has only a defined upper bound; lane 1 has only a defined lower bound.
				716	%max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half undef, half 0.0>)
				717	%med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half undef>)
				718
				719	store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
				720	ret void
				721	}
				722
Matt Arsenault	aafff87	2017-10-05 00:13:17 +0000	[diff] [blame]	723	; GCN-LABEL: {{^}}v_clamp_diff_source_f32:
					; Clamp applied to the maxnum of two different values. The expected output
					; is two v_add_f32 results feeding one v_max_f32_e64 that carries the clamp
					; modifier, i.e. the clamp is attached to a max whose two sources differ.
				724	; GCN: v_add_f32_e32 [[A:v[0-9]+]]
				725	; GCN: v_add_f32_e32 [[B:v[0-9]+]]
				726	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[B]] clamp{{$}}
				727	define amdgpu_kernel void @v_clamp_diff_source_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0
				728	{
					; Unlike the other kernels here, addresses use constant indices (0, 1, 2)
					; rather than the workitem id.
				729	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 0
				730	%gep1 = getelementptr float, float addrspace(1)* %aptr, i32 1
				731	%gep2 = getelementptr float, float addrspace(1)* %aptr, i32 2
				732	%l0 = load float, float addrspace(1)* %gep0
				733	%l1 = load float, float addrspace(1)* %gep1
				734	%l2 = load float, float addrspace(1)* %gep2
					; Two sums that share %l0; both carry the nsz fast-math flag.
				735	%a = fadd nsz float %l0, %l1
				736	%b = fadd nsz float %l0, %l2
					; max(a, b), then the usual clamp pair: max with 0.0 and min with 1.0.
				737	%res = call nsz float @llvm.maxnum.f32(float %a, float %b)
				738	%max = call nsz float @llvm.maxnum.f32(float %res, float 0.0)
				739	%min = call nsz float @llvm.minnum.f32(float %max, float 1.0)
					; The result is stored at constant index 3 of %out.
				740	%out.gep = getelementptr float, float addrspace(1)* %out, i32 3
				741	store float %min, float addrspace(1)* %out.gep
				742	ret void
				743	}
				744
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	745	declare i32 @llvm.amdgcn.workitem.id.x() #1
					; Intrinsic declarations (the one above and all of those below). Every one
					; is tagged with attribute group #1.
					; Scalar f32 intrinsics, including the AMDGPU-specific fmed3.
				746	declare float @llvm.fabs.f32(float) #1
				747	declare float @llvm.minnum.f32(float, float) #1
				748	declare float @llvm.maxnum.f32(float, float) #1
				749	declare float @llvm.amdgcn.fmed3.f32(float, float, float) #1
					; Scalar f64 and f16 intrinsics. None of these are called by the kernels in
					; this part of the file; presumably earlier tests use them - confirm.
				750	declare double @llvm.fabs.f64(double) #1
				751	declare double @llvm.minnum.f64(double, double) #1
				752	declare double @llvm.maxnum.f64(double, double) #1
				753	declare half @llvm.fabs.f16(half) #1
				754	declare half @llvm.minnum.f16(half, half) #1
				755	declare half @llvm.maxnum.f16(half, half) #1
					; Packed <2 x half> intrinsics used by the v2f16 clamp tests.
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	756	declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
				757	declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
				758	declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	759
				760	attributes #0 = { nounwind }
					; Attribute groups referenced by the tests in this file:
					;   #0 (above) - plain nounwind, used by the kernels that do not override
					;                any target feature.
					;   #1 - nounwind readnone, applied to the intrinsic declarations.
				761	attributes #1 = { nounwind readnone }
					;   #2 - dx10-clamp and fp-exceptions both disabled; used by the
					;        *_no_dx10_clamp kernels.
				762	attributes #2 = { nounwind "target-features"="-dx10-clamp,-fp-exceptions" "no-nans-fp-math"="false" }
					;   #3 - dx10-clamp and fp-exceptions both enabled.
					;   #4 - dx10-clamp disabled, fp-exceptions enabled.
					; NOTE(review): #3 and #4 are not referenced in this part of the file;
					; presumably earlier kernels use them - confirm before removing.
				763	attributes #3 = { nounwind "target-features"="+dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" }
				764	attributes #4 = { nounwind "target-features"="-dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" }