Blame - llvm/test/CodeGen/AMDGPU/clamp.ll - toolchain/llvm-project

blob: 216ecf76345661f0e93ad3864d2e92390816a0ac [file] [log] [blame]

Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	1	; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
				2	; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI %s
				3	; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	4
				5	; GCN-LABEL: {{^}}v_clamp_f32:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	6	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	7	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
					; Clamp idiom on f32: minnum(maxnum(a, 0.0), 1.0), where a is loaded from
					; %aptr at the work-item index. The checks above expect one v_max_f32 of
					; a with itself carrying the clamp modifier.
				8	define amdgpu_kernel void @v_clamp_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				9	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				10	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				11	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				12	%a = load float, float addrspace(1)* %gep0
				13	%max = call float @llvm.maxnum.f32(float %a, float 0.0)
				14	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				15
				16	store float %med, float addrspace(1)* %out.gep
				17	ret void
				18	}
				19
				20	; GCN-LABEL: {{^}}v_clamp_neg_f32:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	21	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	22	; GCN: v_max_f32_e64 v{{[0-9]+}}, -[[A]], -[[A]] clamp{{$}}
					; Same clamp idiom applied to the negated load (negation written as
					; fsub -0.0, a). The checks above expect the negation to appear as a
					; source modifier on both operands of the clamped v_max_f32.
				23	define amdgpu_kernel void @v_clamp_neg_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				24	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				25	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				26	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				27	%a = load float, float addrspace(1)* %gep0
				28	%fneg.a = fsub float -0.0, %a
				29	%max = call float @llvm.maxnum.f32(float %fneg.a, float 0.0)
				30	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				31
				32	store float %med, float addrspace(1)* %out.gep
				33	ret void
				34	}
				35
				36	; GCN-LABEL: {{^}}v_clamp_negabs_f32:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	37	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	38	; GCN: v_max_f32_e64 v{{[0-9]+}}, -|[[A]]|, -|[[A]]| clamp{{$}}
					; Clamp idiom applied to -fabs(a). The checks above expect both the
					; neg and abs source modifiers on the operands of the clamped v_max_f32.
				39	define amdgpu_kernel void @v_clamp_negabs_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				40	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				41	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				42	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				43	%a = load float, float addrspace(1)* %gep0
				44	%fabs.a = call float @llvm.fabs.f32(float %a)
				45	%fneg.fabs.a = fsub float -0.0, %fabs.a
				46
				47	%max = call float @llvm.maxnum.f32(float %fneg.fabs.a, float 0.0)
				48	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				49
				50	store float %med, float addrspace(1)* %out.gep
				51	ret void
				52	}
				53
				54	; GCN-LABEL: {{^}}v_clamp_negzero_f32:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	55	; GCN-DAG: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
Stanislav Mekhanoshin	79da2a7	2017-03-11 00:29:27 +0000	[diff] [blame]	56	; GCN-DAG: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	57	; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[SIGNBIT]], 1.0
					; Lower bound is -0.0 rather than +0.0, so this is not the clamp idiom.
					; The checks above expect v_med3_f32 against a sign-bit constant built
					; with v_bfrev_b32 of 1, and no clamp modifier.
				58	define amdgpu_kernel void @v_clamp_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				59	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				60	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				61	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				62	%a = load float, float addrspace(1)* %gep0
				63	%max = call float @llvm.maxnum.f32(float %a, float -0.0)
				64	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				65
				66	store float %med, float addrspace(1)* %out.gep
				67	ret void
				68	}
				69
				70	; GCN-LABEL: {{^}}v_clamp_multi_use_max_f32:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	71	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	72	; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
				73	; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
					; The intermediate maxnum result has a second use (the volatile store
					; below). The checks above expect separate v_max_f32 and v_min_f32
					; instructions instead of a single clamped op.
				74	define amdgpu_kernel void @v_clamp_multi_use_max_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				75	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				76	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				77	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				78	%a = load float, float addrspace(1)* %gep0
				79	%max = call float @llvm.maxnum.f32(float %a, float 0.0)
				80	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				81
				82	store float %med, float addrspace(1)* %out.gep
				83	store volatile float %max, float addrspace(1)* undef
				84	ret void
				85	}
				86
				87	; GCN-LABEL: {{^}}v_clamp_f16:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	88	; GCN: {{buffer|flat|global}}_load_ushort [[A:v[0-9]+]]
				89	; GFX89: v_max_f16_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	90
Matt Arsenault	d5c6515	2017-02-22 23:27:53 +0000	[diff] [blame]	91	; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], [[A]] clamp{{$}}
				92	; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
					; Clamp idiom on f16. The GFX89 checks expect a clamped v_max_f16; the
					; SI-prefixed checks expect the clamp on the f16-to-f32 conversion,
					; followed by a conversion back to f16.
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	93	define amdgpu_kernel void @v_clamp_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
				94	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				95	%gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
				96	%out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
				97	%a = load half, half addrspace(1)* %gep0
				98	%max = call half @llvm.maxnum.f16(half %a, half 0.0)
				99	%med = call half @llvm.minnum.f16(half %max, half 1.0)
				100
				101	store half %med, half addrspace(1)* %out.gep
				102	ret void
				103	}
				104
				105	; GCN-LABEL: {{^}}v_clamp_neg_f16:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	106	; GCN: {{buffer|flat|global}}_load_ushort [[A:v[0-9]+]]
				107	; GFX89: v_max_f16_e64 v{{[0-9]+}}, -[[A]], -[[A]] clamp{{$}}
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	108
				109	; FIXME: Better to fold neg into max
Matt Arsenault	d5c6515	2017-02-22 23:27:53 +0000	[diff] [blame]	110	; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]] clamp{{$}}
				111	; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
					; f16 clamp idiom applied to the negated load (fsub -0.0, a). The checks
					; above expect the neg source modifier on the clamped instruction.
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	112	define amdgpu_kernel void @v_clamp_neg_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
				113	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				114	%gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
				115	%out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
				116	%a = load half, half addrspace(1)* %gep0
				117	%fneg.a = fsub half -0.0, %a
				118	%max = call half @llvm.maxnum.f16(half %fneg.a, half 0.0)
				119	%med = call half @llvm.minnum.f16(half %max, half 1.0)
				120
				121	store half %med, half addrspace(1)* %out.gep
				122	ret void
				123	}
				124
				125	; GCN-LABEL: {{^}}v_clamp_negabs_f16:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	126	; GCN: {{buffer|flat|global}}_load_ushort [[A:v[0-9]+]]
				127	; GFX89: v_max_f16_e64 v{{[0-9]+}}, -|[[A]]|, -|[[A]]| clamp{{$}}
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	128
				129	; FIXME: Better to fold neg/abs into max
				130
Matt Arsenault	d5c6515	2017-02-22 23:27:53 +0000	[diff] [blame]	131	; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -|[[A]]| clamp{{$}}
				132	; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
					; f16 clamp idiom applied to -fabs(a). The checks above expect the neg
					; and abs source modifiers on the clamped instruction.
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	133	define amdgpu_kernel void @v_clamp_negabs_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
				134	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				135	%gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
				136	%out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
				137	%a = load half, half addrspace(1)* %gep0
				138	%fabs.a = call half @llvm.fabs.f16(half %a)
				139	%fneg.fabs.a = fsub half -0.0, %fabs.a
				140
				141	%max = call half @llvm.maxnum.f16(half %fneg.fabs.a, half 0.0)
				142	%med = call half @llvm.minnum.f16(half %max, half 1.0)
				143
				144	store half %med, half addrspace(1)* %out.gep
				145	ret void
				146	}
				147
				148	; FIXME: Do f64 instructions support clamp?
				149	; GCN-LABEL: {{^}}v_clamp_f64:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	150	; GCN: {{buffer|flat|global}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
Matt Arsenault	79a45db	2017-02-22 23:53:37 +0000	[diff] [blame]	151	; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, [[A]], [[A]] clamp{{$}}
					; Clamp idiom on f64. The checks above expect a clamped v_max_f64 whose
					; operands are the loaded 64-bit register pair.
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	152	define amdgpu_kernel void @v_clamp_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
				153	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				154	%gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
				155	%out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
				156	%a = load double, double addrspace(1)* %gep0
				157	%max = call double @llvm.maxnum.f64(double %a, double 0.0)
				158	%med = call double @llvm.minnum.f64(double %max, double 1.0)
				159
				160	store double %med, double addrspace(1)* %out.gep
				161	ret void
				162	}
				163
				164	; GCN-LABEL: {{^}}v_clamp_neg_f64:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	165	; GCN: {{buffer|flat|global}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
Matt Arsenault	79a45db	2017-02-22 23:53:37 +0000	[diff] [blame]	166	; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, -[[A]], -[[A]] clamp{{$}}
					; f64 clamp idiom applied to the negated load (fsub -0.0, a). The checks
					; above expect the neg source modifier on both v_max_f64 operands.
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	167	define amdgpu_kernel void @v_clamp_neg_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
				168	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				169	%gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
				170	%out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
				171	%a = load double, double addrspace(1)* %gep0
				172	%fneg.a = fsub double -0.0, %a
				173	%max = call double @llvm.maxnum.f64(double %fneg.a, double 0.0)
				174	%med = call double @llvm.minnum.f64(double %max, double 1.0)
				175
				176	store double %med, double addrspace(1)* %out.gep
				177	ret void
				178	}
				179
				180	; GCN-LABEL: {{^}}v_clamp_negabs_f64:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	181	; GCN: {{buffer|flat|global}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
Matt Arsenault	79a45db	2017-02-22 23:53:37 +0000	[diff] [blame]	182	; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, -|[[A]]|, -|[[A]]| clamp{{$}}
					; f64 clamp idiom applied to -fabs(a). The checks above expect the neg
					; and abs source modifiers on both v_max_f64 operands.
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	183	define amdgpu_kernel void @v_clamp_negabs_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
				184	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				185	%gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
				186	%out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
				187	%a = load double, double addrspace(1)* %gep0
				188	%fabs.a = call double @llvm.fabs.f64(double %a)
				189	%fneg.fabs.a = fsub double -0.0, %fabs.a
				190
				191	%max = call double @llvm.maxnum.f64(double %fneg.fabs.a, double 0.0)
				192	%med = call double @llvm.minnum.f64(double %max, double 1.0)
				193
				194	store double %med, double addrspace(1)* %out.gep
				195	ret void
				196	}
				197
				198	; GCN-LABEL: {{^}}v_clamp_med3_aby_negzero_f32:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	199	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	200	; GCN: v_med3_f32
					; fmed3 with bounds -0.0 and 1.0: the lower bound is negative zero, and
					; the checks above expect a v_med3_f32 to remain in the output.
				201	define amdgpu_kernel void @v_clamp_med3_aby_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				202	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				203	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				204	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				205	%a = load float, float addrspace(1)* %gep0
				206	%med = call float @llvm.amdgcn.fmed3.f32(float -0.0, float 1.0, float %a)
				207	store float %med, float addrspace(1)* %out.gep
				208	ret void
				209	}
				210
				211	; GCN-LABEL: {{^}}v_clamp_med3_aby_f32:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	212	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	213	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
					; fmed3 operand order (0.0, 1.0, a). The checks above expect it to be
					; selected as a clamped v_max_f32 of the loaded value.
				214	define amdgpu_kernel void @v_clamp_med3_aby_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				215	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				216	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				217	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				218	%a = load float, float addrspace(1)* %gep0
				219	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a)
				220	store float %med, float addrspace(1)* %out.gep
				221	ret void
				222	}
				223
				224	; GCN-LABEL: {{^}}v_clamp_med3_bay_f32:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	225	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	226	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
					; fmed3 operand order (1.0, 0.0, a). The checks above expect it to be
					; selected as a clamped v_max_f32 of the loaded value.
				227	define amdgpu_kernel void @v_clamp_med3_bay_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				228	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				229	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				230	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				231	%a = load float, float addrspace(1)* %gep0
				232	%med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a)
				233	store float %med, float addrspace(1)* %out.gep
				234	ret void
				235	}
				236
				237	; GCN-LABEL: {{^}}v_clamp_med3_yab_f32:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	238	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	239	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
					; fmed3 operand order (a, 0.0, 1.0). The checks above expect it to be
					; selected as a clamped v_max_f32 of the loaded value.
				240	define amdgpu_kernel void @v_clamp_med3_yab_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				241	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				242	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				243	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				244	%a = load float, float addrspace(1)* %gep0
				245	%med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0)
				246	store float %med, float addrspace(1)* %out.gep
				247	ret void
				248	}
				249
				250	; GCN-LABEL: {{^}}v_clamp_med3_yba_f32:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	251	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	252	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
					; fmed3 operand order (a, 1.0, 0.0). The checks above expect it to be
					; selected as a clamped v_max_f32 of the loaded value.
				253	define amdgpu_kernel void @v_clamp_med3_yba_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				254	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				255	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				256	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				257	%a = load float, float addrspace(1)* %gep0
				258	%med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0)
				259	store float %med, float addrspace(1)* %out.gep
				260	ret void
				261	}
				262
				263	; GCN-LABEL: {{^}}v_clamp_med3_ayb_f32:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	264	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	265	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
					; fmed3 operand order (0.0, a, 1.0). The checks above expect it to be
					; selected as a clamped v_max_f32 of the loaded value.
				266	define amdgpu_kernel void @v_clamp_med3_ayb_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				267	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				268	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				269	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				270	%a = load float, float addrspace(1)* %gep0
				271	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0)
				272	store float %med, float addrspace(1)* %out.gep
				273	ret void
				274	}
				275
				276	; GCN-LABEL: {{^}}v_clamp_med3_bya_f32:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	277	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	278	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
					; fmed3 operand order (1.0, a, 0.0). The checks above expect it to be
					; selected as a clamped v_max_f32 of the loaded value.
				279	define amdgpu_kernel void @v_clamp_med3_bya_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
				280	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				281	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				282	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				283	%a = load float, float addrspace(1)* %gep0
				284	%med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0)
				285	store float %med, float addrspace(1)* %out.gep
				286	ret void
				287	}
				288
				289	; GCN-LABEL: {{^}}v_clamp_constants_to_one_f32:
				290	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 1.0
					; All-constant fmed3(0.0, 1.0, 4.0). The check above expects the result
					; to be materialized directly as the constant 1.0.
				291	define amdgpu_kernel void @v_clamp_constants_to_one_f32(float addrspace(1)* %out) #0 {
				292	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				293	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				294	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 4.0)
				295	store float %med, float addrspace(1)* %out.gep
				296	ret void
				297	}
				298
				299	; GCN-LABEL: {{^}}v_clamp_constants_to_zero_f32:
				300	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
					; All-constant fmed3(0.0, 1.0, -4.0). The check above expects the result
					; to be materialized directly as the constant 0.
				301	define amdgpu_kernel void @v_clamp_constants_to_zero_f32(float addrspace(1)* %out) #0 {
				302	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				303	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				304	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float -4.0)
				305	store float %med, float addrspace(1)* %out.gep
				306	ret void
				307	}
				308
				309	; GCN-LABEL: {{^}}v_clamp_constant_preserve_f32:
				310	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0.5
					; All-constant fmed3(0.0, 1.0, 0.5): the third operand already lies
					; between the bounds, and the check above expects 0.5 to be emitted.
				311	define amdgpu_kernel void @v_clamp_constant_preserve_f32(float addrspace(1)* %out) #0 {
				312	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				313	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				314	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0.5)
				315	store float %med, float addrspace(1)* %out.gep
				316	ret void
				317	}
				318
				319	; GCN-LABEL: {{^}}v_clamp_constant_preserve_denorm_f32:
				320	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7fffff{{$}}
					; All-constant fmed3 whose third operand is the bit pattern 8388607
					; (0x7fffff, a denormal per the test name). The check above expects that
					; bit pattern to be emitted unchanged.
				321	define amdgpu_kernel void @v_clamp_constant_preserve_denorm_f32(float addrspace(1)* %out) #0 {
				322	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				323	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				324	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 8388607 to float))
				325	store float %med, float addrspace(1)* %out.gep
				326	ret void
				327	}
				328
				329	; GCN-LABEL: {{^}}v_clamp_constant_qnan_f32:
				330	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
					; All-constant fmed3 whose third operand is 0x7FF8000000000000 (a quiet
					; NaN per the test name). The check above expects the result to be 0.
				331	define amdgpu_kernel void @v_clamp_constant_qnan_f32(float addrspace(1)* %out) #0 {
				332	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				333	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				334	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000)
				335	store float %med, float addrspace(1)* %out.gep
				336	ret void
				337	}
				338
				339	; GCN-LABEL: {{^}}v_clamp_constant_snan_f32:
				340	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
					; All-constant fmed3 whose third operand is the bit pattern 2139095041
					; (0x7f800001, a signaling NaN per the test name). The check above
					; expects the result to be 0.
				341	define amdgpu_kernel void @v_clamp_constant_snan_f32(float addrspace(1)* %out) #0 {
				342	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				343	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				344	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float))
				345	store float %med, float addrspace(1)* %out.gep
				346	ret void
				347	}
				348
				349	; ---------------------------------------------------------------------
				350	; Test non-default behaviors enabling snans and disabling dx10_clamp
				351	; ---------------------------------------------------------------------
				352
				353	; GCN-LABEL: {{^}}v_clamp_f32_no_dx10_clamp:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	354	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	355	; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
					; min/max clamp idiom compiled with attribute group #2. The checks above
					; expect v_med3_f32 (a, 0, 1.0) rather than a clamp modifier.
					; NOTE(review): #2 is defined outside this excerpt; presumably it
					; disables dx10_clamp, per the test name - confirm.
				356	define amdgpu_kernel void @v_clamp_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				357	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				358	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				359	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				360	%a = load float, float addrspace(1)* %gep0
				361	%max = call float @llvm.maxnum.f32(float %a, float 0.0)
				362	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				363
				364	store float %med, float addrspace(1)* %out.gep
				365	ret void
				366	}
				367
				368	; GCN-LABEL: {{^}}v_clamp_f32_snan_dx10clamp:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	369	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	370	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
					; min/max clamp idiom compiled with attribute group #3. The checks above
					; still expect a clamped v_max_f32.
					; NOTE(review): #3 is defined outside this excerpt; presumably it
					; enables signaling-NaN handling with dx10_clamp kept on - confirm.
				371	define amdgpu_kernel void @v_clamp_f32_snan_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #3 {
				372	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				373	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				374	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				375	%a = load float, float addrspace(1)* %gep0
				376	%max = call float @llvm.maxnum.f32(float %a, float 0.0)
				377	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				378
				379	store float %med, float addrspace(1)* %out.gep
				380	ret void
				381	}
				382
				383	; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	384	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	385	; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
				386	; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
					; min/max clamp idiom compiled with attribute group #4. The checks above
					; expect separate v_max_f32 and v_min_f32 with no clamp or med3 folding.
					; NOTE(review): #4 is defined outside this excerpt; presumably it
					; enables signaling NaNs and disables dx10_clamp - confirm.
				387	define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
				388	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				389	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				390	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				391	%a = load float, float addrspace(1)* %gep0
				392	%max = call float @llvm.maxnum.f32(float %a, float 0.0)
				393	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				394
				395	store float %med, float addrspace(1)* %out.gep
				396	ret void
				397	}
				398
				399	; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp_nnan_src:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	400	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	401	; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
					; Same attribute group (#4) as the previous case, but the clamped value
					; comes from an fadd flagged nnan. The checks above expect v_med3_f32.
					; NOTE(review): the med3 check names the loaded register as its first
					; operand although the IR clamps the fadd result - confirm the pattern.
				402	define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp_nnan_src(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
				403	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				404	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				405	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				406	%a = load float, float addrspace(1)* %gep0
				407	%add = fadd nnan float %a, 1.0
				408	%max = call float @llvm.maxnum.f32(float %add, float 0.0)
				409	%med = call float @llvm.minnum.f32(float %max, float 1.0)
				410
				411	store float %med, float addrspace(1)* %out.gep
				412	ret void
				413	}
				414
				415	; GCN-LABEL: {{^}}v_clamp_med3_aby_f32_no_dx10_clamp:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	416	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	417	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
					; fmed3 operand order (0.0, 1.0, a) under attribute group #2 (defined
					; outside this excerpt). The checks above still expect a clamped
					; v_max_f32 for this operand order.
				418	define amdgpu_kernel void @v_clamp_med3_aby_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				419	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				420	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				421	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				422	%a = load float, float addrspace(1)* %gep0
				423	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a)
				424	store float %med, float addrspace(1)* %out.gep
				425	ret void
				426	}
				427
				428	; GCN-LABEL: {{^}}v_clamp_med3_bay_f32_no_dx10_clamp:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	429	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	430	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
					; fmed3 operand order (1.0, 0.0, a) under attribute group #2 (defined
					; outside this excerpt). The checks above still expect a clamped
					; v_max_f32 for this operand order.
				431	define amdgpu_kernel void @v_clamp_med3_bay_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				432	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				433	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				434	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				435	%a = load float, float addrspace(1)* %gep0
				436	%med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a)
				437	store float %med, float addrspace(1)* %out.gep
				438	ret void
				439	}
				440
				441	; GCN-LABEL: {{^}}v_clamp_med3_yab_f32_no_dx10_clamp:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	442	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	443	; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
					; fmed3 operand order (a, 0.0, 1.0) under attribute group #2 (defined
					; outside this excerpt). The checks above expect v_med3_f32 to be kept
					; with the same operand order.
				444	define amdgpu_kernel void @v_clamp_med3_yab_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				445	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				446	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				447	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				448	%a = load float, float addrspace(1)* %gep0
				449	%med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0)
				450	store float %med, float addrspace(1)* %out.gep
				451	ret void
				452	}
				453
				454	; GCN-LABEL: {{^}}v_clamp_med3_yba_f32_no_dx10_clamp:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	455	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	456	; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 1.0, 0
					; fmed3 operand order (a, 1.0, 0.0) under attribute group #2 (defined
					; outside this excerpt). The checks above expect v_med3_f32 to be kept
					; with the same operand order.
				457	define amdgpu_kernel void @v_clamp_med3_yba_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				458	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				459	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				460	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				461	%a = load float, float addrspace(1)* %gep0
				462	%med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0)
				463	store float %med, float addrspace(1)* %out.gep
				464	ret void
				465	}
				466
				467	; GCN-LABEL: {{^}}v_clamp_med3_ayb_f32_no_dx10_clamp:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	468	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	469	; GCN: v_med3_f32 v{{[0-9]+}}, 0, [[A]], 1.0
					; fmed3 operand order (0.0, a, 1.0) under attribute group #2 (defined
					; outside this excerpt). The checks above expect v_med3_f32 to be kept
					; with the same operand order.
				470	define amdgpu_kernel void @v_clamp_med3_ayb_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				471	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				472	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				473	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				474	%a = load float, float addrspace(1)* %gep0
				475	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0)
				476	store float %med, float addrspace(1)* %out.gep
				477	ret void
				478	}
				479
				480	; GCN-LABEL: {{^}}v_clamp_med3_bya_f32_no_dx10_clamp:
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	481	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	482	; GCN: v_med3_f32 v{{[0-9]+}}, 1.0, [[A]], 0
					; fmed3 operand order (1.0, a, 0.0) under attribute group #2 (defined
					; outside this excerpt). The checks above expect v_med3_f32 to be kept
					; with the same operand order.
				483	define amdgpu_kernel void @v_clamp_med3_bya_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
				484	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				485	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
				486	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				487	%a = load float, float addrspace(1)* %gep0
				488	%med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0)
				489	store float %med, float addrspace(1)* %out.gep
				490	ret void
				491	}
				492
				493	; GCN-LABEL: {{^}}v_clamp_constant_qnan_f32_no_dx10_clamp:
				494	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7fc00000
					; All-constant fmed3 with a quiet-NaN third operand under attribute
					; group #2 (defined outside this excerpt). The check above expects the
					; NaN bit pattern 0x7fc00000 to be emitted rather than 0.
				495	define amdgpu_kernel void @v_clamp_constant_qnan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
				496	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				497	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				498	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000)
				499	store float %med, float addrspace(1)* %out.gep
				500	ret void
				501	}
				502
				503	; GCN-LABEL: {{^}}v_clamp_constant_snan_f32_no_dx10_clamp:
				504	; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7f800001
					; All-constant fmed3 whose third operand is the bit pattern 2139095041
					; (0x7f800001) under attribute group #2 (defined outside this excerpt).
					; The check above expects that same bit pattern to be emitted.
				505	define amdgpu_kernel void @v_clamp_constant_snan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
				506	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				507	%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
				508	%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float))
				509	store float %med, float addrspace(1)* %out.gep
				510	ret void
				511	}
				512
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	513	; GCN-LABEL: {{^}}v_clamp_v2f16:
				514	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
				515	; GFX9-NOT: [[A]]
				516	; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] clamp{{$}}
					; Clamp idiom on <2 x half> with splat bounds 0.0 and 1.0. On the gfx900
					; run the checks above expect one clamped v_pk_max_f16, with no other
					; use of the loaded register before it.
				517	define amdgpu_kernel void @v_clamp_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
				518	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				519	%gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
				520	%out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
				521	%a = load <2 x half>, <2 x half> addrspace(1)* %gep0
				522	%max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> zeroinitializer)
				523	%med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
				524
				525	store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
				526	ret void
				527	}
				528
				529	; GCN-LABEL: {{^}}v_clamp_v2f16_undef_elt:
				530	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
				531	; GFX9-NOT: [[A]]
				532	; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] clamp{{$}}
				533	define amdgpu_kernel void @v_clamp_v2f16_undef_elt(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 { ; same clamp pattern, but each bound vector has one undef lane; the gfx900 check still wants the single clamped v_pk_max_f16
				534	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				535	%gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
				536	%out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
				537	%a = load <2 x half>, <2 x half> addrspace(1)* %gep0
				538	%max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half undef, half 0.0>) ; lower bound is undef in lane 0, 0.0 in lane 1
				539	%med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half undef>) ; upper bound is 1.0 in lane 0, undef in lane 1
				540
				541	store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
				542	ret void
				543	}
				544
				545	; GCN-LABEL: {{^}}v_clamp_v2f16_not_zero:
				546	; GFX9: v_pk_max_f16
				547	; GFX9: v_pk_min_f16
				548	define amdgpu_kernel void @v_clamp_v2f16_not_zero(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 { ; negative test, the gfx900 checks expect a separate packed max and packed min
				549	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				550	%gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
				551	%out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
				552	%a = load <2 x half>, <2 x half> addrspace(1)* %gep0
				553	%max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 2.0, half 0.0>) ; lane 0 lower bound is 2.0 rather than 0.0
				554	%med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
				555
				556	store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
				557	ret void
				558	}
				559
				560	; GCN-LABEL: {{^}}v_clamp_v2f16_not_one:
				561	; GFX9: v_pk_max_f16
				562	; GFX9: v_pk_min_f16
				563	define amdgpu_kernel void @v_clamp_v2f16_not_one(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 { ; negative test, the gfx900 checks expect a separate packed max and packed min
				564	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				565	%gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
				566	%out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
				567	%a = load <2 x half>, <2 x half> addrspace(1)* %gep0
				568	%max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 0.0, half 0.0>)
				569	%med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 0.0, half 1.0>) ; lane 0 upper bound is 0.0 rather than 1.0
				570
				571	store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
				572	ret void
				573	}
				574
				575	; GCN-LABEL: {{^}}v_clamp_neg_v2f16:
				576	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
				577	; GFX9-NOT: [[A]]
				578	; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] neg_lo:[1,1] neg_hi:[1,1] clamp{{$}}
				579	define amdgpu_kernel void @v_clamp_neg_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 { ; clamp of a negated packed value; the gfx900 check wants the negation expressed as neg_lo/neg_hi modifiers on the clamped v_pk_max_f16
				580	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				581	%gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
				582	%out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
				583	%a = load <2 x half>, <2 x half> addrspace(1)* %gep0
				584	%fneg.a = fsub <2 x half> <half -0.0, half -0.0>, %a ; negation of both lanes written as -0.0 - a
				585	%max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %fneg.a, <2 x half> zeroinitializer) ; lower bound 0.0
				586	%med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>) ; upper bound 1.0
				587
				588	store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
				589	ret void
				590	}
				591
				592	; GCN-LABEL: {{^}}v_clamp_negabs_v2f16:
				593	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
				594	; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, [[A]]
				595	; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[ABS]], [[ABS]] neg_lo:[1,1] neg_hi:[1,1] clamp{{$}}
				596	define amdgpu_kernel void @v_clamp_negabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 { ; clamp of -|a|; the gfx900 checks want the abs done by a v_and_b32 with 0x7fff7fff and the negation as modifiers on the clamped v_pk_max_f16
				597	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				598	%gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
				599	%out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
				600	%a = load <2 x half>, <2 x half> addrspace(1)* %gep0
				601	%fabs.a = call <2 x half> @llvm.fabs.v2f16(<2 x half> %a)
				602	%fneg.fabs.a = fsub <2 x half> <half -0.0, half -0.0>, %fabs.a ; negation of both lanes written as -0.0 - |a|
				603
				604	%max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %fneg.fabs.a, <2 x half> zeroinitializer) ; lower bound 0.0
				605	%med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>) ; upper bound 1.0
				606
				607	store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
				608	ret void
				609	}
				610
				611	; GCN-LABEL: {{^}}v_clamp_neglo_v2f16:
				612	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
				613	; GFX9-NOT: [[A]]
				614	; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] neg_lo:[1,1] clamp{{$}}
				615	define amdgpu_kernel void @v_clamp_neglo_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 { ; clamp with only element 0 negated; the gfx900 check wants neg_lo alone on the clamped v_pk_max_f16
				616	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				617	%gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
				618	%out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
				619	%a = load <2 x half>, <2 x half> addrspace(1)* %gep0
				620	%lo = extractelement <2 x half> %a, i32 0
				621	%neg.lo = fsub half -0.0, %lo ; negate element 0 only
				622	%neg.lo.vec = insertelement <2 x half> %a, half %neg.lo, i32 0 ; element 1 keeps its loaded value
				623	%max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %neg.lo.vec, <2 x half> zeroinitializer) ; lower bound 0.0
				624	%med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>) ; upper bound 1.0
				625
				626	store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
				627	ret void
				628	}
				629
				630	; GCN-LABEL: {{^}}v_clamp_neghi_v2f16:
				631	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
				632	; GFX9-NOT: [[A]]
				633	; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] neg_hi:[1,1] clamp{{$}}
				634	define amdgpu_kernel void @v_clamp_neghi_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 { ; clamp with only element 1 negated; the gfx900 check wants neg_hi alone on the clamped v_pk_max_f16
				635	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				636	%gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
				637	%out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
				638	%a = load <2 x half>, <2 x half> addrspace(1)* %gep0
				639	%hi = extractelement <2 x half> %a, i32 1
				640	%neg.hi = fsub half -0.0, %hi ; negate element 1 only
				641	%neg.hi.vec = insertelement <2 x half> %a, half %neg.hi, i32 1 ; element 0 keeps its loaded value
				642	%max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %neg.hi.vec, <2 x half> zeroinitializer) ; lower bound 0.0
				643	%med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>) ; upper bound 1.0
				644
				645	store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
				646	ret void
				647	}
				648
				649	; GCN-LABEL: {{^}}v_clamp_v2f16_shuffle:
				650	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
				651	; GFX9-NOT: [[A]]
				652	; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] op_sel:[1,1] op_sel_hi:[0,0] clamp{{$}}
				653	define amdgpu_kernel void @v_clamp_v2f16_shuffle(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 { ; clamp of a lane-swapped value; the gfx900 check wants the swap expressed through op_sel/op_sel_hi on the clamped v_pk_max_f16
				654	%tid = call i32 @llvm.amdgcn.workitem.id.x()
				655	%gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
				656	%out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
				657	%a = load <2 x half>, <2 x half> addrspace(1)* %gep0
				658	%shuf = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 0> ; mask <1, 0> swaps the two elements of a
				659	%max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %shuf, <2 x half> zeroinitializer) ; lower bound 0.0
				660	%med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>) ; upper bound 1.0
				661
				662	store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
				663	ret void
				664	}
				665
Matt Arsenault	aafff87	2017-10-05 00:13:17 +0000	[diff] [blame^]	666	; GCN-LABEL: {{^}}v_clamp_diff_source_f32:
				667	; GCN: v_add_f32_e32 [[A:v[0-9]+]]
				668	; GCN: v_add_f32_e32 [[B:v[0-9]+]]
				669	; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[B]] clamp{{$}}
				670	define amdgpu_kernel void @v_clamp_diff_source_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0
				671	{ ; clamp applied to a max of two different values; the checks want the clamp bit on the v_max_f32 that takes both sums
				672	%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 0
				673	%gep1 = getelementptr float, float addrspace(1)* %aptr, i32 1
				674	%gep2 = getelementptr float, float addrspace(1)* %aptr, i32 2
				675	%l0 = load float, float addrspace(1)* %gep0
				676	%l1 = load float, float addrspace(1)* %gep1
				677	%l2 = load float, float addrspace(1)* %gep2
				678	%a = fadd nsz float %l0, %l1 ; first max operand
				679	%b = fadd nsz float %l0, %l2 ; second max operand, shares %l0 with the first
				680	%res = call nsz float @llvm.maxnum.f32(float %a, float %b) ; max of two distinct sources
				681	%max = call nsz float @llvm.maxnum.f32(float %res, float 0.0) ; lower bound 0.0
				682	%min = call nsz float @llvm.minnum.f32(float %max, float 1.0) ; upper bound 1.0
				683	%out.gep = getelementptr float, float addrspace(1)* %out, i32 3 ; fixed output slot, no workitem id is used in this test
				684	store float %min, float addrspace(1)* %out.gep
				685	ret void
				686	}
				687
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	688	declare i32 @llvm.amdgcn.workitem.id.x() #1 ; used by the kernels to index per-workitem elements
				689	declare float @llvm.fabs.f32(float) #1
				690	declare float @llvm.minnum.f32(float, float) #1
				691	declare float @llvm.maxnum.f32(float, float) #1
				692	declare float @llvm.amdgcn.fmed3.f32(float, float, float) #1 ; three-operand median intrinsic used by the fmed3-based tests
				693	declare double @llvm.fabs.f64(double) #1
				694	declare double @llvm.minnum.f64(double, double) #1
				695	declare double @llvm.maxnum.f64(double, double) #1
				696	declare half @llvm.fabs.f16(half) #1
				697	declare half @llvm.minnum.f16(half, half) #1
				698	declare half @llvm.maxnum.f16(half, half) #1
Matt Arsenault	6b114d2	2017-08-30 01:20:17 +0000	[diff] [blame]	699	declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1 ; packed-half variants used by the v2f16 tests
				700	declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
				701	declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1
Matt Arsenault	2fdf2a1	2017-02-21 23:35:48 +0000	[diff] [blame]	702
				703	attributes #0 = { nounwind } ; default group for the test kernels
				704	attributes #1 = { nounwind readnone } ; group attached to the intrinsic declarations
				705	attributes #2 = { nounwind "target-features"="-dx10-clamp,-fp-exceptions" "no-nans-fp-math"="false" } ; dx10-clamp off, fp-exceptions off; used by the *_no_dx10_clamp kernels
				706	attributes #3 = { nounwind "target-features"="+dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" } ; dx10-clamp on, fp-exceptions on
				707	attributes #4 = { nounwind "target-features"="-dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" } ; dx10-clamp off, fp-exceptions on