Blame - llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.u16.ll - toolchain/llvm-project

blob: cab6c8c0016b15afd45f506d22de0bc11c4406fc [file] [log] [blame]

Marek Olsak	13e4741	2018-01-31 20:18:04 +0000	[diff] [blame]	1	; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
				2	; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
				3	; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
				4	; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
				5
				6	; GCN-LABEL: {{^}}s_cvt_pknorm_u16_f32:
Matt Arsenault	8c4a352	2018-06-26 19:10:00 +0000	[diff] [blame]	7	; GCN-DAG: s_load_dwordx2 s{{\[}}[[SX:[0-9]+]]:[[SY:[0-9]+]]{{\]}}, s[0:1], 0x{{b|2c}}
				8	; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], s[[SY]]
				9	; SI: v_cvt_pknorm_u16_f32_e32 v{{[0-9]+}}, s[[SX]], [[VY]]
				10	; VI: v_cvt_pknorm_u16_f32 v{{[0-9]+}}, s[[SX]], [[VY]]
Marek Olsak	13e4741	2018-01-31 20:18:04 +0000	[diff] [blame]	11	define amdgpu_kernel void @s_cvt_pknorm_u16_f32(i32 addrspace(1)* %out, float %x, float %y) #0 { ; both intrinsic operands are float kernel arguments
				12	%result = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %x, float %y) ; %x is operand 0, %y is operand 1
				13	%r = bitcast <2 x i16> %result to i32 ; view the <2 x i16> result as a single i32
				14	store i32 %r, i32 addrspace(1)* %out ; write the i32 through the addrspace(1) pointer %out
				15	ret void
				16	}
				17
				18	; GCN-LABEL: {{^}}s_cvt_pknorm_u16_samereg_f32:
				19	; GCN: s_load_dword [[X:s[0-9]+]]
				20	; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, [[X]], [[X]]
				21	define amdgpu_kernel void @s_cvt_pknorm_u16_samereg_f32(i32 addrspace(1)* %out, float %x) #0 { ; the single float kernel argument feeds both intrinsic operands
				22	%result = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %x, float %x) ; same value %x used as operand 0 and operand 1
				23	%r = bitcast <2 x i16> %result to i32 ; view the <2 x i16> result as a single i32
				24	store i32 %r, i32 addrspace(1)* %out ; write the i32 through the addrspace(1) pointer %out
				25	ret void
				26	}
				27
				28	; GCN-LABEL: {{^}}v_cvt_pknorm_u16_f32:
				29	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
				30	; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
				31	; SI: v_cvt_pknorm_u16_f32_e32 v{{[0-9]+}}, [[A]], [[B]]
				32	; VI: v_cvt_pknorm_u16_f32 v{{[0-9]+}}, [[A]], [[B]]
				33	define amdgpu_kernel void @v_cvt_pknorm_u16_f32(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { ; both intrinsic operands come from volatile loads indexed by the workitem id
				34	%tid = call i32 @llvm.amdgcn.workitem.id.x() ; index source for all three GEPs below
				35	%tid.ext = sext i32 %tid to i64 ; sign-extend to i64 for use as a GEP index
				36	%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext ; address of element %tid.ext of %a.ptr
				37	%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext ; address of element %tid.ext of %b.ptr
				38	%out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext ; address of element %tid.ext of %out
				39	%a = load volatile float, float addrspace(1)* %a.gep ; operand 0, loaded first
				40	%b = load volatile float, float addrspace(1)* %b.gep ; operand 1, loaded second
				41	%cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %a, float %b) ; intrinsic under test with two loaded operands
				42	%r = bitcast <2 x i16> %cvt to i32 ; view the <2 x i16> result as a single i32
				43	store i32 %r, i32 addrspace(1)* %out.gep ; write the i32 to this index's slot in %out
				44	ret void
				45	}
				46
				47	; GCN-LABEL: {{^}}v_cvt_pknorm_u16_f32_reg_imm:
				48	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
				49	; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], 1.0
				50	define amdgpu_kernel void @v_cvt_pknorm_u16_f32_reg_imm(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { ; operand 0 is a loaded value, operand 1 is the constant 1.0
				51	%tid = call i32 @llvm.amdgcn.workitem.id.x() ; index source for the GEPs below
				52	%tid.ext = sext i32 %tid to i64 ; sign-extend to i64 for use as a GEP index
				53	%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext ; address of element %tid.ext of %a.ptr
				54	%out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext ; address of element %tid.ext of %out
				55	%a = load volatile float, float addrspace(1)* %a.gep ; operand 0
				56	%cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %a, float 1.0) ; constant in the second operand position
				57	%r = bitcast <2 x i16> %cvt to i32 ; view the <2 x i16> result as a single i32
				58	store i32 %r, i32 addrspace(1)* %out.gep ; write the i32 to this index's slot in %out
				59	ret void
				60	}
				61
				62	; GCN-LABEL: {{^}}v_cvt_pknorm_u16_f32_imm_reg:
				63	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
				64	; SI: v_cvt_pknorm_u16_f32_e32 v{{[0-9]+}}, 1.0, [[A]]
				65	; VI: v_cvt_pknorm_u16_f32 v{{[0-9]+}}, 1.0, [[A]]
				66	define amdgpu_kernel void @v_cvt_pknorm_u16_f32_imm_reg(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { ; operand 0 is the constant 1.0, operand 1 is a loaded value
				67	%tid = call i32 @llvm.amdgcn.workitem.id.x() ; index source for the GEPs below
				68	%tid.ext = sext i32 %tid to i64 ; sign-extend to i64 for use as a GEP index
				69	%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext ; address of element %tid.ext of %a.ptr
				70	%out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext ; address of element %tid.ext of %out
				71	%a = load volatile float, float addrspace(1)* %a.gep ; operand 1
				72	%cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float 1.0, float %a) ; constant in the first operand position
				73	%r = bitcast <2 x i16> %cvt to i32 ; view the <2 x i16> result as a single i32
				74	store i32 %r, i32 addrspace(1)* %out.gep ; write the i32 to this index's slot in %out
				75	ret void
				76	}
				77
				78	; GCN-LABEL: {{^}}v_cvt_pknorm_u16_f32_fneg_lo:
				79	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
				80	; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
				81	; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], [[B]]
				82	define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_lo(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { ; operand 0 is negated before the call, operand 1 is passed unchanged
				83	%tid = call i32 @llvm.amdgcn.workitem.id.x() ; index source for the GEPs below
				84	%tid.ext = sext i32 %tid to i64 ; sign-extend to i64 for use as a GEP index
				85	%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext ; address of element %tid.ext of %a.ptr
				86	%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext ; address of element %tid.ext of %b.ptr
				87	%out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext ; address of element %tid.ext of %out
				88	%a = load volatile float, float addrspace(1)* %a.gep ; loaded first
				89	%b = load volatile float, float addrspace(1)* %b.gep ; loaded second
				90	%neg.a = fsub float -0.0, %a ; negation of %a written as (-0.0 - %a)
				91	%cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %neg.a, float %b) ; negated value in the first operand position
				92	%r = bitcast <2 x i16> %cvt to i32 ; view the <2 x i16> result as a single i32
				93	store i32 %r, i32 addrspace(1)* %out.gep ; write the i32 to this index's slot in %out
				94	ret void
				95	}
				96
				97	; GCN-LABEL: {{^}}v_cvt_pknorm_u16_f32_fneg_hi:
				98	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
				99	; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
				100	; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], -[[B]]
				101	define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_hi(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { ; operand 1 is negated before the call, operand 0 is passed unchanged
				102	%tid = call i32 @llvm.amdgcn.workitem.id.x() ; index source for the GEPs below
				103	%tid.ext = sext i32 %tid to i64 ; sign-extend to i64 for use as a GEP index
				104	%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext ; address of element %tid.ext of %a.ptr
				105	%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext ; address of element %tid.ext of %b.ptr
				106	%out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext ; address of element %tid.ext of %out
				107	%a = load volatile float, float addrspace(1)* %a.gep ; loaded first
				108	%b = load volatile float, float addrspace(1)* %b.gep ; loaded second
				109	%neg.b = fsub float -0.0, %b ; negation of %b written as (-0.0 - %b)
				110	%cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %a, float %neg.b) ; negated value in the second operand position
				111	%r = bitcast <2 x i16> %cvt to i32 ; view the <2 x i16> result as a single i32
				112	store i32 %r, i32 addrspace(1)* %out.gep ; write the i32 to this index's slot in %out
				113	ret void
				114	}
				115
				116	; GCN-LABEL: {{^}}v_cvt_pknorm_u16_f32_fneg_lo_hi:
				117	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
				118	; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
				119	; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], -[[B]]
				120	define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_lo_hi(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { ; both operands are negated before the call
				121	%tid = call i32 @llvm.amdgcn.workitem.id.x() ; index source for the GEPs below
				122	%tid.ext = sext i32 %tid to i64 ; sign-extend to i64 for use as a GEP index
				123	%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext ; address of element %tid.ext of %a.ptr
				124	%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext ; address of element %tid.ext of %b.ptr
				125	%out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext ; address of element %tid.ext of %out
				126	%a = load volatile float, float addrspace(1)* %a.gep ; loaded first
				127	%b = load volatile float, float addrspace(1)* %b.gep ; loaded second
				128	%neg.a = fsub float -0.0, %a ; negation of %a written as (-0.0 - %a)
				129	%neg.b = fsub float -0.0, %b ; negation of %b written as (-0.0 - %b)
				130	%cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %neg.a, float %neg.b) ; negated values in both operand positions
				131	%r = bitcast <2 x i16> %cvt to i32 ; view the <2 x i16> result as a single i32
				132	store i32 %r, i32 addrspace(1)* %out.gep ; write the i32 to this index's slot in %out
				133	ret void
				134	}
				135
				136	; GCN-LABEL: {{^}}v_cvt_pknorm_u16_f32_fneg_fabs_lo_fneg_hi:
				137	; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
				138	; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
				139	; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, -|[[A]]|, -[[B]]
				140	define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_fabs_lo_fneg_hi(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { ; operand 0 is the negated absolute value of %a, operand 1 is the negated %b
				141	%tid = call i32 @llvm.amdgcn.workitem.id.x() ; index source for the GEPs below
				142	%tid.ext = sext i32 %tid to i64 ; sign-extend to i64 for use as a GEP index
				143	%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext ; address of element %tid.ext of %a.ptr
				144	%b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext ; address of element %tid.ext of %b.ptr
				145	%out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext ; address of element %tid.ext of %out
				146	%a = load volatile float, float addrspace(1)* %a.gep ; loaded first
				147	%b = load volatile float, float addrspace(1)* %b.gep ; loaded second
				148	%fabs.a = call float @llvm.fabs.f32(float %a) ; absolute value of %a
				149	%neg.fabs.a = fsub float -0.0, %fabs.a ; negation of the absolute value, written as (-0.0 - %fabs.a)
				150	%neg.b = fsub float -0.0, %b ; negation of %b written as (-0.0 - %b)
				151	%cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %neg.fabs.a, float %neg.b) ; fneg(fabs) in the first position, fneg in the second
				152	%r = bitcast <2 x i16> %cvt to i32 ; view the <2 x i16> result as a single i32
				153	store i32 %r, i32 addrspace(1)* %out.gep ; write the i32 to this index's slot in %out
				154	ret void
				155	}
				156
				157	declare <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float, float) #1 ; intrinsic under test: two float operands, <2 x i16> result
				158	declare float @llvm.fabs.f32(float) #1 ; used by the fneg_fabs test to build its first operand
				159	declare i32 @llvm.amdgcn.workitem.id.x() #1 ; used by the v_* tests as the GEP index source
				160
				161
				162	attributes #0 = { nounwind } ; attached to every test kernel definition in this file
				163	attributes #1 = { nounwind readnone } ; attached to the three intrinsic declarations