Blame - llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll - toolchain/llvm-project

blob: 617f1f19e360ae6e2550ec8287740bcc9a5691af [file] [log] [blame]

Marek Olsak	2d82590	2017-04-28 20:21:58 +0000	[diff] [blame]	1	;RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s --check-prefix=GCN
				2
				3	; GCN-LABEL: {{^}}full_mask:
				4	; GCN: s_mov_b64 exec, -1
				5	; GCN: v_add_f32_e32 v0,
					; full_mask: adds %a and %b, then calls llvm.amdgcn.init.exec with the
					; constant i64 -1. The expected output above lists the exec write
					; (s_mov_b64 exec, -1) ahead of the v_add_f32, even though the call
					; follows the fadd in the IR below.
				6	define amdgpu_ps float @full_mask(float %a, float %b) {
				7	main_body:
				8	%s = fadd float %a, %b
					; Constant all-ones mask; issued after the fadd in program order.
				9	call void @llvm.amdgcn.init.exec(i64 -1)
				10	ret float %s
				11	}
				12
				13	; GCN-LABEL: {{^}}partial_mask:
				14	; GCN: s_mov_b64 exec, 0x1e240
				15	; GCN: v_add_f32_e32 v0,
					; partial_mask: same shape as full_mask, but the constant passed to
					; llvm.amdgcn.init.exec is 123456, which is 0x1e240 in hex -- the
					; immediate spelled in the expected s_mov_b64 above. The exec write is
					; again expected ahead of the v_add_f32.
				16	define amdgpu_ps float @partial_mask(float %a, float %b) {
				17	main_body:
				18	%s = fadd float %a, %b
					; 123456 == 0x1e240.
				19	call void @llvm.amdgcn.init.exec(i64 123456)
				20	ret float %s
				21	}
				22
				23	; GCN-LABEL: {{^}}input_s3off8:
				24	; GCN: s_bfe_u32 s0, s3, 0x70008
				25	; GCN: s_bfm_b64 exec, s0, 0
				26	; GCN: s_cmp_eq_u32 s0, 64
				27	; GCN: s_cmov_b64 exec, -1
				28	; GCN: v_add_f32_e32 v0,
					; input_s3off8: %count is the fourth inreg i32 argument (the first three
					; are unnamed), and the expected s_bfe_u32 above reads s3. The second
					; intrinsic operand is 8, matching the low byte (0x08) of the expected
					; immediate 0x70008.
					; NOTE(review): the 0x7 in the upper half of that immediate is presumably
					; a 7-bit field width -- confirm against the ISA description of s_bfe_u32.
					; Expected sequence after the extract: s_bfm_b64 into exec, a compare of
					; the extracted value with 64, then s_cmov_b64 exec, -1, all ahead of the
					; v_add_f32.
				29	define amdgpu_ps float @input_s3off8(i32 inreg, i32 inreg, i32 inreg, i32 inreg %count, float %a, float %b) {
				30	main_body:
				31	%s = fadd float %a, %b
					; Second operand (8) is the value that shows up as 0x08 in 0x70008.
				32	call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 8)
				33	ret float %s
				34	}
				35
				36	; GCN-LABEL: {{^}}input_s0off19:
				37	; GCN: s_bfe_u32 s0, s0, 0x70013
				38	; GCN: s_bfm_b64 exec, s0, 0
				39	; GCN: s_cmp_eq_u32 s0, 64
				40	; GCN: s_cmov_b64 exec, -1
				41	; GCN: v_add_f32_e32 v0,
					; input_s0off19: %count is the only inreg argument, and the expected
					; s_bfe_u32 above reads s0 and writes its result back to s0. The second
					; intrinsic operand is 19 (0x13), matching the low byte of the expected
					; immediate 0x70013. The remaining expected instructions are the same
					; bfm / compare-with-64 / cmov sequence as in input_s3off8.
				42	define amdgpu_ps float @input_s0off19(i32 inreg %count, float %a, float %b) {
				43	main_body:
				44	%s = fadd float %a, %b
					; 19 == 0x13, the low byte of 0x70013.
				45	call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 19)
				46	ret float %s
				47	}
				48
				49	; GCN-LABEL: {{^}}reuse_input:
				50	; GCN: s_bfe_u32 s1, s0, 0x70013
				51	; GCN: s_bfm_b64 exec, s1, 0
				52	; GCN: s_cmp_eq_u32 s1, 64
				53	; GCN: s_cmov_b64 exec, -1
				54	; GCN: v_add_i32_e32 v0, vcc, s0, v0
					; reuse_input: %count feeds both the init.exec.from.input call and a
					; later integer add. The expected s_bfe_u32 above writes s1 rather than
					; s0, and the expected v_add_i32 still reads s0.
					; NOTE(review): presumably the point is that the extract must not
					; overwrite the register still holding %count -- confirm intent.
				55	define amdgpu_ps float @reuse_input(i32 inreg %count, i32 %a) {
				56	main_body:
					; Here the intrinsic call comes first; %count is used again below.
				57	call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 19)
				58	%s = add i32 %a, %count
				59	%f = sitofp i32 %s to float
				60	ret float %f
				61	}
				62
				63	; GCN-LABEL: {{^}}reuse_input2:
				64	; GCN: s_bfe_u32 s1, s0, 0x70013
				65	; GCN: s_bfm_b64 exec, s1, 0
				66	; GCN: s_cmp_eq_u32 s1, 64
				67	; GCN: s_cmov_b64 exec, -1
				68	; GCN: v_add_i32_e32 v0, vcc, s0, v0
					; reuse_input2: same operations as reuse_input, but the add and sitofp
					; precede the init.exec.from.input call in the IR. The expected
					; instructions above are identical to those of reuse_input: the exec
					; setup sequence is still listed ahead of the v_add_i32, and the extract
					; still targets s1 while the add reads s0.
				69	define amdgpu_ps float @reuse_input2(i32 inreg %count, i32 %a) {
				70	main_body:
				71	%s = add i32 %a, %count
				72	%f = sitofp i32 %s to float
					; Here the intrinsic call comes last, after both uses of %count's sum.
				73	call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 19)
				74	ret float %f
				75	}
				76
				77	declare void @llvm.amdgcn.init.exec(i64) #1
				78	declare void @llvm.amdgcn.init.exec.from.input(i32, i32) #1
				79
					; Attribute group #1 is applied to both intrinsic declarations above and
					; marks them convergent.
				80	attributes #1 = { convergent }