Blame - llvm/test/CodeGen/R600/fetch-limits.r700+.ll - toolchain/llvm-project

blob: 1a8a43fccc7264930c732e177656a15139e14611 [file] [log] [blame]

Tom Stellard	3498e4f	2013-06-07 20:28:55 +0000	[diff] [blame]	1	; RUN: llc < %s -march=r600 -mcpu=rv710 | FileCheck %s
				2	; RUN: llc < %s -march=r600 -mcpu=rv730 | FileCheck %s
				3	; RUN: llc < %s -march=r600 -mcpu=rv770 | FileCheck %s
				4	; RUN: llc < %s -march=r600 -mcpu=cedar | FileCheck %s
				5	; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
				6	; RUN: llc < %s -march=r600 -mcpu=sumo | FileCheck %s
				7	; RUN: llc < %s -march=r600 -mcpu=juniper | FileCheck %s
				8	; RUN: llc < %s -march=r600 -mcpu=cypress | FileCheck %s
				9	; RUN: llc < %s -march=r600 -mcpu=barts | FileCheck %s
				10	; RUN: llc < %s -march=r600 -mcpu=turks | FileCheck %s
				11	; RUN: llc < %s -march=r600 -mcpu=caicos | FileCheck %s
				12	; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s
				13
				14	; r700+ supports 16 fetches in a clause, so the 17 tex calls below are expected to be split across two fetch clauses
				15	; CHECK: @fetch_limits_r700
				16	; CHECK: Fetch clause
				17	; CHECK: Fetch clause
				18
				19	define void @fetch_limits_r700() #0 {
				20	entry:
					; Load 17 <4 x float> values from address space 8: elements 0 through 16
					; of a [1024 x <4 x float>] array based at null.
				21	%0 = load <4 x float> addrspace(8)* null
				22	%1 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
				23	%2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
				24	%3 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
				25	%4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
				26	%5 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
				27	%6 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
				28	%7 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
				29	%8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
				30	%9 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
				31	%10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
				32	%11 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
				33	%12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
				34	%13 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
				35	%14 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
				36	%15 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
				37	%16 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
					; Pass each loaded vector to llvm.AMDGPU.tex: 17 calls in total, one more
					; than the 16-fetch clause limit noted in the file header. The CHECK lines
					; above accordingly look for "Fetch clause" twice.
					; NOTE(review): presumably each tex call lowers to one fetch instruction -- confirm.
				38	%res0 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %0, i32 0, i32 0, i32 1)
				39	%res1 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %1, i32 0, i32 0, i32 1)
				40	%res2 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %2, i32 0, i32 0, i32 1)
				41	%res3 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %3, i32 0, i32 0, i32 1)
				42	%res4 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %4, i32 0, i32 0, i32 1)
				43	%res5 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %5, i32 0, i32 0, i32 1)
				44	%res6 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %6, i32 0, i32 0, i32 1)
				45	%res7 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %7, i32 0, i32 0, i32 1)
				46	%res8 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 0, i32 0, i32 1)
				47	%res9 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %9, i32 0, i32 0, i32 1)
				48	%res10 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %10, i32 0, i32 0, i32 1)
				49	%res11 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %11, i32 0, i32 0, i32 1)
				50	%res12 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %12, i32 0, i32 0, i32 1)
				51	%res13 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %13, i32 0, i32 0, i32 1)
				52	%res14 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %14, i32 0, i32 0, i32 1)
				53	%res15 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %15, i32 0, i32 0, i32 1)
				54	%res16 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %16, i32 0, i32 0, i32 1)
					; Combine all 17 results with a tree of fadds so that every tex result
					; feeds the value that is finally stored.
				55	%a = fadd <4 x float> %res0, %res1
				56	%b = fadd <4 x float> %res2, %res3
				57	%c = fadd <4 x float> %res4, %res5
				58	%d = fadd <4 x float> %res6, %res7
				59	%e = fadd <4 x float> %res8, %res9
				60	%f = fadd <4 x float> %res10, %res11
				61	%g = fadd <4 x float> %res12, %res13
				62	%h = fadd <4 x float> %res14, %res15
				63	%i = fadd <4 x float> %res16, %a
				64
				65	%bc = fadd <4 x float> %b, %c
				66	%de = fadd <4 x float> %d, %e
				67	%fg = fadd <4 x float> %f, %g
				68	%hi = fadd <4 x float> %h, %i
				69
				70	%bcde = fadd <4 x float> %bc, %de
				71	%fghi = fadd <4 x float> %fg, %hi
				72
				73	%bcdefghi = fadd <4 x float> %bcde, %fghi
					; Hand the final sum to the llvm.R600.store.swizzle intrinsic.
				74	call void @llvm.R600.store.swizzle(<4 x float> %bcdefghi, i32 0, i32 1)
				75	ret void
				76	}
				77
				78	attributes #0 = { "ShaderType"="0" } ; Pixel Shader
				79
					; Intrinsics called by @fetch_limits_r700. The tex intrinsic takes a
					; <4 x float> plus three i32 operands and is declared readnone; the
					; store intrinsic takes a <4 x float> plus two i32 operands and returns void.
				80	declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone
				81	declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)