; RUN: llc -march=r600 -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
; RUN: llc -march=r600 -mcpu=SI -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s

; Exercises stores to and loads from alloca'd arrays of 64-bit scalar and
; 2 x 64-bit vector element types. The file is compiled twice: once with the
; promote-alloca attribute disabled and once with it enabled, and each
; configuration has its own set of expected instructions.

; Barrier intrinsic called between the store and the reload in each test
; function of this file.
declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind

; double element type: %val is loaded from %in, stored into slot %b of a
; 16-element alloca, the barrier intrinsic is called, and the same slot is
; reloaded and written to %out.
; Expected selection: DWORDX2 buffer store/load when promote-alloca is
; disabled, B64 DS write/read when it is enabled.
; SI-LABEL: {{^}}private_access_f64_alloca:

; SI-ALLOCA: BUFFER_STORE_DWORDX2
; SI-ALLOCA: BUFFER_LOAD_DWORDX2

; SI-PROMOTE: DS_WRITE_B64
; SI-PROMOTE: DS_READ_B64
define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) nounwind {
  %val = load double addrspace(1)* %in, align 8
  %array = alloca double, i32 16, align 8
  ; %b is a runtime value, so the accessed slot is not a compile-time constant.
  %ptr = getelementptr double* %array, i32 %b
  store double %val, double* %ptr, align 8
  call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
  %result = load double* %ptr, align 8
  store double %result, double addrspace(1)* %out, align 8
  ret void
}

; <2 x double> element type: %val is loaded from %in, stored into slot %b of
; a 16-element alloca, the barrier intrinsic is called, and the same slot is
; reloaded and written to %out.
; Expected selection: DWORDX4 buffer store/load when promote-alloca is
; disabled; four B32 DS writes followed by four B32 DS reads when it is
; enabled.
; SI-LABEL: {{^}}private_access_v2f64_alloca:

; SI-ALLOCA: BUFFER_STORE_DWORDX4
; SI-ALLOCA: BUFFER_LOAD_DWORDX4

; SI-PROMOTE: DS_WRITE_B32
; SI-PROMOTE: DS_WRITE_B32
; SI-PROMOTE: DS_WRITE_B32
; SI-PROMOTE: DS_WRITE_B32
; SI-PROMOTE: DS_READ_B32
; SI-PROMOTE: DS_READ_B32
; SI-PROMOTE: DS_READ_B32
; SI-PROMOTE: DS_READ_B32
define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind {
  %val = load <2 x double> addrspace(1)* %in, align 16
  %array = alloca <2 x double>, i32 16, align 16
  ; %b is a runtime value, so the accessed slot is not a compile-time constant.
  %ptr = getelementptr <2 x double>* %array, i32 %b
  store <2 x double> %val, <2 x double>* %ptr, align 16
  call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
  %result = load <2 x double>* %ptr, align 16
  store <2 x double> %result, <2 x double> addrspace(1)* %out, align 16
  ret void
}

; i64 element type: %val is loaded from %in, stored into slot %b of a
; 16-element alloca, the barrier intrinsic is called, and the same slot is
; reloaded and written to %out.
; Expected selection: DWORDX2 buffer store/load when promote-alloca is
; disabled, B64 DS write/read when it is enabled.
; SI-LABEL: {{^}}private_access_i64_alloca:

; SI-ALLOCA: BUFFER_STORE_DWORDX2
; SI-ALLOCA: BUFFER_LOAD_DWORDX2

; SI-PROMOTE: DS_WRITE_B64
; SI-PROMOTE: DS_READ_B64
define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) nounwind {
  %val = load i64 addrspace(1)* %in, align 8
  %array = alloca i64, i32 16, align 8
  ; %b is a runtime value, so the accessed slot is not a compile-time constant.
  %ptr = getelementptr i64* %array, i32 %b
  store i64 %val, i64* %ptr, align 8
  call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
  %result = load i64* %ptr, align 8
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; <2 x i64> element type: %val is loaded from %in, stored into slot %b of a
; 16-element alloca, the barrier intrinsic is called, and the same slot is
; reloaded and written to %out.
; Expected selection: DWORDX4 buffer store/load when promote-alloca is
; disabled; four B32 DS writes followed by four B32 DS reads when it is
; enabled.
; SI-LABEL: {{^}}private_access_v2i64_alloca:

; SI-ALLOCA: BUFFER_STORE_DWORDX4
; SI-ALLOCA: BUFFER_LOAD_DWORDX4

; SI-PROMOTE: DS_WRITE_B32
; SI-PROMOTE: DS_WRITE_B32
; SI-PROMOTE: DS_WRITE_B32
; SI-PROMOTE: DS_WRITE_B32
; SI-PROMOTE: DS_READ_B32
; SI-PROMOTE: DS_READ_B32
; SI-PROMOTE: DS_READ_B32
; SI-PROMOTE: DS_READ_B32
define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind {
  %val = load <2 x i64> addrspace(1)* %in, align 16
  %array = alloca <2 x i64>, i32 16, align 16
  ; %b is a runtime value, so the accessed slot is not a compile-time constant.
  %ptr = getelementptr <2 x i64>* %array, i32 %b
  store <2 x i64> %val, <2 x i64>* %ptr, align 16
  call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
  %result = load <2 x i64>* %ptr, align 16
  store <2 x i64> %result, <2 x i64> addrspace(1)* %out, align 16
  ret void
}