; RUN: llc -march=amdgcn -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=SI -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s

; Checks the instructions selected for 64-bit and 128-bit element accesses to
; alloca arrays indexed by a kernel argument. The first llc invocation disables
; alloca promotion (checked under the SI-ALLOCA prefix); the second enables it
; (checked under the SI-PROMOTE prefix). Function labels are shared by both.

; Barrier intrinsic called between the store and the reload in each test.
declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind

; double element: store the loaded value into a 16-element alloca array at
; index %b, then reload it after a barrier and write it to %out.
; Promotion disabled: expects a dwordx2 buffer store and load.
; Promotion enabled: expects a 64-bit ds write and read.
; SI-LABEL: {{^}}private_access_f64_alloca:

; SI-ALLOCA: buffer_store_dwordx2
; SI-ALLOCA: buffer_load_dwordx2

; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_read_b64
define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) nounwind {
  %val = load double addrspace(1)* %in, align 8
  ; 16-element array; the slot is selected by the kernel argument %b.
  %array = alloca double, i32 16, align 8
  %ptr = getelementptr double* %array, i32 %b
  store double %val, double* %ptr, align 8
  call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
  %result = load double* %ptr, align 8
  store double %result, double addrspace(1)* %out, align 8
  ret void
}

; <2 x double> element: store the loaded vector into a 16-element alloca array
; at index %b, then reload it after a barrier and write it to %out.
; Promotion disabled: expects a dwordx4 buffer store and load.
; Promotion enabled: expects four 32-bit ds writes followed by four 32-bit
; ds reads.
; SI-LABEL: {{^}}private_access_v2f64_alloca:

; SI-ALLOCA: buffer_store_dwordx4
; SI-ALLOCA: buffer_load_dwordx4

; SI-PROMOTE: ds_write_b32
; SI-PROMOTE: ds_write_b32
; SI-PROMOTE: ds_write_b32
; SI-PROMOTE: ds_write_b32
; SI-PROMOTE: ds_read_b32
; SI-PROMOTE: ds_read_b32
; SI-PROMOTE: ds_read_b32
; SI-PROMOTE: ds_read_b32
define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind {
  %val = load <2 x double> addrspace(1)* %in, align 16
  ; 16-element array; the slot is selected by the kernel argument %b.
  %array = alloca <2 x double>, i32 16, align 16
  %ptr = getelementptr <2 x double>* %array, i32 %b
  store <2 x double> %val, <2 x double>* %ptr, align 16
  call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
  %result = load <2 x double>* %ptr, align 16
  store <2 x double> %result, <2 x double> addrspace(1)* %out, align 16
  ret void
}

; i64 element: store the loaded value into a 16-element alloca array at
; index %b, then reload it after a barrier and write it to %out.
; Promotion disabled: expects a dwordx2 buffer store and load.
; Promotion enabled: expects a 64-bit ds write and read.
; SI-LABEL: {{^}}private_access_i64_alloca:

; SI-ALLOCA: buffer_store_dwordx2
; SI-ALLOCA: buffer_load_dwordx2

; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_read_b64
define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) nounwind {
  %val = load i64 addrspace(1)* %in, align 8
  ; 16-element array; the slot is selected by the kernel argument %b.
  %array = alloca i64, i32 16, align 8
  %ptr = getelementptr i64* %array, i32 %b
  store i64 %val, i64* %ptr, align 8
  call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
  %result = load i64* %ptr, align 8
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; <2 x i64> element: store the loaded vector into a 16-element alloca array at
; index %b, then reload it after a barrier and write it to %out.
; Promotion disabled: expects a dwordx4 buffer store and load.
; Promotion enabled: expects four 32-bit ds writes followed by four 32-bit
; ds reads.
; SI-LABEL: {{^}}private_access_v2i64_alloca:

; SI-ALLOCA: buffer_store_dwordx4
; SI-ALLOCA: buffer_load_dwordx4

; SI-PROMOTE: ds_write_b32
; SI-PROMOTE: ds_write_b32
; SI-PROMOTE: ds_write_b32
; SI-PROMOTE: ds_write_b32
; SI-PROMOTE: ds_read_b32
; SI-PROMOTE: ds_read_b32
; SI-PROMOTE: ds_read_b32
; SI-PROMOTE: ds_read_b32
define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind {
  %val = load <2 x i64> addrspace(1)* %in, align 16
  ; 16-element array; the slot is selected by the kernel argument %b.
  %array = alloca <2 x i64>, i32 16, align 16
  %ptr = getelementptr <2 x i64>* %array, i32 %b
  store <2 x i64> %val, <2 x i64>* %ptr, align 16
  call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
  %result = load <2 x i64>* %ptr, align 16
  store <2 x i64> %result, <2 x i64> addrspace(1)* %out, align 16
  ret void
}