; Checks how 64-bit and 128-bit accesses to a dynamically indexed private
; array are selected on amdgcn, with promote-alloca disabled (at two different
; max-private-element-size settings) and with promote-alloca enabled, both for
; the default subtarget and for -mcpu=tonga.
;
; NOTE(review): no check line in the visible file uses the CI-ALLOCA16 prefix,
; so the fourth invocation below only verifies the shared label checks.
; Confirm whether it was meant to reuse the SI-ALLOCA16 checks.

; RUN: llc -march=amdgcn -mattr=-promote-alloca,+max-private-element-size-16 -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA16 -check-prefix=SI %s
; RUN: llc -march=amdgcn -mattr=-promote-alloca,+max-private-element-size-4 -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA4 -check-prefix=SI %s
; RUN: llc -march=amdgcn -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca,+max-private-element-size-16 -verify-machineinstrs < %s | FileCheck -check-prefix=CI-ALLOCA16 -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=CI-PROMOTE -check-prefix=SI %s

; Barrier intrinsic called between the store and the reload in every test
; function in this file.
declare void @llvm.amdgcn.s.barrier() #0

; double element of a [16 x double] alloca addressed by the runtime index %b:
; the value loaded from %in is stored into the slot, the barrier intrinsic is
; called, and the slot is read back and written to %out.
;
; Expected selection, by check prefix:
;   SI-ALLOCA16  - one dwordx2 buffer store and one dwordx2 buffer load
;   SI-ALLOCA4   - the 64-bit access split into two dword stores / two dword loads
;   SI-PROMOTE and CI-PROMOTE - a single 64-bit DS write and DS read
; SI-LABEL: {{^}}private_access_f64_alloca:

; SI-ALLOCA16: buffer_store_dwordx2
; SI-ALLOCA16: buffer_load_dwordx2

; SI-ALLOCA4: buffer_store_dword v
; SI-ALLOCA4: buffer_store_dword v
; SI-ALLOCA4: buffer_load_dword v
; SI-ALLOCA4: buffer_load_dword v

; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_read_b64
; CI-PROMOTE: ds_write_b64
; CI-PROMOTE: ds_read_b64
define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) #1 {
  %val = load double, double addrspace(1)* %in, align 8
  %array = alloca [16 x double], align 8
  %ptr = getelementptr inbounds [16 x double], [16 x double]* %array, i32 0, i32 %b
  store double %val, double* %ptr, align 8
  ; NOTE(review): the barrier presumably keeps the store/reload pair from being
  ; folded away before instruction selection - confirm.
  call void @llvm.amdgcn.s.barrier()
  %result = load double, double* %ptr, align 8
  store double %result, double addrspace(1)* %out, align 8
  ret void
}

; <2 x double> element of an [8 x <2 x double>] alloca addressed by the
; runtime index %b: the vector loaded from %in is stored into the slot, the
; barrier intrinsic is called, and the slot is read back and written to %out.
;
; Expected selection, by check prefix:
;   SI-ALLOCA16  - one dwordx4 buffer store and one dwordx4 buffer load
;   SI-ALLOCA4   - the 128-bit access split into four dword stores / four dword loads
;   SI-PROMOTE   - two 64-bit DS writes and two 64-bit DS reads
;   CI-PROMOTE   - one ds_write2_b64 and one ds_read2_b64
; SI-LABEL: {{^}}private_access_v2f64_alloca:

; SI-ALLOCA16: buffer_store_dwordx4
; SI-ALLOCA16: buffer_load_dwordx4

; SI-ALLOCA4: buffer_store_dword v
; SI-ALLOCA4: buffer_store_dword v
; SI-ALLOCA4: buffer_store_dword v
; SI-ALLOCA4: buffer_store_dword v
; SI-ALLOCA4: buffer_load_dword v
; SI-ALLOCA4: buffer_load_dword v
; SI-ALLOCA4: buffer_load_dword v
; SI-ALLOCA4: buffer_load_dword v

; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_read_b64
; SI-PROMOTE: ds_read_b64
; CI-PROMOTE: ds_write2_b64
; CI-PROMOTE: ds_read2_b64
define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) #1 {
  %val = load <2 x double>, <2 x double> addrspace(1)* %in, align 16
  %array = alloca [8 x <2 x double>], align 16
  %ptr = getelementptr inbounds [8 x <2 x double>], [8 x <2 x double>]* %array, i32 0, i32 %b
  store <2 x double> %val, <2 x double>* %ptr, align 16
  ; NOTE(review): the barrier presumably keeps the store/reload pair from being
  ; folded away before instruction selection - confirm.
  call void @llvm.amdgcn.s.barrier()
  %result = load <2 x double>, <2 x double>* %ptr, align 16
  store <2 x double> %result, <2 x double> addrspace(1)* %out, align 16
  ret void
}

; i64 element of an [8 x i64] alloca addressed by the runtime index %b: the
; value loaded from %in is stored into the slot, the barrier intrinsic is
; called, and the slot is read back and written to %out.
;
; Expected selection, by check prefix:
;   SI-ALLOCA16  - one dwordx2 buffer store and one dwordx2 buffer load
;   SI-ALLOCA4   - the 64-bit access split into two dword stores / two dword loads
;   SI-PROMOTE and CI-PROMOTE - a single 64-bit DS write and DS read
; SI-LABEL: {{^}}private_access_i64_alloca:

; SI-ALLOCA16: buffer_store_dwordx2
; SI-ALLOCA16: buffer_load_dwordx2

; SI-ALLOCA4: buffer_store_dword v
; SI-ALLOCA4: buffer_store_dword v
; SI-ALLOCA4: buffer_load_dword v
; SI-ALLOCA4: buffer_load_dword v


; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_read_b64
; CI-PROMOTE: ds_write_b64
; CI-PROMOTE: ds_read_b64
define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) #1 {
  %val = load i64, i64 addrspace(1)* %in, align 8
  %array = alloca [8 x i64], align 8
  %ptr = getelementptr inbounds [8 x i64], [8 x i64]* %array, i32 0, i32 %b
  store i64 %val, i64* %ptr, align 8
  ; NOTE(review): the barrier presumably keeps the store/reload pair from being
  ; folded away before instruction selection - confirm.
  call void @llvm.amdgcn.s.barrier()
  %result = load i64, i64* %ptr, align 8
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; <2 x i64> element of an [8 x <2 x i64>] alloca addressed by the runtime
; index %b: the vector loaded from %in is stored into the slot, the barrier
; intrinsic is called, and the slot is read back and written to %out.
;
; Expected selection, by check prefix:
;   SI-ALLOCA16  - one dwordx4 buffer store and one dwordx4 buffer load
;   SI-ALLOCA4   - the 128-bit access split into four dword stores / four dword loads
;   SI-PROMOTE   - two 64-bit DS writes and two 64-bit DS reads
;   CI-PROMOTE   - one ds_write2_b64 and one ds_read2_b64
; SI-LABEL: {{^}}private_access_v2i64_alloca:

; SI-ALLOCA16: buffer_store_dwordx4
; SI-ALLOCA16: buffer_load_dwordx4

; SI-ALLOCA4: buffer_store_dword v
; SI-ALLOCA4: buffer_store_dword v
; SI-ALLOCA4: buffer_store_dword v
; SI-ALLOCA4: buffer_store_dword v

; SI-ALLOCA4: buffer_load_dword v
; SI-ALLOCA4: buffer_load_dword v
; SI-ALLOCA4: buffer_load_dword v
; SI-ALLOCA4: buffer_load_dword v

; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_read_b64
; SI-PROMOTE: ds_read_b64
; CI-PROMOTE: ds_write2_b64
; CI-PROMOTE: ds_read2_b64
define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) #1 {
  %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
  %array = alloca [8 x <2 x i64>], align 16
  %ptr = getelementptr inbounds [8 x <2 x i64>], [8 x <2 x i64>]* %array, i32 0, i32 %b
  store <2 x i64> %val, <2 x i64>* %ptr, align 16
  ; NOTE(review): the barrier presumably keeps the store/reload pair from being
  ; folded away before instruction selection - confirm.
  call void @llvm.amdgcn.s.barrier()
  %result = load <2 x i64>, <2 x i64>* %ptr, align 16
  store <2 x i64> %result, <2 x i64> addrspace(1)* %out, align 16
  ret void
}

; #0 is attached to the @llvm.amdgcn.s.barrier declaration.
attributes #0 = { convergent nounwind }
; #1 is attached to every private_access_* test function; it sets
; amdgpu-max-waves-per-eu to 2 and amdgpu-max-work-group-size to 64.
attributes #1 = { nounwind "amdgpu-max-waves-per-eu"="2" "amdgpu-max-work-group-size"="64" }