blob: ae2e3ba378fafac78da32fe17c294153b723c109 [file] [log] [blame]
Matt Arsenaulte8ed8e52016-05-11 00:28:54 +00001; RUN: llc -march=amdgcn -mattr=-promote-alloca,+max-private-element-size-16 -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA16 -check-prefix=SI %s
2; RUN: llc -march=amdgcn -mattr=-promote-alloca,+max-private-element-size-4 -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA4 -check-prefix=SI %s
Matt Arsenaultc5fce692016-04-28 18:38:48 +00003; RUN: llc -march=amdgcn -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
Matt Arsenaulte8ed8e52016-05-11 00:28:54 +00004; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca,+max-private-element-size-16 -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA16 -check-prefix=SI %s
Marek Olsak75170772015-01-27 17:27:15 +00005; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
Matt Arsenault7d5e2cb2014-07-13 02:46:17 +00006
; Barrier intrinsic; each test function in this file calls it between its
; store to the stack array and the reload of the same element.
declare void @llvm.amdgcn.s.barrier() #0

; Scalar double, dynamically indexed stack array.
; Loads a double from %in, stores it into element %b of a 16-entry alloca,
; executes a barrier, reloads the same element and writes it to %out.
; Expected instructions per configuration (see the run lines at the top of
; the file):
;   promote-alloca off, max-private-element-size-16 -> one dwordx2 buffer store and load
;   promote-alloca off, max-private-element-size-4  -> two dword buffer stores and loads
;   promote-alloca on                               -> ds_write_b64 / ds_read_b64
; SI-LABEL: {{^}}private_access_f64_alloca:

; SI-ALLOCA16: buffer_store_dwordx2
; SI-ALLOCA16: buffer_load_dwordx2

; SI-ALLOCA4: buffer_store_dword v
; SI-ALLOCA4: buffer_store_dword v
; SI-ALLOCA4: buffer_load_dword v
; SI-ALLOCA4: buffer_load_dword v

; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_read_b64
define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) #1 {
  %val = load double, double addrspace(1)* %in, align 8
  %array = alloca [16 x double], align 8
  ; %b is a function argument, so the element index is not a compile-time constant.
  %ptr = getelementptr inbounds [16 x double], [16 x double]* %array, i32 0, i32 %b
  store double %val, double* %ptr, align 8
  ; NOTE(review): the barrier presumably keeps the store/reload pair from being
  ; folded into a direct copy - confirm against the original commit.
  call void @llvm.amdgcn.s.barrier()
  %result = load double, double* %ptr, align 8
  store double %result, double addrspace(1)* %out, align 8
  ret void
}

; <2 x double> vector, dynamically indexed stack array.
; Loads a <2 x double> from %in, stores it into element %b of an 8-entry
; alloca, executes a barrier, reloads the same element and writes it to %out.
; Expected instructions per configuration (see the run lines at the top of
; the file):
;   promote-alloca off, max-private-element-size-16 -> one dwordx4 buffer store and load
;   promote-alloca off, max-private-element-size-4  -> four dword buffer stores and loads
;   promote-alloca on                               -> two ds_write_b64 and two ds_read_b64
; SI-LABEL: {{^}}private_access_v2f64_alloca:

; SI-ALLOCA16: buffer_store_dwordx4
; SI-ALLOCA16: buffer_load_dwordx4

; SI-ALLOCA4: buffer_store_dword v
; SI-ALLOCA4: buffer_store_dword v
; SI-ALLOCA4: buffer_store_dword v
; SI-ALLOCA4: buffer_store_dword v
; SI-ALLOCA4: buffer_load_dword v
; SI-ALLOCA4: buffer_load_dword v
; SI-ALLOCA4: buffer_load_dword v
; SI-ALLOCA4: buffer_load_dword v

; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_read_b64
; SI-PROMOTE: ds_read_b64
define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) #1 {
  %val = load <2 x double>, <2 x double> addrspace(1)* %in, align 16
  %array = alloca [8 x <2 x double>], align 16
  ; %b is a function argument, so the element index is not a compile-time constant.
  %ptr = getelementptr inbounds [8 x <2 x double>], [8 x <2 x double>]* %array, i32 0, i32 %b
  store <2 x double> %val, <2 x double>* %ptr, align 16
  ; NOTE(review): the barrier presumably keeps the store/reload pair from being
  ; folded into a direct copy - confirm against the original commit.
  call void @llvm.amdgcn.s.barrier()
  %result = load <2 x double>, <2 x double>* %ptr, align 16
  store <2 x double> %result, <2 x double> addrspace(1)* %out, align 16
  ret void
}

; Scalar i64, dynamically indexed stack array.
; Loads an i64 from %in, stores it into element %b of an 8-entry alloca,
; executes a barrier, reloads the same element and writes it to %out.
; Expected instructions per configuration (see the run lines at the top of
; the file):
;   promote-alloca off, max-private-element-size-16 -> one dwordx2 buffer store and load
;   promote-alloca off, max-private-element-size-4  -> two dword buffer stores and loads
;   promote-alloca on                               -> ds_write_b64 / ds_read_b64
; SI-LABEL: {{^}}private_access_i64_alloca:

; SI-ALLOCA16: buffer_store_dwordx2
; SI-ALLOCA16: buffer_load_dwordx2

; SI-ALLOCA4: buffer_store_dword v
; SI-ALLOCA4: buffer_store_dword v
; SI-ALLOCA4: buffer_load_dword v
; SI-ALLOCA4: buffer_load_dword v

; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_read_b64
define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) #1 {
  %val = load i64, i64 addrspace(1)* %in, align 8
  %array = alloca [8 x i64], align 8
  ; %b is a function argument, so the element index is not a compile-time constant.
  %ptr = getelementptr inbounds [8 x i64], [8 x i64]* %array, i32 0, i32 %b
  store i64 %val, i64* %ptr, align 8
  ; NOTE(review): the barrier presumably keeps the store/reload pair from being
  ; folded into a direct copy - confirm against the original commit.
  call void @llvm.amdgcn.s.barrier()
  %result = load i64, i64* %ptr, align 8
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; <2 x i64> vector, dynamically indexed stack array.
; Loads a <2 x i64> from %in, stores it into element %b of an 8-entry alloca,
; executes a barrier, reloads the same element and writes it to %out.
; Expected instructions per configuration (see the run lines at the top of
; the file):
;   promote-alloca off, max-private-element-size-16 -> one dwordx4 buffer store and load
;   promote-alloca off, max-private-element-size-4  -> four dword buffer stores and loads
;   promote-alloca on                               -> two ds_write_b64 and two ds_read_b64
; SI-LABEL: {{^}}private_access_v2i64_alloca:

; SI-ALLOCA16: buffer_store_dwordx4
; SI-ALLOCA16: buffer_load_dwordx4

; SI-ALLOCA4: buffer_store_dword v
; SI-ALLOCA4: buffer_store_dword v
; SI-ALLOCA4: buffer_store_dword v
; SI-ALLOCA4: buffer_store_dword v

; SI-ALLOCA4: buffer_load_dword v
; SI-ALLOCA4: buffer_load_dword v
; SI-ALLOCA4: buffer_load_dword v
; SI-ALLOCA4: buffer_load_dword v

; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_read_b64
; SI-PROMOTE: ds_read_b64
define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) #1 {
  %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
  %array = alloca [8 x <2 x i64>], align 16
  ; %b is a function argument, so the element index is not a compile-time constant.
  %ptr = getelementptr inbounds [8 x <2 x i64>], [8 x <2 x i64>]* %array, i32 0, i32 %b
  store <2 x i64> %val, <2 x i64>* %ptr, align 16
  ; NOTE(review): the barrier presumably keeps the store/reload pair from being
  ; folded into a direct copy - confirm against the original commit.
  call void @llvm.amdgcn.s.barrier()
  %result = load <2 x i64>, <2 x i64>* %ptr, align 16
  store <2 x i64> %result, <2 x i64> addrspace(1)* %out, align 16
  ret void
}

; Group #0 is attached to the barrier intrinsic declaration.
; Group #1 is attached to all four test functions; besides nounwind it sets the
; string attributes "amdgpu-max-waves-per-eu" to 2 and
; "amdgpu-max-work-group-size" to 64.
attributes #0 = { convergent nounwind }
attributes #1 = { nounwind "amdgpu-max-waves-per-eu"="2" "amdgpu-max-work-group-size"="64" }