blob: f805430797016944a4cfa3d20c53468fb4cee7cb [file] [log] [blame]
; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mattr=-promote-alloca,+max-private-element-size-16 -verify-machineinstrs < %s | FileCheck -check-prefix=ELT16 -check-prefix=HSA -check-prefix=HSA-ELT16 -check-prefix=ALL -check-prefix=HSA-ELTGE8 %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mattr=-promote-alloca,+max-private-element-size-8 -verify-machineinstrs < %s | FileCheck -check-prefix=ELT8 -check-prefix=HSA -check-prefix=HSA-ELT8 -check-prefix=ALL -check-prefix=HSA-ELTGE8 %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mattr=-promote-alloca,+max-private-element-size-4 -verify-machineinstrs < %s | FileCheck -check-prefix=ELT4 -check-prefix=HSA -check-prefix=HSA-ELT4 -check-prefix=ALL %s

; The three invocations above compile this file with promote-alloca disabled
; and max-private-element-size set to 16, 8 and 4 bytes. Each test case below
; pins the width of the buffer_store/buffer_load instructions emitted for
; accesses to a stack alloca under each setting. The HSA-ELTGE8 prefix is
; shared by the 16-byte and 8-byte configurations.

; Stores two <4 x i32> constants into a [2 x <4 x i32>] alloca and reloads one
; element through an index loaded from memory. The directives below pin the
; store/load widths expected for each max-private-element-size setting.
; ALL-LABEL: {{^}}private_elt_size_v4i32:

; HSA-ELT16: private_element_size = 3
; HSA-ELT8: private_element_size = 2
; HSA-ELT4: private_element_size = 1


; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:16
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:32
; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}

; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:24{{$}}
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:16
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:32
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:40

; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen
; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen


; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:16{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:20{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:24{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:28{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:32{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:36{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:40{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:44{{$}}

; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}
define amdgpu_kernel void @private_elt_size_v4i32(<4 x i32> addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
entry:
  ; Per-workitem index fetched from %index.array so the alloca access below
  ; cannot be resolved to a constant offset.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %idxprom = sext i32 %tid to i64
  %gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom
  %index.load = load i32, i32 addrspace(1)* %gep.index
  ; NOTE(review): the mask is 2 but the array has only two elements, so %index
  ; is either 0 or 2 (one past the last element) - confirm whether a mask of 1
  ; was intended.
  %index = and i32 %index.load, 2
  %alloca = alloca [2 x <4 x i32>], align 16
  %gep0 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %alloca, i32 0, i32 0
  %gep1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %alloca, i32 0, i32 1
  store <4 x i32> zeroinitializer, <4 x i32>* %gep0
  store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %gep1
  %gep2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %alloca, i32 0, i32 %index
  %load = load <4 x i32>, <4 x i32>* %gep2
  store <4 x i32> %load, <4 x i32> addrspace(1)* %out
  ret void
}

; Stores two <8 x i32> constants into a [2 x <8 x i32>] alloca and reloads one
; element through an index loaded from memory. The directives below pin the
; store/load widths expected for each max-private-element-size setting.
; ALL-LABEL: {{^}}private_elt_size_v8i32:
; HSA-ELT16: private_element_size = 3
; HSA-ELT8: private_element_size = 2
; HSA-ELT4: private_element_size = 1

; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:32
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:48
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:64
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:80

; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}


; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:32
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:40
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:48
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:56
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:88
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:80
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:72
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:64

; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen
; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen


; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:32{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:36{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:40{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:44{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:48{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:52{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:56{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:60{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:64{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:68{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:72{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:76{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:80{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:84{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:88{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:92{{$}}

; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:16{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:20{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:24{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:28{{$}}
define amdgpu_kernel void @private_elt_size_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
entry:
  ; Per-workitem index fetched from %index.array so the alloca access below
  ; cannot be resolved to a constant offset.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %idxprom = sext i32 %tid to i64
  %gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom
  %index.load = load i32, i32 addrspace(1)* %gep.index
  ; NOTE(review): the mask is 2 but the array has only two elements, so %index
  ; is either 0 or 2 (one past the last element) - confirm whether a mask of 1
  ; was intended.
  %index = and i32 %index.load, 2
  %alloca = alloca [2 x <8 x i32>], align 16
  %gep0 = getelementptr inbounds [2 x <8 x i32>], [2 x <8 x i32>]* %alloca, i32 0, i32 0
  %gep1 = getelementptr inbounds [2 x <8 x i32>], [2 x <8 x i32>]* %alloca, i32 0, i32 1
  store <8 x i32> zeroinitializer, <8 x i32>* %gep0
  store <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <8 x i32>* %gep1
  %gep2 = getelementptr inbounds [2 x <8 x i32>], [2 x <8 x i32>]* %alloca, i32 0, i32 %index
  %load = load <8 x i32>, <8 x i32>* %gep2
  store <8 x i32> %load, <8 x i32> addrspace(1)* %out
  ret void
}


; Stores two i64 constants into a [2 x i64] alloca and reloads one element
; through an index loaded from memory. The directives below pin the store/load
; widths expected for each max-private-element-size setting.
; ALL-LABEL: {{^}}private_elt_size_i64:
; HSA-ELT16: private_element_size = 3
; HSA-ELT8: private_element_size = 2
; HSA-ELT4: private_element_size = 1

; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, {{off|v[0-9]}}, s[0:3], s9 offset:1
; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, {{off|v[0-9]}}, s[0:3], s9 offset:2

; HSA-ELTGE8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen


; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:16{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:20{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:24{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:28{{$}}

; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
define amdgpu_kernel void @private_elt_size_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
entry:
  ; Per-workitem index fetched from %index.array so the alloca access below
  ; cannot be resolved to a constant offset.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %idxprom = sext i32 %tid to i64
  %gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom
  %index.load = load i32, i32 addrspace(1)* %gep.index
  ; NOTE(review): the mask is 2 but the array has only two elements, so %index
  ; is either 0 or 2 (one past the last element) - confirm whether a mask of 1
  ; was intended.
  %index = and i32 %index.load, 2
  %alloca = alloca [2 x i64], align 16
  %gep0 = getelementptr inbounds [2 x i64], [2 x i64]* %alloca, i32 0, i32 0
  %gep1 = getelementptr inbounds [2 x i64], [2 x i64]* %alloca, i32 0, i32 1
  store i64 0, i64* %gep0
  store i64 34359738602, i64* %gep1
  %gep2 = getelementptr inbounds [2 x i64], [2 x i64]* %alloca, i32 0, i32 %index
  %load = load i64, i64* %gep2
  store i64 %load, i64 addrspace(1)* %out
  ret void
}

; Stores two double constants into a [2 x double] alloca and reloads one
; element through an index loaded from memory. The directives below pin the
; store/load widths expected for each max-private-element-size setting.
; ALL-LABEL: {{^}}private_elt_size_f64:
; HSA-ELT16: private_element_size = 3
; HSA-ELT8: private_element_size = 2
; HSA-ELT4: private_element_size = 1

; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:16
; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:24

; HSA-ELTGE8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen


; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:16{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:20{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:24{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:28{{$}}

; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
define amdgpu_kernel void @private_elt_size_f64(double addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
entry:
  ; Per-workitem index fetched from %index.array so the alloca access below
  ; cannot be resolved to a constant offset.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %idxprom = sext i32 %tid to i64
  %gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom
  %index.load = load i32, i32 addrspace(1)* %gep.index
  ; NOTE(review): the mask is 2 but the array has only two elements, so %index
  ; is either 0 or 2 (one past the last element) - confirm whether a mask of 1
  ; was intended.
  %index = and i32 %index.load, 2
  %alloca = alloca [2 x double], align 16
  %gep0 = getelementptr inbounds [2 x double], [2 x double]* %alloca, i32 0, i32 0
  %gep1 = getelementptr inbounds [2 x double], [2 x double]* %alloca, i32 0, i32 1
  store double 0.0, double* %gep0
  store double 4.0, double* %gep1
  %gep2 = getelementptr inbounds [2 x double], [2 x double]* %alloca, i32 0, i32 %index
  %load = load double, double* %gep2
  store double %load, double addrspace(1)* %out
  ret void
}

; Stores two <2 x i64> constants into a [2 x <2 x i64>] alloca and reloads one
; element through an index loaded from memory. The directives below pin the
; store/load widths expected for each max-private-element-size setting.
; ALL-LABEL: {{^}}private_elt_size_v2i64:
; HSA-ELT16: private_element_size = 3
; HSA-ELT8: private_element_size = 2
; HSA-ELT4: private_element_size = 1

; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:16
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:32
; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}

; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:16{{$}}
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:24
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:40
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:32

; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen
; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen


; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:16{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:20{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:24{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:28{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:32{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:36{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:40{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:44{{$}}

; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}
define amdgpu_kernel void @private_elt_size_v2i64(<2 x i64> addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
entry:
  ; Per-workitem index fetched from %index.array so the alloca access below
  ; cannot be resolved to a constant offset.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %idxprom = sext i32 %tid to i64
  %gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom
  %index.load = load i32, i32 addrspace(1)* %gep.index
  ; NOTE(review): the mask is 2 but the array has only two elements, so %index
  ; is either 0 or 2 (one past the last element) - confirm whether a mask of 1
  ; was intended.
  %index = and i32 %index.load, 2
  %alloca = alloca [2 x <2 x i64>], align 16
  %gep0 = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* %alloca, i32 0, i32 0
  %gep1 = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* %alloca, i32 0, i32 1
  store <2 x i64> zeroinitializer, <2 x i64>* %gep0
  store <2 x i64> <i64 1, i64 2>, <2 x i64>* %gep1
  %gep2 = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* %alloca, i32 0, i32 %index
  %load = load <2 x i64>, <2 x i64>* %gep2
  store <2 x i64> %load, <2 x i64> addrspace(1)* %out
  ret void
}

; Intrinsic called by every kernel in this file to obtain the workitem id
; that indexes %index.array.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; #0 is attached to the kernels, #1 to the intrinsic declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }