; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck -check-prefixes=CHECK,CIVI %s
; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,CIVI %s
; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA %s
; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA,GFX9 %s

; Disable optimizations in case there are optimizations added that
; specialize away generic pointer accesses.


; These testcases might become useless when there are optimizations to
; remove generic pointers.

; CHECK-LABEL: {{^}}store_flat_i32:
; CHECK-DAG: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]],
; CHECK-DAG: s_load_dword s[[SDATA:[0-9]+]],
; CHECK: s_waitcnt lgkmcnt(0)
; CHECK-DAG: v_mov_b32_e32 v[[DATA:[0-9]+]], s[[SDATA]]
; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
; CHECK: flat_store_dword v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}, v[[DATA]]
define amdgpu_kernel void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 {
  %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
  store volatile i32 %x, i32 addrspace(4)* %fptr, align 4
  ret void
}

; CHECK-LABEL: {{^}}store_flat_i64:
; CHECK: flat_store_dwordx2
define amdgpu_kernel void @store_flat_i64(i64 addrspace(1)* %gptr, i64 %x) #0 {
  %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
  store volatile i64 %x, i64 addrspace(4)* %fptr, align 8
  ret void
}

; CHECK-LABEL: {{^}}store_flat_v4i32:
; CHECK: flat_store_dwordx4
define amdgpu_kernel void @store_flat_v4i32(<4 x i32> addrspace(1)* %gptr, <4 x i32> %x) #0 {
  %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
  store volatile <4 x i32> %x, <4 x i32> addrspace(4)* %fptr, align 16
  ret void
}

; CHECK-LABEL: {{^}}store_flat_trunc_i16:
; CHECK: flat_store_short
define amdgpu_kernel void @store_flat_trunc_i16(i16 addrspace(1)* %gptr, i32 %x) #0 {
  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
  %y = trunc i32 %x to i16
  store volatile i16 %y, i16 addrspace(4)* %fptr, align 2
  ret void
}

; CHECK-LABEL: {{^}}store_flat_trunc_i8:
; CHECK: flat_store_byte
define amdgpu_kernel void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 {
  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
  %y = trunc i32 %x to i8
  store volatile i8 %y, i8 addrspace(4)* %fptr, align 2
  ret void
}



; CHECK-LABEL: load_flat_i32:
; CHECK: flat_load_dword
define amdgpu_kernel void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
  %fload = load volatile i32, i32 addrspace(4)* %fptr, align 4
  store i32 %fload, i32 addrspace(1)* %out, align 4
  ret void
}

; CHECK-LABEL: load_flat_i64:
; CHECK: flat_load_dwordx2
define amdgpu_kernel void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
  %fload = load volatile i64, i64 addrspace(4)* %fptr, align 8
  store i64 %fload, i64 addrspace(1)* %out, align 8
  ret void
}

; CHECK-LABEL: load_flat_v4i32:
; CHECK: flat_load_dwordx4
define amdgpu_kernel void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
  %fload = load volatile <4 x i32>, <4 x i32> addrspace(4)* %fptr, align 32
  store <4 x i32> %fload, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; CHECK-LABEL: sextload_flat_i8:
; CHECK: flat_load_sbyte
define amdgpu_kernel void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
  %fload = load volatile i8, i8 addrspace(4)* %fptr, align 4
  %ext = sext i8 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; CHECK-LABEL: zextload_flat_i8:
; CHECK: flat_load_ubyte
define amdgpu_kernel void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
  %fload = load volatile i8, i8 addrspace(4)* %fptr, align 4
  %ext = zext i8 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; CHECK-LABEL: sextload_flat_i16:
; CHECK: flat_load_sshort
define amdgpu_kernel void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
  %fload = load volatile i16, i16 addrspace(4)* %fptr, align 4
  %ext = sext i16 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; CHECK-LABEL: zextload_flat_i16:
; CHECK: flat_load_ushort
define amdgpu_kernel void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
  %fload = load volatile i16, i16 addrspace(4)* %fptr, align 4
  %ext = zext i16 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; CHECK-LABEL: flat_scratch_unaligned_load:
; CHECK: flat_load_ubyte
; CHECK: flat_load_ubyte
; CHECK: flat_load_ubyte
; CHECK: flat_load_ubyte
define amdgpu_kernel void @flat_scratch_unaligned_load() {
  %scratch = alloca i32
  %fptr = addrspacecast i32* %scratch to i32 addrspace(4)*
  %ld = load volatile i32, i32 addrspace(4)* %fptr, align 1
  ret void
}

; CHECK-LABEL: flat_scratch_unaligned_store:
; CHECK: flat_store_byte
; CHECK: flat_store_byte
; CHECK: flat_store_byte
; CHECK: flat_store_byte
define amdgpu_kernel void @flat_scratch_unaligned_store() {
  %scratch = alloca i32
  %fptr = addrspacecast i32* %scratch to i32 addrspace(4)*
  store volatile i32 0, i32 addrspace(4)* %fptr, align 1
  ret void
}

; CHECK-LABEL: flat_scratch_multidword_load:
; HSA: flat_load_dword
; HSA: flat_load_dword
; FIXME: These tests are broken for os = mesa3d because it doesn't initialize flat_scr
define amdgpu_kernel void @flat_scratch_multidword_load() {
  %scratch = alloca <2 x i32>
  %fptr = addrspacecast <2 x i32>* %scratch to <2 x i32> addrspace(4)*
  %ld = load volatile <2 x i32>, <2 x i32> addrspace(4)* %fptr
  ret void
}

; CHECK-LABEL: flat_scratch_multidword_store:
; HSA: flat_store_dword
; HSA: flat_store_dword
; FIXME: These tests are broken for os = mesa3d because it doesn't initialize flat_scr
define amdgpu_kernel void @flat_scratch_multidword_store() {
  %scratch = alloca <2 x i32>
  %fptr = addrspacecast <2 x i32>* %scratch to <2 x i32> addrspace(4)*
  store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(4)* %fptr
  ret void
}

; CHECK-LABEL: {{^}}store_flat_i8_max_offset:
; CIVI: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
; GFX9: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:4095{{$}}
define amdgpu_kernel void @store_flat_i8_max_offset(i8 addrspace(4)* %fptr, i8 %x) #0 {
  %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4095
  store volatile i8 %x, i8 addrspace(4)* %fptr.offset
  ret void
}

; CHECK-LABEL: {{^}}store_flat_i8_max_offset_p1:
; CHECK: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @store_flat_i8_max_offset_p1(i8 addrspace(4)* %fptr, i8 %x) #0 {
  %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4096
  store volatile i8 %x, i8 addrspace(4)* %fptr.offset
  ret void
}

; CHECK-LABEL: {{^}}store_flat_i8_neg_offset:
; CHECK: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @store_flat_i8_neg_offset(i8 addrspace(4)* %fptr, i8 %x) #0 {
  %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 -2
  store volatile i8 %x, i8 addrspace(4)* %fptr.offset
  ret void
}

; CHECK-LABEL: {{^}}load_flat_i8_max_offset:
; CIVI: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
; GFX9: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
define amdgpu_kernel void @load_flat_i8_max_offset(i8 addrspace(4)* %fptr) #0 {
  %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4095
  %val = load volatile i8, i8 addrspace(4)* %fptr.offset
  ret void
}

; CHECK-LABEL: {{^}}load_flat_i8_max_offset_p1:
; CHECK: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @load_flat_i8_max_offset_p1(i8 addrspace(4)* %fptr) #0 {
  %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4096
  %val = load volatile i8, i8 addrspace(4)* %fptr.offset
  ret void
}

; CHECK-LABEL: {{^}}load_flat_i8_neg_offset:
; CHECK: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @load_flat_i8_neg_offset(i8 addrspace(4)* %fptr) #0 {
  %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 -2
  %val = load volatile i8, i8 addrspace(4)* %fptr.offset
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind convergent }