; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck -check-prefixes=CHECK,CIVI %s
; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,CIVI %s
; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA %s
; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA,GFX9 %s

; Disable optimizations in case there are optimizations added that
; specialize away generic pointer accesses.


; These testcases might become useless when there are optimizations to
; remove generic pointers.

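; The tests below check that loads and stores through generic (flat)
; pointers created by addrspacecast are selected to flat_load_* /
; flat_store_* instructions.
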
; CHECK-LABEL: {{^}}store_flat_i32:
; CHECK-DAG: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]],
; CHECK-DAG: s_load_dword s[[SDATA:[0-9]+]],
; CHECK: s_waitcnt lgkmcnt(0)
; CHECK-DAG: v_mov_b32_e32 v[[DATA:[0-9]+]], s[[SDATA]]
; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
; CHECK: flat_store_dword v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}, v[[DATA]]
define amdgpu_kernel void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 {
  %fptr = addrspacecast i32 addrspace(1)* %gptr to i32*
  store volatile i32 %x, i32* %fptr, align 4
  ret void
}

; CHECK-LABEL: {{^}}store_flat_i64:
; CHECK: flat_store_dwordx2
define amdgpu_kernel void @store_flat_i64(i64 addrspace(1)* %gptr, i64 %x) #0 {
  %fptr = addrspacecast i64 addrspace(1)* %gptr to i64*
  store volatile i64 %x, i64* %fptr, align 8
  ret void
}

; CHECK-LABEL: {{^}}store_flat_v4i32:
; CHECK: flat_store_dwordx4
define amdgpu_kernel void @store_flat_v4i32(<4 x i32> addrspace(1)* %gptr, <4 x i32> %x) #0 {
  %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32>*
  store volatile <4 x i32> %x, <4 x i32>* %fptr, align 16
  ret void
}

; CHECK-LABEL: {{^}}store_flat_trunc_i16:
; CHECK: flat_store_short
define amdgpu_kernel void @store_flat_trunc_i16(i16 addrspace(1)* %gptr, i32 %x) #0 {
  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16*
  %y = trunc i32 %x to i16
  store volatile i16 %y, i16* %fptr, align 2
  ret void
}

; CHECK-LABEL: {{^}}store_flat_trunc_i8:
; CHECK: flat_store_byte
define amdgpu_kernel void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 {
  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8*
  %y = trunc i32 %x to i8
  store volatile i8 %y, i8* %fptr, align 2
  ret void
}



; CHECK-LABEL: load_flat_i32:
; CHECK: flat_load_dword
define amdgpu_kernel void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i32 addrspace(1)* %gptr to i32*
  %fload = load volatile i32, i32* %fptr, align 4
  store i32 %fload, i32 addrspace(1)* %out, align 4
  ret void
}

; CHECK-LABEL: load_flat_i64:
; CHECK: flat_load_dwordx2
define amdgpu_kernel void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i64 addrspace(1)* %gptr to i64*
  %fload = load volatile i64, i64* %fptr, align 8
  store i64 %fload, i64 addrspace(1)* %out, align 8
  ret void
}

; CHECK-LABEL: load_flat_v4i32:
; CHECK: flat_load_dwordx4
define amdgpu_kernel void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32>*
  %fload = load volatile <4 x i32>, <4 x i32>* %fptr, align 32
  store <4 x i32> %fload, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; CHECK-LABEL: sextload_flat_i8:
; CHECK: flat_load_sbyte
define amdgpu_kernel void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8*
  %fload = load volatile i8, i8* %fptr, align 4
  %ext = sext i8 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; CHECK-LABEL: zextload_flat_i8:
; CHECK: flat_load_ubyte
define amdgpu_kernel void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8*
  %fload = load volatile i8, i8* %fptr, align 4
  %ext = zext i8 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; CHECK-LABEL: sextload_flat_i16:
; CHECK: flat_load_sshort
define amdgpu_kernel void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16*
  %fload = load volatile i16, i16* %fptr, align 4
  %ext = sext i16 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; CHECK-LABEL: zextload_flat_i16:
; CHECK: flat_load_ushort
define amdgpu_kernel void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16*
  %fload = load volatile i16, i16* %fptr, align 4
  %ext = zext i16 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

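; Unaligned (align 1) i32 accesses through a flat pointer have no direct
; instruction support, so they are expanded into four byte-sized flat
; operations, as checked below.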
; CHECK-LABEL: flat_scratch_unaligned_load:
; CHECK: flat_load_ubyte
; CHECK: flat_load_ubyte
; CHECK: flat_load_ubyte
; CHECK: flat_load_ubyte
define amdgpu_kernel void @flat_scratch_unaligned_load() {
  %scratch = alloca i32, addrspace(5)
  %fptr = addrspacecast i32 addrspace(5)* %scratch to i32*
  %ld = load volatile i32, i32* %fptr, align 1
  ret void
}

; CHECK-LABEL: flat_scratch_unaligned_store:
; CHECK: flat_store_byte
; CHECK: flat_store_byte
; CHECK: flat_store_byte
; CHECK: flat_store_byte
define amdgpu_kernel void @flat_scratch_unaligned_store() {
  %scratch = alloca i32, addrspace(5)
  %fptr = addrspacecast i32 addrspace(5)* %scratch to i32*
  store volatile i32 0, i32* %fptr, align 1
  ret void
}

; CHECK-LABEL: flat_scratch_multidword_load:
; HSA: flat_load_dword
; HSA: flat_load_dword
; FIXME: These tests are broken for os = mesa3d, because it doesn't initialize flat_scr
define amdgpu_kernel void @flat_scratch_multidword_load() {
  %scratch = alloca <2 x i32>, addrspace(5)
  %fptr = addrspacecast <2 x i32> addrspace(5)* %scratch to <2 x i32>*
  %ld = load volatile <2 x i32>, <2 x i32>* %fptr
  ret void
}

; CHECK-LABEL: flat_scratch_multidword_store:
; HSA: flat_store_dword
; HSA: flat_store_dword
; FIXME: These tests are broken for os = mesa3d, because it doesn't initialize flat_scr
define amdgpu_kernel void @flat_scratch_multidword_store() {
  %scratch = alloca <2 x i32>, addrspace(5)
  %fptr = addrspacecast <2 x i32> addrspace(5)* %scratch to <2 x i32>*
  store volatile <2 x i32> zeroinitializer, <2 x i32>* %fptr
  ret void
}

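; The offset tests below check that GFX9 folds an unsigned immediate offset
; into flat instructions (up to 4095 here), while CI/VI do not, and that an
; out-of-range (4096) or negative offset is left in the address on all targets.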
; CHECK-LABEL: {{^}}store_flat_i8_max_offset:
; CIVI: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
; GFX9: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:4095{{$}}
define amdgpu_kernel void @store_flat_i8_max_offset(i8* %fptr, i8 %x) #0 {
  %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 4095
  store volatile i8 %x, i8* %fptr.offset
  ret void
}

; CHECK-LABEL: {{^}}store_flat_i8_max_offset_p1:
; CHECK: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @store_flat_i8_max_offset_p1(i8* %fptr, i8 %x) #0 {
  %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 4096
  store volatile i8 %x, i8* %fptr.offset
  ret void
}

; CHECK-LABEL: {{^}}store_flat_i8_neg_offset:
; CHECK: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @store_flat_i8_neg_offset(i8* %fptr, i8 %x) #0 {
  %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 -2
  store volatile i8 %x, i8* %fptr.offset
  ret void
}

; CHECK-LABEL: {{^}}load_flat_i8_max_offset:
; CIVI: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
; GFX9: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
define amdgpu_kernel void @load_flat_i8_max_offset(i8* %fptr) #0 {
  %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 4095
  %val = load volatile i8, i8* %fptr.offset
  ret void
}

; CHECK-LABEL: {{^}}load_flat_i8_max_offset_p1:
; CHECK: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @load_flat_i8_max_offset_p1(i8* %fptr) #0 {
  %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 4096
  %val = load volatile i8, i8* %fptr.offset
  ret void
}

; CHECK-LABEL: {{^}}load_flat_i8_neg_offset:
; CHECK: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @load_flat_i8_neg_offset(i8* %fptr) #0 {
  %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 -2
  %val = load volatile i8, i8* %fptr.offset
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind convergent }