; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI -check-prefix=OPT-CIVI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI -check-prefix=OPT-CIVI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-GFX9 %s
; RUN: llc -march=amdgcn -amdgpu-scalarize-global-loads=false -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s
; RUN: llc -march=amdgcn -amdgpu-scalarize-global-loads=false -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s
; RUN: llc -march=amdgcn -amdgpu-scalarize-global-loads=false -mcpu=gfx900 -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s

; Loads an i32 through the flat pointer %in at element index 7 inside the
; conditionally executed %if block. The OPT-CIVI directives expect the GEP on
; %in to stay ahead of the conditional branch with no ptrtoint afterwards; the
; OPT-GFX9 directives expect an i8 GEP of 28 named %sunkaddr, a bitcast, and
; the load to show up after a branch.
; OPT-LABEL: @test_no_sink_flat_small_offset_i32(
; OPT-CIVI: getelementptr i32, i32* %in
; OPT-CIVI: br i1
; OPT-CIVI-NOT: ptrtoint

; OPT-GFX9: br
; OPT-GFX9: %sunkaddr = getelementptr i8, i8* %0, i64 28
; OPT-GFX9: %1 = bitcast i8* %sunkaddr to i32*
; OPT-GFX9: load i32, i32* %1

; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32:
; GCN: flat_load_dword
; GCN: {{^}}BB0_2:
define amdgpu_kernel void @test_no_sink_flat_small_offset_i32(i32* %out, i32* %in, i32 %cond) {
entry:
  %out.gep = getelementptr i32, i32* %out, i64 999999
  %in.gep = getelementptr i32, i32* %in, i64 7
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32* %out.gep
  br label %done

done:
  ret void
}

; The flat pointer %in is offset by 7 i32 elements, addrspacecast to
; addrspace(1) in the entry block, and loaded in the conditionally executed
; %if block. The OPT-CI directives expect an addrspacecast, a getelementptr
; and a bitcast between the conditional branch and the following
; unconditional branch; the CI assembly directive expects a buffer_load_dword
; with an immediate offset of 28.
; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
; OPT: getelementptr i32, i32* %out,
; NOTE(review): the next directive is spelled with a leading 'r', which is not
; one of the prefixes handed to FileCheck, so it appears not to be enforced.
; Confirm whether it was disabled on purpose before changing it.
; rOPT-CI-NOT: getelementptr
; OPT: br i1

; OPT-CI: addrspacecast
; OPT-CI: getelementptr
; OPT-CI: bitcast
; OPT: br label

; GCN-LABEL: {{^}}test_sink_noop_addrspacecast_flat_to_global_i32:
; CI: buffer_load_dword {{v[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28
define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_global_i32(i32* %out, i32* %in, i32 %cond) {
entry:
  %out.gep = getelementptr i32, i32* %out, i64 999999
  %in.gep = getelementptr i32, i32* %in, i64 7
  %cast = addrspacecast i32* %in.gep to i32 addrspace(1)*
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(1)* %cast
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32* %out.gep
  br label %done

done:
  ret void
}

; Same shape as the flat-to-global case, but the offset flat pointer is
; addrspacecast to addrspace(4) before being loaded in %if. On the OPT-CI run
; no getelementptr is expected between the GEP on %out and the conditional
; branch, and an addrspacecast, getelementptr and bitcast are expected after
; it. The CI assembly directive expects an s_load_dword whose last operand is
; 0xd.
; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_constant_i32(
; OPT: getelementptr i32, i32* %out,
; OPT-CI-NOT: getelementptr
; OPT: br i1

; OPT-CI: addrspacecast
; OPT-CI: getelementptr
; OPT-CI: bitcast
; OPT: br label

; GCN-LABEL: {{^}}test_sink_noop_addrspacecast_flat_to_constant_i32:
; CI: s_load_dword {{s[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_constant_i32(i32* %out, i32* %in, i32 %cond) {
entry:
  %out.gep = getelementptr i32, i32* %out, i64 999999
  %in.gep = getelementptr i32, i32* %in, i64 7
  %cast = addrspacecast i32* %in.gep to i32 addrspace(4)*
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(4)* %cast
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32* %out.gep
  br label %done

done:
  ret void
}

; Loads an i8 at byte offset 4095 from the flat pointer %in inside %if and
; sign-extends it; the branch condition compares the mbcnt.lo result with 0.
; The OPT-CIVI directives expect the original %in.gep to be kept before the
; branch and used by the load, with no further getelementptr in between. The
; OPT-GFX9 directives expect a %sunkaddr GEP of 4095 after the branch. In the
; assembly, GFX9 expects flat_load_sbyte with offset:4095 while CIVI expects
; the line to end right after the address register pair.
; OPT-LABEL: @test_sink_flat_small_max_flat_offset(
; OPT-CIVI: %in.gep = getelementptr i8, i8* %in, i64 4095
; OPT-CIVI: br
; OPT-CIVI-NOT: getelementptr
; OPT-CIVI: load i8, i8* %in.gep

; OPT-GFX9: br
; OPT-GFX9: %sunkaddr = getelementptr i8, i8* %in, i64 4095
; OPT-GFX9: load i8, i8* %sunkaddr

; GCN-LABEL: {{^}}test_sink_flat_small_max_flat_offset:
; GFX9: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
; CIVI: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @test_sink_flat_small_max_flat_offset(i32* %out, i8* %in) #1 {
entry:
  %out.gep = getelementptr i32, i32* %out, i32 1024
  %in.gep = getelementptr i8, i8* %in, i64 4095
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i8, i8* %in.gep
  %tmp2 = sext i8 %tmp1 to i32
  br label %endif

endif:
  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
  store i32 %x, i32* %out.gep
  br label %done

done:
  ret void
}

; Same structure as the 4095-offset test, but the byte offset is 4096. The
; directives apply to every opt run: %in.gep must stay before the branch, no
; getelementptr may follow before the load, and the load must use %in.gep.
; Every llc run expects flat_load_sbyte with the line ending right after the
; address register pair.
; OPT-LABEL: @test_sink_flat_small_max_plus_1_flat_offset(
; OPT: %in.gep = getelementptr i8, i8* %in, i64 4096
; OPT: br
; OPT-NOT: getelementptr
; OPT: load i8, i8* %in.gep

; GCN-LABEL: {{^}}test_sink_flat_small_max_plus_1_flat_offset:
; GCN: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @test_sink_flat_small_max_plus_1_flat_offset(i32* %out, i8* %in) #1 {
entry:
  %out.gep = getelementptr i32, i32* %out, i64 99999
  %in.gep = getelementptr i8, i8* %in, i64 4096
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i8, i8* %in.gep
  %tmp2 = sext i8 %tmp1 to i32
  br label %endif

endif:
  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
  store i32 %x, i32* %out.gep
  br label %done

done:
  ret void
}

; The byte offset applied to the flat pointer %in is the kernel argument %reg
; rather than a constant. Every opt run expects %in.gep to stay before the
; branch, with no getelementptr afterwards and the load still using %in.gep.
; Every llc run expects flat_load_sbyte with the line ending right after the
; address register pair.
; OPT-LABEL: @test_no_sink_flat_reg_offset(
; OPT: %in.gep = getelementptr i8, i8* %in, i64 %reg
; OPT: br

; OPT-NOT: getelementptr
; OPT: load i8, i8* %in.gep

; GCN-LABEL: {{^}}test_no_sink_flat_reg_offset:
; GCN: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @test_no_sink_flat_reg_offset(i32* %out, i8* %in, i64 %reg) #1 {
entry:
  %out.gep = getelementptr i32, i32* %out, i32 1024
  %in.gep = getelementptr i8, i8* %in, i64 %reg
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i8, i8* %in.gep
  %tmp2 = sext i8 %tmp1 to i32
  br label %endif

endif:
  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
  store i32 %x, i32* %out.gep
  br label %done

done:
  ret void
}

; Declaration of the mbcnt.lo intrinsic called by the kernels in this file,
; followed by the attribute groups. #0 is attached to the intrinsic and its
; call sites and #1 to several kernels; no use of #2 is visible in this file.
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
attributes #2 = { nounwind argmemonly }