| Matt Arsenault | dc8f5cc | 2017-07-29 01:12:31 +0000 | [diff] [blame] | 1 | ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI -check-prefix=OPT-SICIVI %s | 
|  | 2 | ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI -check-prefix=OPT-SICIVI %s | 
|  | 3 | ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI -check-prefix=OPT-SICIVI %s | 
|  | 4 | ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=gfx900 < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-GFX9 %s | 
|  | 5 | ; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca -amdgpu-scalarize-global-loads=false -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SICIVI %s | 
|  | 6 | ; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca -amdgpu-scalarize-global-loads=false -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=SICIVI %s | 
|  | 7 | ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-scalarize-global-loads=false -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=SICIVI %s | 
|  | 8 | ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-promote-alloca -amdgpu-scalarize-global-loads=false -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 9 |  | 
|  |  | ; Little-endian layout. Pointers in address spaces 0 (the unnumbered p entry), 3 and 5 are | 
|  |  | ; 32 bits wide; address spaces 1, 2, 4 and 24 use 64-bit pointers. The trailing A5 makes | 
|  |  | ; address space 5 the alloca address space, matching the addrspace(5) allocas in this file. | 
| Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame^] | 10 | target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" | 
| Matt Arsenault | 02d915b | 2017-03-15 22:35:20 +0000 | [diff] [blame] | 11 |  | 
|  |  | ; Global (addrspace 1) i32 load at a constant offset of 7 elements, executed only in %if. | 
|  |  | ; The opt checks expect the bonaire run to have no i32 GEP on %in ahead of the branch and an | 
|  |  | ; i8 GEP after it, while the tonga run keeps the i32 GEP on %in ahead of the branch. | 
|  |  | ; On the llc side only the function label is checked. | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 12 | ; OPT-LABEL: @test_sink_global_small_offset_i32( | 
| Tom Stellard | 70580f8 | 2015-07-20 14:28:41 +0000 | [diff] [blame] | 13 | ; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in | 
|  | 14 | ; OPT-VI: getelementptr i32, i32 addrspace(1)* %in | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 15 | ; OPT: br i1 | 
| Eli Friedman | 5fba1e5 | 2017-04-06 22:42:18 +0000 | [diff] [blame] | 16 | ; OPT-CI: getelementptr i8, | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 17 |  | 
|  | 18 | ; GCN-LABEL: {{^}}test_sink_global_small_offset_i32: | 
| Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 19 | define amdgpu_kernel void @test_sink_global_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 20 | entry: | 
|  | 21 | %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 | 
|  | 22 | %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 7 | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 23 | %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 | 
|  | 24 | %tmp0 = icmp eq i32 %tid, 0 | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 25 | br i1 %tmp0, label %endif, label %if | 
|  | 26 |  | 
|  | 27 | if: | 
|  |  | ; Sole user of %in.gep; %out.gep is only used by the store in %endif. | 
|  | 28 | %tmp1 = load i32, i32 addrspace(1)* %in.gep | 
|  | 29 | br label %endif | 
|  | 30 |  | 
|  | 31 | endif: | 
|  | 32 | %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] | 
|  | 33 | store i32 %x, i32 addrspace(1)* %out.gep | 
|  | 34 | br label %done | 
|  | 35 |  | 
|  | 36 | done: | 
|  | 37 | ret void | 
|  | 38 | } | 
|  | 39 |  | 
|  |  | ; Sign-extended i8 load from a global pointer at a constant byte offset of 65535. | 
|  |  | ; The opt check expects %in.gep to stay ahead of the branch. The llc checks expect the | 
|  |  | ; tahiti, bonaire and tonga runs to end the buffer load with an SGPR operand and no | 
|  |  | ; immediate offset, and the gfx900 run to emit global_load_sbyte with no immediate offset. | 
|  | 40 | ; OPT-LABEL: @test_sink_global_small_max_i32_ds_offset( | 
|  | 41 | ; OPT: %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535 | 
|  | 42 | ; OPT: br i1 | 
|  | 43 |  | 
|  | 44 | ; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset: | 
|  | 45 | ; GCN: s_and_saveexec_b64 | 
| Matt Arsenault | dc8f5cc | 2017-07-29 01:12:31 +0000 | [diff] [blame] | 46 | ; SICIVI: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}} | 
|  | 47 | ; GFX9: global_load_sbyte {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, off{{$}} | 
|  |  | ; The block label is matched without pinning the function's ordinal in the module, in the | 
|  |  | ; same way as the later tests in this file, so reordering functions does not break it. | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 48 | ; GCN: {{^BB[0-9]+}}_2: | 
|  | 49 | ; GCN: s_or_b64 exec | 
| Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 50 | define amdgpu_kernel void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 51 | entry: | 
|  | 52 | %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999 | 
|  | 53 | %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535 | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 54 | %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 | 
|  | 55 | %tmp0 = icmp eq i32 %tid, 0 | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 56 | br i1 %tmp0, label %endif, label %if | 
|  | 57 |  | 
|  | 58 | if: | 
|  | 59 | %tmp1 = load i8, i8 addrspace(1)* %in.gep | 
|  | 60 | %tmp2 = sext i8 %tmp1 to i32 | 
|  | 61 | br label %endif | 
|  | 62 |  | 
|  | 63 | endif: | 
|  | 64 | %x = phi i32 [ %tmp2, %if ], [ 0, %entry ] | 
|  | 65 | store i32 %x, i32 addrspace(1)* %out.gep | 
|  | 66 | br label %done | 
|  | 67 |  | 
|  | 68 | done: | 
|  | 69 | ret void | 
|  | 70 | } | 
|  | 71 |  | 
|  |  | ; Sign-extended i8 load from a global pointer at a constant byte offset of 4095. | 
|  |  | ; The llc checks expect the offset to appear as the immediate offset field (4095) of the | 
|  |  | ; load, both for buffer_load_sbyte (tahiti, bonaire, tonga) and global_load_sbyte (gfx900). | 
|  |  | ; There are no opt checks for this function. | 
|  | 72 | ; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset: | 
|  | 73 | ; GCN: s_and_saveexec_b64 | 
| Matt Arsenault | dc8f5cc | 2017-07-29 01:12:31 +0000 | [diff] [blame] | 74 | ; SICIVI: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}} | 
|  | 75 | ; GFX9: global_load_sbyte {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, off offset:4095{{$}} | 
|  |  | ; The block label is matched without pinning the function's ordinal in the module, in the | 
|  |  | ; same way as the later tests in this file, so reordering functions does not break it. | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 76 | ; GCN: {{^BB[0-9]+}}_2: | 
|  | 77 | ; GCN: s_or_b64 exec | 
| Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 78 | define amdgpu_kernel void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 79 | entry: | 
|  | 80 | %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024 | 
|  | 81 | %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4095 | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 82 | %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 | 
|  | 83 | %tmp0 = icmp eq i32 %tid, 0 | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 84 | br i1 %tmp0, label %endif, label %if | 
|  | 85 |  | 
|  | 86 | if: | 
|  | 87 | %tmp1 = load i8, i8 addrspace(1)* %in.gep | 
|  | 88 | %tmp2 = sext i8 %tmp1 to i32 | 
|  | 89 | br label %endif | 
|  | 90 |  | 
|  | 91 | endif: | 
|  | 92 | %x = phi i32 [ %tmp2, %if ], [ 0, %entry ] | 
|  | 93 | store i32 %x, i32 addrspace(1)* %out.gep | 
|  | 94 | br label %done | 
|  | 95 |  | 
|  | 96 | done: | 
|  | 97 | ret void | 
|  | 98 | } | 
|  | 99 |  | 
|  |  | ; Sign-extended i8 load from a global pointer at a constant byte offset of 4096, one more | 
|  |  | ; than the 4095 used by the preceding test. The llc checks expect no immediate offset here | 
|  |  | ; (the buffer load ends with an SGPR operand and the gfx900 global load ends with "off"). | 
|  |  | ; There are no opt checks for this function. | 
|  | 100 | ; GCN-LABEL: {{^}}test_sink_global_small_max_plus_1_mubuf_offset: | 
|  | 101 | ; GCN: s_and_saveexec_b64 | 
| Matt Arsenault | dc8f5cc | 2017-07-29 01:12:31 +0000 | [diff] [blame] | 102 | ; SICIVI: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}} | 
|  | 103 | ; GFX9: global_load_sbyte {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, off{{$}} | 
|  |  | ; The block label is matched without pinning the function's ordinal in the module, in the | 
|  |  | ; same way as the later tests in this file, so reordering functions does not break it. | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 104 | ; GCN: {{^BB[0-9]+}}_2: | 
|  | 105 | ; GCN: s_or_b64 exec | 
| Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 106 | define amdgpu_kernel void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 107 | entry: | 
|  | 108 | %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999 | 
|  | 109 | %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4096 | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 110 | %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 | 
|  | 111 | %tmp0 = icmp eq i32 %tid, 0 | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 112 | br i1 %tmp0, label %endif, label %if | 
|  | 113 |  | 
|  | 114 | if: | 
|  | 115 | %tmp1 = load i8, i8 addrspace(1)* %in.gep | 
|  | 116 | %tmp2 = sext i8 %tmp1 to i32 | 
|  | 117 | br label %endif | 
|  | 118 |  | 
|  | 119 | endif: | 
|  | 120 | %x = phi i32 [ %tmp2, %if ], [ 0, %entry ] | 
|  | 121 | store i32 %x, i32 addrspace(1)* %out.gep | 
|  | 122 | br label %done | 
|  | 123 |  | 
|  | 124 | done: | 
|  | 125 | ret void | 
|  | 126 | } | 
|  | 127 |  | 
|  |  | ; Volatile store and load through a GEP at element 1022 of a [512 x i32] addrspace(5) | 
|  |  | ; alloca, i.e. 4088 bytes from the alloca base. The opt checks expect no [512 x i32] GEP | 
|  |  | ; ahead of the branch and an i8 GEP after it. The llc checks expect both scratch accesses | 
|  |  | ; to use an immediate offset of 4092 with no VGPR address operand. | 
|  |  | ; NOTE(review) the 4-byte difference between 4088 and 4092 presumably comes from the | 
|  |  | ; reserved dword described in the comment on the following test; confirm. | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 128 | ; OPT-LABEL: @test_sink_scratch_small_offset_i32( | 
|  | 129 | ; OPT-NOT:  getelementptr [512 x i32] | 
|  | 130 | ; OPT: br i1 | 
| Eli Friedman | 5fba1e5 | 2017-04-06 22:42:18 +0000 | [diff] [blame] | 131 | ; OPT: getelementptr i8, | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 132 |  | 
|  | 133 | ; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32: | 
|  | 134 | ; GCN: s_and_saveexec_b64 | 
| Matt Arsenault | 39787bd | 2016-10-26 15:08:16 +0000 | [diff] [blame] | 135 | ; GCN: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}} | 
|  | 136 | ; GCN: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}} | 
|  |  | ; The block label is matched without pinning the function's ordinal in the module, in the | 
|  |  | ; same way as the later tests in this file, so reordering functions does not break it. | 
| Matt Arsenault | 711b390 | 2015-08-07 20:18:34 +0000 | [diff] [blame] | 137 | ; GCN: {{^BB[0-9]+}}_2: | 
| Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 138 | define amdgpu_kernel void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) { | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 139 | entry: | 
| Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame^] | 140 | %alloca = alloca [512 x i32], align 4, addrspace(5) | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 141 | %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998 | 
|  | 142 | %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999 | 
|  |  | ; %add.arg has no users in this function. | 
|  | 143 | %add.arg = add i32 %arg, 8 | 
| Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame^] | 144 | %alloca.gep = getelementptr [512 x i32], [512 x i32] addrspace(5)* %alloca, i32 0, i32 1022 | 
| Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 145 | %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 | 
|  | 146 | %tmp0 = icmp eq i32 %tid, 0 | 
|  | 147 | br i1 %tmp0, label %endif, label %if | 
|  | 148 |  | 
|  | 149 | if: | 
| Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame^] | 150 | store volatile i32 123, i32 addrspace(5)* %alloca.gep | 
|  | 151 | %tmp1 = load volatile i32, i32 addrspace(5)* %alloca.gep | 
| Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 152 | br label %endif | 
|  | 153 |  | 
|  | 154 | endif: | 
|  | 155 | %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] | 
|  | 156 | store i32 %x, i32 addrspace(1)* %out.gep.0 | 
|  |  | ; Second use of %alloca.gep, outside the conditional block. | 
| Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame^] | 157 | %load = load volatile i32, i32 addrspace(5)* %alloca.gep | 
| Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 158 | store i32 %load, i32 addrspace(1)* %out.gep.1 | 
|  | 159 | br label %done | 
|  | 160 |  | 
|  | 161 | done: | 
|  | 162 | ret void | 
|  | 163 | } | 
|  | 164 |  | 
|  |  | ; Same shape as the preceding scratch test, but the GEP selects element 1023 of the | 
|  |  | ; [512 x i32] alloca, i.e. 4092 bytes from the alloca base. The llc checks expect each | 
|  |  | ; access to use a VGPR holding 4 as the address (offen) plus an immediate offset of 4092. | 
|  | 165 | ; This ends up not fitting due to the reserved 4 bytes at offset 0 | 
|  | 166 | ; OPT-LABEL: @test_sink_scratch_small_offset_i32_reserved( | 
|  | 167 | ; OPT-NOT:  getelementptr [512 x i32] | 
|  | 168 | ; OPT: br i1 | 
| Eli Friedman | 5fba1e5 | 2017-04-06 22:42:18 +0000 | [diff] [blame] | 169 | ; OPT: getelementptr i8, | 
| Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 170 |  | 
|  | 171 | ; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32_reserved: | 
|  | 172 | ; GCN: s_and_saveexec_b64 | 
|  | 173 | ; GCN: v_mov_b32_e32 [[BASE_FI0:v[0-9]+]], 4 | 
|  | 174 | ; GCN: buffer_store_dword {{v[0-9]+}}, [[BASE_FI0]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}} | 
|  | 175 | ; GCN: v_mov_b32_e32 [[BASE_FI1:v[0-9]+]], 4 | 
|  | 176 | ; GCN: buffer_load_dword {{v[0-9]+}}, [[BASE_FI1]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}} | 
|  | 177 | ; GCN: {{^BB[0-9]+}}_2: | 
|  | 178 |  | 
| Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 179 | define amdgpu_kernel void @test_sink_scratch_small_offset_i32_reserved(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) { | 
| Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 180 | entry: | 
| Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame^] | 181 | %alloca = alloca [512 x i32], align 4, addrspace(5) | 
| Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 182 | %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998 | 
|  | 183 | %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999 | 
|  |  | ; %add.arg has no users in this function. | 
|  | 184 | %add.arg = add i32 %arg, 8 | 
| Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame^] | 185 | %alloca.gep = getelementptr [512 x i32], [512 x i32] addrspace(5)* %alloca, i32 0, i32 1023 | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 186 | %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 | 
|  | 187 | %tmp0 = icmp eq i32 %tid, 0 | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 188 | br i1 %tmp0, label %endif, label %if | 
|  | 189 |  | 
|  | 190 | if: | 
| Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame^] | 191 | store volatile i32 123, i32 addrspace(5)* %alloca.gep | 
|  | 192 | %tmp1 = load volatile i32, i32 addrspace(5)* %alloca.gep | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 193 | br label %endif | 
|  | 194 |  | 
|  | 195 | endif: | 
|  | 196 | %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] | 
|  | 197 | store i32 %x, i32 addrspace(1)* %out.gep.0 | 
|  |  | ; Second use of %alloca.gep, outside the conditional block. | 
| Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame^] | 198 | %load = load volatile i32, i32 addrspace(5)* %alloca.gep | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 199 | store i32 %load, i32 addrspace(1)* %out.gep.1 | 
|  | 200 | br label %done | 
|  | 201 |  | 
|  | 202 | done: | 
|  | 203 | ret void | 
|  | 204 | } | 
|  | 205 |  | 
|  |  | ; Negative case for the scratch tests. The GEP selects element 1024 of the [512 x i32] | 
|  |  | ; alloca, i.e. 4096 bytes from the alloca base. The opt checks expect %alloca.gep to stay | 
|  |  | ; ahead of the branch with no ptrtoint after it. The llc checks expect both accesses to | 
|  |  | ; take a VGPR address (offen) with no immediate offset. | 
|  | 206 | ; OPT-LABEL: @test_no_sink_scratch_large_offset_i32( | 
| Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame^] | 207 | ; OPT: %alloca.gep = getelementptr [512 x i32], [512 x i32] addrspace(5)* %alloca, i32 0, i32 1024 | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 208 | ; OPT: br i1 | 
|  | 209 | ; OPT-NOT: ptrtoint | 
|  | 210 |  | 
|  | 211 | ; GCN-LABEL: {{^}}test_no_sink_scratch_large_offset_i32: | 
|  | 212 | ; GCN: s_and_saveexec_b64 | 
|  | 213 | ; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} | 
|  | 214 | ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} | 
| Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 215 | ; GCN: {{^BB[0-9]+}}_2: | 
| Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 216 | define amdgpu_kernel void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) { | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 217 | entry: | 
| Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame^] | 218 | %alloca = alloca [512 x i32], align 4, addrspace(5) | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 219 | %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998 | 
|  | 220 | %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999 | 
|  |  | ; %add.arg has no users in this function. | 
|  | 221 | %add.arg = add i32 %arg, 8 | 
| Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame^] | 222 | %alloca.gep = getelementptr [512 x i32], [512 x i32] addrspace(5)* %alloca, i32 0, i32 1024 | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 223 | %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 | 
|  | 224 | %tmp0 = icmp eq i32 %tid, 0 | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 225 | br i1 %tmp0, label %endif, label %if | 
|  | 226 |  | 
|  | 227 | if: | 
| Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame^] | 228 | store volatile i32 123, i32 addrspace(5)* %alloca.gep | 
|  | 229 | %tmp1 = load volatile i32, i32 addrspace(5)* %alloca.gep | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 230 | br label %endif | 
|  | 231 |  | 
|  | 232 | endif: | 
|  | 233 | %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] | 
|  | 234 | store i32 %x, i32 addrspace(1)* %out.gep.0 | 
|  |  | ; Second use of %alloca.gep, outside the conditional block. | 
| Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame^] | 235 | %load = load volatile i32, i32 addrspace(5)* %alloca.gep | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 236 | store i32 %load, i32 addrspace(1)* %out.gep.1 | 
|  | 237 | br label %done | 
|  | 238 |  | 
|  | 239 | done: | 
|  | 240 | ret void | 
|  | 241 | } | 
|  | 242 |  | 
|  |  | ; Global i32 load whose index is a kernel argument (%offset zero-extended to i64) rather | 
|  |  | ; than a constant. The load instruction itself is only checked for the bonaire run (addr64 | 
|  |  | ; buffer load, no immediate offset) and the tonga run (flat load); the tahiti and gfx900 | 
|  |  | ; runs are covered only by the prefix-independent saveexec and block-label checks. | 
|  |  | ; There are no opt checks for this function. | 
|  | 243 | ; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32: | 
|  | 244 | ; GCN: s_and_saveexec_b64 | 
| Tom Stellard | 70580f8 | 2015-07-20 14:28:41 +0000 | [diff] [blame] | 245 | ; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} | 
|  | 246 | ; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] | 
| Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 247 | ; GCN: {{^BB[0-9]+}}_2: | 
| Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 248 | define amdgpu_kernel void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) { | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 249 | entry: | 
|  | 250 | %offset.ext = zext i32 %offset to i64 | 
|  | 251 | %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 | 
|  | 252 | %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 %offset.ext | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 253 | %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 | 
|  | 254 | %tmp0 = icmp eq i32 %tid, 0 | 
| Matt Arsenault | 73e06fa | 2015-06-04 16:17:42 +0000 | [diff] [blame] | 255 | br i1 %tmp0, label %endif, label %if | 
|  | 256 |  | 
|  | 257 | if: | 
|  | 258 | %tmp1 = load i32, i32 addrspace(1)* %in.gep | 
|  | 259 | br label %endif | 
|  | 260 |  | 
|  | 261 | endif: | 
|  | 262 | %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] | 
|  | 263 | store i32 %x, i32 addrspace(1)* %out.gep | 
|  | 264 | br label %done | 
|  | 265 |  | 
|  | 266 | done: | 
|  | 267 | ret void | 
|  | 268 | } | 
|  | 269 |  | 
|  |  | ; Constant-address-space (addrspace 2) i32 load at a constant offset of 7 elements. | 
|  |  | ; The opt checks expect no i32 addrspace(2) GEP ahead of the branch. On the llc side the | 
|  |  | ; scalar load is only checked for the tahiti run, which expects an immediate of 0x7. | 
|  |  | ; The label below ends with "(" like the other opt labels in this file so that it can only | 
|  |  | ; match the function's own signature. | 
| Matt Arsenault | 711b390 | 2015-08-07 20:18:34 +0000 | [diff] [blame] | 270 | ; OPT-LABEL: @test_sink_constant_small_offset_i32( | 
|  | 271 | ; OPT-NOT:  getelementptr i32, i32 addrspace(2)* | 
|  | 272 | ; OPT: br i1 | 
|  | 273 |  | 
|  | 274 | ; GCN-LABEL: {{^}}test_sink_constant_small_offset_i32: | 
|  | 275 | ; GCN: s_and_saveexec_b64 | 
|  | 276 | ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}} | 
|  | 277 | ; GCN: s_or_b64 exec, exec | 
| Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 278 | define amdgpu_kernel void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { | 
| Matt Arsenault | 711b390 | 2015-08-07 20:18:34 +0000 | [diff] [blame] | 279 | entry: | 
|  | 280 | %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 | 
|  | 281 | %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 7 | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 282 | %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 | 
|  | 283 | %tmp0 = icmp eq i32 %tid, 0 | 
| Matt Arsenault | 711b390 | 2015-08-07 20:18:34 +0000 | [diff] [blame] | 284 | br i1 %tmp0, label %endif, label %if | 
|  | 285 |  | 
|  | 286 | if: | 
|  | 287 | %tmp1 = load i32, i32 addrspace(2)* %in.gep | 
|  | 288 | br label %endif | 
|  | 289 |  | 
|  | 290 | endif: | 
|  | 291 | %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] | 
|  | 292 | store i32 %x, i32 addrspace(1)* %out.gep | 
|  | 293 | br label %done | 
|  | 294 |  | 
|  | 295 | done: | 
|  | 296 | ret void | 
|  | 297 | } | 
|  | 298 |  | 
|  |  | ; Constant-address-space (addrspace 2) i32 load at a constant offset of 255 elements. | 
|  |  | ; The opt checks expect no i32 addrspace(2) GEP ahead of the branch. On the llc side the | 
|  |  | ; scalar load is only checked for the tahiti run, which expects an immediate of 0xff. | 
|  |  | ; The label below ends with "(" like the other opt labels in this file so that it can only | 
|  |  | ; match the function's own signature. | 
|  | 299 | ; OPT-LABEL: @test_sink_constant_max_8_bit_offset_i32( | 
|  | 300 | ; OPT-NOT:  getelementptr i32, i32 addrspace(2)* | 
|  | 301 | ; OPT: br i1 | 
|  | 302 |  | 
|  | 303 | ; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_i32: | 
|  | 304 | ; GCN: s_and_saveexec_b64 | 
|  | 305 | ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}} | 
|  | 306 | ; GCN: s_or_b64 exec, exec | 
| Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 307 | define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { | 
| Matt Arsenault | 711b390 | 2015-08-07 20:18:34 +0000 | [diff] [blame] | 308 | entry: | 
|  | 309 | %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 | 
|  | 310 | %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 255 | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 311 | %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 | 
|  | 312 | %tmp0 = icmp eq i32 %tid, 0 | 
| Matt Arsenault | 711b390 | 2015-08-07 20:18:34 +0000 | [diff] [blame] | 313 | br i1 %tmp0, label %endif, label %if | 
|  | 314 |  | 
|  | 315 | if: | 
|  | 316 | %tmp1 = load i32, i32 addrspace(2)* %in.gep | 
|  | 317 | br label %endif | 
|  | 318 |  | 
|  | 319 | endif: | 
|  | 320 | %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] | 
|  | 321 | store i32 %x, i32 addrspace(1)* %out.gep | 
|  | 322 | br label %done | 
|  | 323 |  | 
|  | 324 | done: | 
|  | 325 | ret void | 
|  | 326 | } | 
|  | 327 |  | 
|  |  | ; Constant-address-space (addrspace 2) i32 load at a constant offset of 256 elements | 
|  |  | ; (0x400 bytes), one element past the preceding test. The opt checks expect the tahiti run | 
|  |  | ; to keep the i32 GEP ahead of the branch and the bonaire and tonga runs not to. The llc | 
|  |  | ; checks expect the tahiti run to put 0x400 in an SGPR and use it as the load offset. | 
|  |  | ; The label below ends with "(" like the other opt labels in this file so that it can only | 
|  |  | ; match the function's own signature. | 
|  | 328 | ; OPT-LABEL: @test_sink_constant_max_8_bit_offset_p1_i32( | 
|  | 329 | ; OPT-SI:  getelementptr i32, i32 addrspace(2)* | 
|  | 330 | ; OPT-CI-NOT:  getelementptr i32, i32 addrspace(2)* | 
|  | 331 | ; OPT-VI-NOT:  getelementptr i32, i32 addrspace(2)* | 
|  | 332 | ; OPT: br i1 | 
|  | 333 |  | 
|  | 334 | ; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_p1_i32: | 
|  | 335 | ; GCN: s_and_saveexec_b64 | 
|  | 336 | ; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x400 | 
|  | 337 |  | 
|  | 338 | ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}} | 
|  | 339 | ; GCN: s_or_b64 exec, exec | 
| Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 340 | define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { | 
| Matt Arsenault | 711b390 | 2015-08-07 20:18:34 +0000 | [diff] [blame] | 341 | entry: | 
|  | 342 | %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 | 
|  | 343 | %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 256 | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 344 | %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 | 
|  | 345 | %tmp0 = icmp eq i32 %tid, 0 | 
| Matt Arsenault | 711b390 | 2015-08-07 20:18:34 +0000 | [diff] [blame] | 346 | br i1 %tmp0, label %endif, label %if | 
|  | 347 |  | 
|  | 348 | if: | 
|  | 349 | %tmp1 = load i32, i32 addrspace(2)* %in.gep | 
|  | 350 | br label %endif | 
|  | 351 |  | 
|  | 352 | endif: | 
|  | 353 | %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] | 
|  | 354 | store i32 %x, i32 addrspace(1)* %out.gep | 
|  | 355 | br label %done | 
|  | 356 |  | 
|  | 357 | done: | 
|  | 358 | ret void | 
|  | 359 | } | 
|  | 360 |  | 
|  |  | ; Constant-address-space (addrspace 2) i32 load at a constant offset of 4294967295 | 
|  |  | ; elements, i.e. 0x3FFFFFFFC bytes. The opt checks expect the tahiti run to keep the i32 | 
|  |  | ; GEP ahead of the branch and the bonaire run not to. The llc checks expect a 64-bit | 
|  |  | ; scalar add of that byte offset (low half -4, high half 3) for every run, and for the | 
|  |  | ; tahiti run a scalar load with a 0x0 immediate. | 
|  |  | ; The label below ends with "(" like the other opt labels in this file so that it can only | 
|  |  | ; match the function's own signature. | 
|  | 361 | ; OPT-LABEL: @test_sink_constant_max_32_bit_offset_i32( | 
|  | 362 | ; OPT-SI: getelementptr i32, i32 addrspace(2)* | 
|  | 363 | ; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)* | 
|  | 364 | ; OPT: br i1 | 
|  | 365 |  | 
|  | 366 | ; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32: | 
|  | 367 | ; GCN: s_and_saveexec_b64 | 
| Tom Stellard | 9a19767 | 2015-09-09 15:43:26 +0000 | [diff] [blame] | 368 | ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}} | 
|  | 369 | ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}} | 
| Matt Arsenault | 711b390 | 2015-08-07 20:18:34 +0000 | [diff] [blame] | 370 | ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}} | 
|  | 371 | ; GCN: s_or_b64 exec, exec | 
| Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 372 | define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { | 
| Matt Arsenault | 711b390 | 2015-08-07 20:18:34 +0000 | [diff] [blame] | 373 | entry: | 
|  | 374 | %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 | 
|  | 375 | %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295 | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 376 | %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 | 
|  | 377 | %tmp0 = icmp eq i32 %tid, 0 | 
| Matt Arsenault | 711b390 | 2015-08-07 20:18:34 +0000 | [diff] [blame] | 378 | br i1 %tmp0, label %endif, label %if | 
|  | 379 |  | 
|  | 380 | if: | 
|  | 381 | %tmp1 = load i32, i32 addrspace(2)* %in.gep | 
|  | 382 | br label %endif | 
|  | 383 |  | 
|  | 384 | endif: | 
|  | 385 | %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] | 
|  | 386 | store i32 %x, i32 addrspace(1)* %out.gep | 
|  | 387 | br label %done | 
|  | 388 |  | 
|  | 389 | done: | 
|  | 390 | ret void | 
|  | 391 | } | 
|  | 392 |  | 
|  |  | ; Constant-address-space (addrspace 2) i32 load at a constant offset of 17179869181 | 
|  |  | ; elements (2^34 - 3). The opt checks expect every run to keep the i32 GEP ahead of the | 
|  |  | ; branch. The llc checks expect a 64-bit scalar add (operands not pinned) and, for the | 
|  |  | ; tahiti run, a scalar load with a 0x0 immediate. | 
|  |  | ; The label below ends with "(" like the other opt labels in this file so that it can only | 
|  |  | ; match the function's own signature. | 
|  | 393 | ; OPT-LABEL: @test_sink_constant_max_32_bit_offset_p1_i32( | 
|  | 394 | ; OPT: getelementptr i32, i32 addrspace(2)* | 
|  | 395 | ; OPT: br i1 | 
|  | 396 |  | 
|  | 397 | ; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_p1_i32: | 
|  | 398 | ; GCN: s_and_saveexec_b64 | 
|  | 399 | ; GCN: s_add_u32 | 
|  | 400 | ; GCN: s_addc_u32 | 
|  | 401 | ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}} | 
|  | 402 | ; GCN: s_or_b64 exec, exec | 
| Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 403 | define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { | 
| Matt Arsenault | 711b390 | 2015-08-07 20:18:34 +0000 | [diff] [blame] | 404 | entry: | 
|  | 405 | %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 | 
|  | 406 | %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181 | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 407 | %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 | 
|  | 408 | %tmp0 = icmp eq i32 %tid, 0 | 
| Matt Arsenault | 711b390 | 2015-08-07 20:18:34 +0000 | [diff] [blame] | 409 | br i1 %tmp0, label %endif, label %if | 
|  | 410 |  | 
|  | 411 | if: | 
|  | 412 | %tmp1 = load i32, i32 addrspace(2)* %in.gep | 
|  | 413 | br label %endif | 
|  | 414 |  | 
|  | 415 | endif: | 
|  | 416 | %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] | 
|  | 417 | store i32 %x, i32 addrspace(1)* %out.gep | 
|  | 418 | br label %done | 
|  | 419 |  | 
|  | 420 | done: | 
|  | 421 | ret void | 
|  | 422 | } | 
|  | 423 |  | 
|  |  | ; Constant-address-space (addrspace 2) i32 load at a constant offset of 262143 elements | 
|  |  | ; (0x3ffff dwords, 0xffffc bytes). The llc checks expect the tahiti run to put 0xffffc in | 
|  |  | ; an SGPR and use it as the load offset, the bonaire run to use an immediate of 0x3ffff, | 
|  |  | ; and the tonga run to use an immediate of 0xffffc. There are no opt checks here. | 
|  | 424 | ; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_i32: | 
|  | 425 | ; GCN: s_and_saveexec_b64 | 
|  | 426 | ; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc{{$}} | 
|  | 427 | ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}} | 
|  | 428 |  | 
|  | 429 | ; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x3ffff{{$}} | 
|  | 430 | ; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}} | 
|  | 431 |  | 
|  | 432 | ; GCN: s_or_b64 exec, exec | 
| Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 433 | define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { | 
| Matt Arsenault | 711b390 | 2015-08-07 20:18:34 +0000 | [diff] [blame] | 434 | entry: | 
|  | 435 | %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 | 
|  | 436 | %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262143 | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 437 | %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 | 
|  | 438 | %tmp0 = icmp eq i32 %tid, 0 | 
| Matt Arsenault | 711b390 | 2015-08-07 20:18:34 +0000 | [diff] [blame] | 439 | br i1 %tmp0, label %endif, label %if | 
|  | 440 |  | 
|  | 441 | if: | 
|  | 442 | %tmp1 = load i32, i32 addrspace(2)* %in.gep | 
|  | 443 | br label %endif | 
|  | 444 |  | 
|  | 445 | endif: | 
|  | 446 | %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] | 
|  | 447 | store i32 %x, i32 addrspace(1)* %out.gep | 
|  | 448 | br label %done | 
|  | 449 |  | 
|  | 450 | done: | 
|  | 451 | ret void | 
|  | 452 | } | 
|  | 453 |  | 
|  |  | ; Constant-address-space (addrspace 2) i32 load at a constant offset of 262144 elements | 
|  |  | ; (0x40000 dwords, 0x100000 bytes), one element past the preceding test. The opt checks | 
|  |  | ; expect the tahiti and tonga runs to keep the i32 GEP ahead of the branch and the bonaire | 
|  |  | ; run not to. The llc checks expect the tahiti and tonga runs to put 0x100000 in an SGPR | 
|  |  | ; and use it as the load offset, and the bonaire run to use an immediate of 0x40000. | 
|  |  | ; The label below ends with "(" like the other opt labels in this file so that it can only | 
|  |  | ; match the function's own signature. | 
|  | 454 | ; OPT-LABEL: @test_sink_constant_max_20_bit_byte_offset_p1_i32( | 
|  | 455 | ; OPT-SI: getelementptr i32, i32 addrspace(2)* | 
|  | 456 | ; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)* | 
|  | 457 | ; OPT-VI: getelementptr i32, i32 addrspace(2)* | 
|  | 458 | ; OPT: br i1 | 
|  | 459 |  | 
|  | 460 | ; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_p1_i32: | 
|  | 461 | ; GCN: s_and_saveexec_b64 | 
|  | 462 | ; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}} | 
|  | 463 | ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}} | 
|  | 464 |  | 
|  | 465 | ; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x40000{{$}} | 
|  | 466 |  | 
|  | 467 | ; VI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}} | 
|  | 468 | ; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}} | 
|  | 469 |  | 
|  | 470 | ; GCN: s_or_b64 exec, exec | 
| Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 471 | define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { | 
| Matt Arsenault | 711b390 | 2015-08-07 20:18:34 +0000 | [diff] [blame] | 472 | entry: | 
|  | 473 | %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 | 
|  | 474 | %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262144 | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 475 | %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 | 
|  | 476 | %tmp0 = icmp eq i32 %tid, 0 | 
| Matt Arsenault | 711b390 | 2015-08-07 20:18:34 +0000 | [diff] [blame] | 477 | br i1 %tmp0, label %endif, label %if | 
|  | 478 |  | 
|  | 479 | if: | 
|  | 480 | %tmp1 = load i32, i32 addrspace(2)* %in.gep | 
|  | 481 | br label %endif | 
|  | 482 |  | 
|  | 483 | endif: | 
|  | 484 | %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] | 
|  | 485 | store i32 %x, i32 addrspace(1)* %out.gep | 
|  | 486 | br label %done | 
|  | 487 |  | 
|  | 488 | done: | 
|  | 489 | ret void | 
|  | 490 | } | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 491 |  | 
| Matt Arsenault | c1e6a45 | 2016-07-09 08:02:28 +0000 | [diff] [blame] | 492 | %struct.foo = type { [3 x float], [3 x float] } | 
|  | 493 |  | 
|  | 494 | ; OPT-LABEL: @sink_ds_address( | 
| Eli Friedman | 5fba1e5 | 2017-04-06 22:42:18 +0000 | [diff] [blame] | 495 | ; OPT: getelementptr i8, | 
| Matt Arsenault | c1e6a45 | 2016-07-09 08:02:28 +0000 | [diff] [blame] | 496 |  | 
|  | 497 | ; GCN-LABEL: {{^}}sink_ds_address: | 
|  | 498 | ; GCN: s_load_dword [[SREG1:s[0-9]+]], | 
|  | 499 | ; GCN: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]] | 
|  | 500 | ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[VREG1]] offset0:3 offset1:5 | 
; Kernel that forms two addrspace(3) addresses into the second [3 x float]
; array of %struct.foo in the entry block and loads both of them in %bb32.
; With 4-byte floats, %x is at byte 12 (dword 3) and %y at byte 20 (dword 5)
; from %ptr, which lines up with the offset0:3 / offset1:5 operands in the
; ds_read2_b32 check for this function.
| Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 501 | define amdgpu_kernel void @sink_ds_address(%struct.foo addrspace(3)* nocapture %ptr) nounwind { | 
| Matt Arsenault | c1e6a45 | 2016-07-09 08:02:28 +0000 | [diff] [blame] | 502 | entry: | 
|  | 503 | %x = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0 | 
|  | 504 | %y = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 2 | 
|  | 505 | br label %bb32 | 
|  | 506 |  | 
|  | 507 | bb32: | 
|  | 508 | %a = load float, float addrspace(3)* %x, align 4 | 
|  | 509 | %b = load float, float addrspace(3)* %y, align 4 | 
|  | 510 | %cmp = fcmp one float %a, %b | 
|  | 511 | br i1 %cmp, label %bb34, label %bb33 | 
|  | 512 |  | 
; Both successors are unreachable; only the two loads and the compare that
; feeds the branch matter to this test.
|  | 513 | bb33: | 
|  | 514 | unreachable | 
|  | 515 |  | 
|  | 516 | bb34: | 
|  | 517 | unreachable | 
|  | 518 | } | 
|  | 519 |  | 
| Matt Arsenault | 3cc1e00 | 2016-08-13 01:43:51 +0000 | [diff] [blame] | 520 | ; Address offset is not a multiple of 4. This is a valid mubuf offset, | 
|  | 521 | ; but not smrd. | 
|  | 522 |  | 
|  | 523 | ; OPT-LABEL: @test_sink_constant_small_max_mubuf_offset_load_i32_align_1( | 
|  | 524 | ; OPT: br i1 %tmp0, | 
|  | 525 | ; OPT: if: | 
| Eli Friedman | 5fba1e5 | 2017-04-06 22:42:18 +0000 | [diff] [blame] | 526 | ; OPT: getelementptr i8, {{.*}} 4095 | 
; Kernel that loads an i32 with align 1 from byte offset 4095 of an
; addrspace(2) i8 pointer. The i8 GEP is computed in the entry block; the
; bitcast to i32* and the load itself live in %if. The OPT checks for this
; function expect an i8 GEP with offset 4095 to appear inside %if.
| Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 527 | define amdgpu_kernel void @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(i32 addrspace(1)* %out, i8 addrspace(2)* %in) { | 
| Matt Arsenault | 3cc1e00 | 2016-08-13 01:43:51 +0000 | [diff] [blame] | 528 | entry: | 
|  | 529 | %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024 | 
|  | 530 | %in.gep = getelementptr i8, i8 addrspace(2)* %in, i64 4095 | 
|  | 531 | %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 | 
|  | 532 | %tmp0 = icmp eq i32 %tid, 0 | 
|  | 533 | br i1 %tmp0, label %endif, label %if | 
|  | 534 |  | 
|  | 535 | if: | 
|  | 536 | %bitcast = bitcast i8 addrspace(2)* %in.gep to i32 addrspace(2)* | 
|  | 537 | %tmp1 = load i32, i32 addrspace(2)* %bitcast, align 1 | 
|  | 538 | br label %endif | 
|  | 539 |  | 
|  | 540 | endif: | 
|  | 541 | %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] | 
|  | 542 | store i32 %x, i32 addrspace(1)* %out.gep | 
|  | 543 | br label %done | 
|  | 544 |  | 
|  | 545 | done: | 
|  | 546 | ret void | 
|  | 547 | } | 
|  | 548 |  | 
| Matt Arsenault | 02d915b | 2017-03-15 22:35:20 +0000 | [diff] [blame] | 549 | ; OPT-LABEL: @test_sink_local_small_offset_atomicrmw_i32( | 
| Eli Friedman | 5fba1e5 | 2017-04-06 22:42:18 +0000 | [diff] [blame] | 550 | ; OPT: %0 = bitcast i32 addrspace(3)* %in to i8 addrspace(3)* | 
|  | 551 | ; OPT: %sunkaddr = getelementptr i8, i8 addrspace(3)* %0, i32 28 | 
|  | 552 | ; OPT: %1 = bitcast i8 addrspace(3)* %sunkaddr to i32 addrspace(3)* | 
|  | 553 | ; OPT: %tmp1 = atomicrmw add i32 addrspace(3)* %1, i32 2 seq_cst | 
; Kernel where %in.gep (element 7 of the addrspace(3) pointer %in, i.e. byte
; offset 28) is computed in the entry block but used only as the pointer
; operand of the atomicrmw add in %if. The OPT checks for this function
; expect the atomicrmw to use an address rebuilt from %in as an i8 GEP of 28
; named %sunkaddr.
| Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 554 | define amdgpu_kernel void @test_sink_local_small_offset_atomicrmw_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) { | 
| Matt Arsenault | 02d915b | 2017-03-15 22:35:20 +0000 | [diff] [blame] | 555 | entry: | 
|  | 556 | %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999 | 
|  | 557 | %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7 | 
|  | 558 | %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 | 
|  | 559 | %tmp0 = icmp eq i32 %tid, 0 | 
|  | 560 | br i1 %tmp0, label %endif, label %if | 
|  | 561 |  | 
|  | 562 | if: | 
|  | 563 | %tmp1 = atomicrmw add i32 addrspace(3)* %in.gep, i32 2 seq_cst | 
|  | 564 | br label %endif | 
|  | 565 |  | 
|  | 566 | endif: | 
|  | 567 | %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] | 
|  | 568 | store i32 %x, i32 addrspace(3)* %out.gep | 
|  | 569 | br label %done | 
|  | 570 |  | 
|  | 571 | done: | 
|  | 572 | ret void | 
|  | 573 | } | 
|  | 574 |  | 
|  | 575 | ; OPT-LABEL: @test_sink_local_small_offset_cmpxchg_i32( | 
| Eli Friedman | 5fba1e5 | 2017-04-06 22:42:18 +0000 | [diff] [blame] | 576 | ; OPT: %0 = bitcast i32 addrspace(3)* %in to i8 addrspace(3)* | 
|  | 577 | ; OPT: %sunkaddr = getelementptr i8, i8 addrspace(3)* %0, i32 28 | 
|  | 578 | ; OPT: %1 = bitcast i8 addrspace(3)* %sunkaddr to i32 addrspace(3)* | 
|  | 579 | ; OPT: %tmp1.struct = cmpxchg i32 addrspace(3)* %1, i32 undef, i32 2 seq_cst monotonic | 
; Kernel where %in.gep (element 7 of the addrspace(3) pointer %in, i.e. byte
; offset 28) is computed in the entry block but used only as the pointer
; operand of the cmpxchg in %if. The OPT checks for this function expect the
; cmpxchg to use an address rebuilt from %in as an i8 GEP of 28 named
; %sunkaddr.
| Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 580 | define amdgpu_kernel void @test_sink_local_small_offset_cmpxchg_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) { | 
| Matt Arsenault | 02d915b | 2017-03-15 22:35:20 +0000 | [diff] [blame] | 581 | entry: | 
|  | 582 | %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999 | 
|  | 583 | %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7 | 
|  | 584 | %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 | 
|  | 585 | %tmp0 = icmp eq i32 %tid, 0 | 
|  | 586 | br i1 %tmp0, label %endif, label %if | 
|  | 587 |  | 
|  | 588 | if: | 
|  | 589 | %tmp1.struct = cmpxchg i32 addrspace(3)* %in.gep, i32 undef, i32 2 seq_cst monotonic | 
; cmpxchg yields { i32, i1 }; only the loaded value (field 0) is forwarded.
|  | 590 | %tmp1 = extractvalue { i32, i1 } %tmp1.struct, 0 | 
|  | 591 | br label %endif | 
|  | 592 |  | 
|  | 593 | endif: | 
|  | 594 | %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] | 
|  | 595 | store i32 %x, i32 addrspace(3)* %out.gep | 
|  | 596 | br label %done | 
|  | 597 |  | 
|  | 598 | done: | 
|  | 599 | ret void | 
|  | 600 | } | 
|  | 601 |  | 
|  | 602 | ; OPT-LABEL: @test_wrong_operand_local_small_offset_cmpxchg_i32( | 
|  | 603 | ; OPT: %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7 | 
|  | 604 | ; OPT: br i1 | 
|  | 605 | ; OPT: cmpxchg i32 addrspace(3)* addrspace(3)* undef, i32 addrspace(3)* %in.gep, i32 addrspace(3)* undef seq_cst monotonic | 
; Negative counterpart of the cmpxchg sinking test: here %in.gep is passed
; to cmpxchg as the compare VALUE operand, while the pointer operand is
; undef. %in.gep is therefore data, not the address being accessed, and the
; OPT checks for this function expect the GEP to stay in the entry block
; (before the branch) with the cmpxchg still referring to %in.gep directly.
| Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 606 | define amdgpu_kernel void @test_wrong_operand_local_small_offset_cmpxchg_i32(i32 addrspace(3)* addrspace(3)* %out, i32 addrspace(3)* %in) { | 
| Matt Arsenault | 02d915b | 2017-03-15 22:35:20 +0000 | [diff] [blame] | 607 | entry: | 
|  | 608 | %out.gep = getelementptr i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* %out, i32 999999 | 
|  | 609 | %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7 | 
|  | 610 | %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 | 
|  | 611 | %tmp0 = icmp eq i32 %tid, 0 | 
|  | 612 | br i1 %tmp0, label %endif, label %if | 
|  | 613 |  | 
|  | 614 | if: | 
|  | 615 | %tmp1.struct = cmpxchg i32 addrspace(3)* addrspace(3)* undef, i32 addrspace(3)* %in.gep, i32 addrspace(3)* undef seq_cst monotonic | 
|  | 616 | %tmp1 = extractvalue { i32 addrspace(3)*, i1 } %tmp1.struct, 0 | 
|  | 617 | br label %endif | 
|  | 618 |  | 
|  | 619 | endif: | 
|  | 620 | %x = phi i32 addrspace(3)* [ %tmp1, %if ], [ null, %entry ] | 
|  | 621 | store i32 addrspace(3)* %x, i32 addrspace(3)* addrspace(3)* %out.gep | 
|  | 622 | br label %done | 
|  | 623 |  | 
|  | 624 | done: | 
|  | 625 | ret void | 
|  | 626 | } | 
|  | 627 |  | 
| Matt Arsenault | 7dc01c9 | 2017-03-15 23:15:12 +0000 | [diff] [blame] | 628 | ; OPT-LABEL: @test_sink_local_small_offset_atomic_inc_i32( | 
| Eli Friedman | 5fba1e5 | 2017-04-06 22:42:18 +0000 | [diff] [blame] | 629 | ; OPT: %0 = bitcast i32 addrspace(3)* %in to i8 addrspace(3)* | 
|  | 630 | ; OPT: %sunkaddr = getelementptr i8, i8 addrspace(3)* %0, i32 28 | 
|  | 631 | ; OPT: %1 = bitcast i8 addrspace(3)* %sunkaddr to i32 addrspace(3)* | 
|  | 632 | ; OPT: %tmp1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %1, i32 2, i32 0, i32 0, i1 false) | 
; Kernel where %in.gep (element 7 of the addrspace(3) pointer %in, i.e. byte
; offset 28) is computed in the entry block but used only as the pointer
; argument of the llvm.amdgcn.atomic.inc intrinsic call in %if. The OPT
; checks for this function expect the call to use an address rebuilt from
; %in as an i8 GEP of 28 named %sunkaddr.
| Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 633 | define amdgpu_kernel void @test_sink_local_small_offset_atomic_inc_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) { | 
| Matt Arsenault | 7dc01c9 | 2017-03-15 23:15:12 +0000 | [diff] [blame] | 634 | entry: | 
|  | 635 | %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999 | 
|  | 636 | %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7 | 
|  | 637 | %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 | 
|  | 638 | %tmp0 = icmp eq i32 %tid, 0 | 
|  | 639 | br i1 %tmp0, label %endif, label %if | 
|  | 640 |  | 
|  | 641 | if: | 
| Matt Arsenault | 79f837c | 2017-03-30 22:21:40 +0000 | [diff] [blame] | 642 | %tmp1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %in.gep, i32 2, i32 0, i32 0, i1 false) | 
| Matt Arsenault | 7dc01c9 | 2017-03-15 23:15:12 +0000 | [diff] [blame] | 643 | br label %endif | 
|  | 644 |  | 
|  | 645 | endif: | 
|  | 646 | %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] | 
|  | 647 | store i32 %x, i32 addrspace(3)* %out.gep | 
|  | 648 | br label %done | 
|  | 649 |  | 
|  | 650 | done: | 
|  | 651 | ret void | 
|  | 652 | } | 
|  | 653 |  | 
|  | 654 | ; OPT-LABEL: @test_sink_local_small_offset_atomic_dec_i32( | 
| Eli Friedman | 5fba1e5 | 2017-04-06 22:42:18 +0000 | [diff] [blame] | 655 | ; OPT: %0 = bitcast i32 addrspace(3)* %in to i8 addrspace(3)* | 
|  | 656 | ; OPT: %sunkaddr = getelementptr i8, i8 addrspace(3)* %0, i32 28 | 
|  | 657 | ; OPT: %1 = bitcast i8 addrspace(3)* %sunkaddr to i32 addrspace(3)* | 
|  | 658 | ; OPT: %tmp1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %1, i32 2, i32 0, i32 0, i1 false) | 
; Kernel where %in.gep (element 7 of the addrspace(3) pointer %in, i.e. byte
; offset 28) is computed in the entry block but used only as the pointer
; argument of the llvm.amdgcn.atomic.dec intrinsic call in %if. The OPT
; checks for this function expect the call to use an address rebuilt from
; %in as an i8 GEP of 28 named %sunkaddr.
| Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 659 | define amdgpu_kernel void @test_sink_local_small_offset_atomic_dec_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) { | 
| Matt Arsenault | 7dc01c9 | 2017-03-15 23:15:12 +0000 | [diff] [blame] | 660 | entry: | 
|  | 661 | %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999 | 
|  | 662 | %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7 | 
|  | 663 | %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 | 
|  | 664 | %tmp0 = icmp eq i32 %tid, 0 | 
|  | 665 | br i1 %tmp0, label %endif, label %if | 
|  | 666 |  | 
|  | 667 | if: | 
| Matt Arsenault | 79f837c | 2017-03-30 22:21:40 +0000 | [diff] [blame] | 668 | %tmp1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %in.gep, i32 2, i32 0, i32 0, i1 false) | 
| Matt Arsenault | 7dc01c9 | 2017-03-15 23:15:12 +0000 | [diff] [blame] | 669 | br label %endif | 
|  | 670 |  | 
|  | 671 | endif: | 
|  | 672 | %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] | 
|  | 673 | store i32 %x, i32 addrspace(3)* %out.gep | 
|  | 674 | br label %done | 
|  | 675 |  | 
|  | 676 | done: | 
|  | 677 | ret void | 
|  | 678 | } | 
|  | 679 |  | 
| Matt Arsenault | dc8f5cc | 2017-07-29 01:12:31 +0000 | [diff] [blame] | 680 | ; OPT-LABEL: @test_sink_global_small_min_scratch_global_offset( | 
|  | 681 | ; OPT-SICIVI: %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 -4096 | 
|  | 682 | ; OPT-SICIVI: br | 
|  | 683 | ; OPT-SICIVI: %tmp1 = load i8, i8 addrspace(1)* %in.gep | 
|  | 684 |  | 
|  | 685 | ; OPT-GFX9: br | 
|  | 686 | ; OPT-GFX9: %sunkaddr = getelementptr i8, i8 addrspace(1)* %in, i64 -4096 | 
|  | 687 | ; OPT-GFX9: load i8, i8 addrspace(1)* %sunkaddr | 
|  | 688 |  | 
|  | 689 | ; GCN-LABEL: {{^}}test_sink_global_small_min_scratch_global_offset: | 
|  | 690 | ; GFX9: global_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:-4096{{$}} | 
; Kernel that loads an i8 from byte offset -4096 of the addrspace(1) pointer
; %in and sign-extends it to i32. The GEP is computed in the entry block and
; the load lives in %if. Per the checks for this function, the GFX9 run
; expects the address to be rebuilt in %if as %sunkaddr, while the other
; OPT runs expect the load to keep using the entry-block %in.gep.
|  | 691 | define amdgpu_kernel void @test_sink_global_small_min_scratch_global_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { | 
|  | 692 | entry: | 
|  | 693 | %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024 | 
|  | 694 | %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 -4096 | 
|  | 695 | %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 | 
|  | 696 | %tmp0 = icmp eq i32 %tid, 0 | 
|  | 697 | br i1 %tmp0, label %endif, label %if | 
|  | 698 |  | 
|  | 699 | if: | 
|  | 700 | %tmp1 = load i8, i8 addrspace(1)* %in.gep | 
|  | 701 | %tmp2 = sext i8 %tmp1 to i32 | 
|  | 702 | br label %endif | 
|  | 703 |  | 
|  | 704 | endif: | 
|  | 705 | %x = phi i32 [ %tmp2, %if ], [ 0, %entry ] | 
|  | 706 | store i32 %x, i32 addrspace(1)* %out.gep | 
|  | 707 | br label %done | 
|  | 708 |  | 
|  | 709 | done: | 
|  | 710 | ret void | 
|  | 711 | } | 
|  | 712 |  | 
|  | 713 | ; OPT-LABEL: @test_sink_global_small_min_scratch_global_neg1_offset( | 
|  | 714 | ; OPT: %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 -4097 | 
|  | 715 | ; OPT: br | 
|  | 716 | ; OPT: load i8, i8 addrspace(1)* %in.gep | 
|  | 717 |  | 
|  | 718 | ; GCN-LABEL: {{^}}test_sink_global_small_min_scratch_global_neg1_offset: | 
; Same shape as the -4096 test, but the addrspace(1) byte offset is -4097
; (one byte further negative). The OPT checks for this function apply to
; every opt run and expect the GEP to remain in the entry block, with the
; load in %if still using %in.gep.
|  | 719 | define amdgpu_kernel void @test_sink_global_small_min_scratch_global_neg1_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { | 
|  | 720 | entry: | 
|  | 721 | %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999 | 
|  | 722 | %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 -4097 | 
|  | 723 | %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 | 
|  | 724 | %tmp0 = icmp eq i32 %tid, 0 | 
|  | 725 | br i1 %tmp0, label %endif, label %if | 
|  | 726 |  | 
|  | 727 | if: | 
|  | 728 | %tmp1 = load i8, i8 addrspace(1)* %in.gep | 
|  | 729 | %tmp2 = sext i8 %tmp1 to i32 | 
|  | 730 | br label %endif | 
|  | 731 |  | 
|  | 732 | endif: | 
|  | 733 | %x = phi i32 [ %tmp2, %if ], [ 0, %entry ] | 
|  | 734 | store i32 %x, i32 addrspace(1)* %out.gep | 
|  | 735 | br label %done | 
|  | 736 |  | 
|  | 737 | done: | 
|  | 738 | ret void | 
|  | 739 | } | 
|  | 740 |  | 
; Intrinsics called by the kernels in this file: mbcnt.lo supplies the value
; compared against 0 for each kernel's branch; atomic.inc / atomic.dec are
; the addrspace(3) atomics exercised by the *_atomic_inc_i32 and
; *_atomic_dec_i32 tests.
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 741 | declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0 | 
| Matt Arsenault | 79f837c | 2017-03-30 22:21:40 +0000 | [diff] [blame] | 742 | declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2 | 
|  | 743 | declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2 | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 744 |  | 
; Attribute groups: #0 is attached to mbcnt.lo and its call sites, #2 to the
; atomic inc/dec declarations.
; NOTE(review): #1 is not referenced in this part of the file - confirm it
; is used by an earlier function before removing it.
|  | 745 | attributes #0 = { nounwind readnone } | 
| Matt Arsenault | 3cc1e00 | 2016-08-13 01:43:51 +0000 | [diff] [blame] | 746 | attributes #1 = { nounwind } | 
| Matt Arsenault | 7dc01c9 | 2017-03-15 23:15:12 +0000 | [diff] [blame] | 747 | attributes #2 = { nounwind argmemonly } |