| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 1 | ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=SI -check-prefix=FUNC %s |
| 2 | ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s |
| 3 | ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=SI -check-prefix=FUNC %s |
| 4 | ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s |
| Matt Arsenault | 2ba54c3 | 2013-10-30 23:30:05 +0000 | [diff] [blame] | 5 | |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 6 | ; FUNC-LABEL: {{^}}local_unaligned_load_store_i16: |
| 7 | ; GCN: ds_read_u8 |
| 8 | ; GCN: ds_read_u8 |
| 9 | ; GCN: ds_write_b8 |
| 10 | ; GCN: ds_write_b8 |
| 11 | ; GCN: s_endpgm |
| 12 | define void @local_unaligned_load_store_i16(i16 addrspace(3)* %p, i16 addrspace(3)* %r) #0 { |
| David Blaikie | a79ac14 | 2015-02-27 21:17:42 +0000 | [diff] [blame] | 13 | %v = load i16, i16 addrspace(3)* %p, align 1 |
| Tom Stellard | 33e64c6 | 2015-02-04 20:49:52 +0000 | [diff] [blame] | 14 | store i16 %v, i16 addrspace(3)* %r, align 1 |
| 15 | ret void |
| 16 | } |
| 17 | |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 18 | ; FUNC-LABEL: {{^}}global_unaligned_load_store_i16: |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 19 | ; GCN-NOHSA: buffer_load_ubyte |
| 20 | ; GCN-NOHSA: buffer_load_ubyte |
| 21 | ; GCN-NOHSA: buffer_store_byte |
| 22 | ; GCN-NOHSA: buffer_store_byte |
| 23 | |
| 24 | ; GCN-HSA: flat_load_ubyte |
| 25 | ; GCN-HSA: flat_load_ubyte |
| 26 | ; GCN-HSA: flat_store_byte |
| 27 | ; GCN-HSA: flat_store_byte |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 28 | define void @global_unaligned_load_store_i16(i16 addrspace(1)* %p, i16 addrspace(1)* %r) #0 { |
| David Blaikie | a79ac14 | 2015-02-27 21:17:42 +0000 | [diff] [blame] | 29 | %v = load i16, i16 addrspace(1)* %p, align 1 |
| Tom Stellard | 33e64c6 | 2015-02-04 20:49:52 +0000 | [diff] [blame] | 30 | store i16 %v, i16 addrspace(1)* %r, align 1 |
| 31 | ret void |
| 32 | } |
| 33 | |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 34 | ; FUNC-LABEL: {{^}}local_unaligned_load_store_i32: |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 35 | |
| 36 | ; SI: ds_read_u8 |
| 37 | ; SI: ds_read_u8 |
| 38 | ; SI: ds_read_u8 |
| 39 | ; SI: ds_read_u8 |
| 40 | ; SI-NOT: v_or |
| 41 | ; SI-NOT: v_lshl |
| 42 | ; SI: ds_write_b8 |
| 43 | ; SI: ds_write_b8 |
| 44 | ; SI: ds_write_b8 |
| 45 | ; SI: ds_write_b8 |
| 46 | ; SI: s_endpgm |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 47 | define void @local_unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) #0 { |
| David Blaikie | a79ac14 | 2015-02-27 21:17:42 +0000 | [diff] [blame] | 48 | %v = load i32, i32 addrspace(3)* %p, align 1 |
| Matt Arsenault | 2ba54c3 | 2013-10-30 23:30:05 +0000 | [diff] [blame] | 49 | store i32 %v, i32 addrspace(3)* %r, align 1 |
| 50 | ret void |
| 51 | } |
| 52 | |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 53 | ; FUNC-LABEL: {{^}}global_unaligned_load_store_i32: |
| 54 | ; GCN-NOHSA: buffer_load_ubyte |
| 55 | ; GCN-NOHSA: buffer_load_ubyte |
| 56 | ; GCN-NOHSA: buffer_load_ubyte |
| 57 | ; GCN-NOHSA: buffer_load_ubyte |
| 58 | ; GCN-NOHSA: buffer_store_byte |
| 59 | ; GCN-NOHSA: buffer_store_byte |
| 60 | ; GCN-NOHSA: buffer_store_byte |
| 61 | ; GCN-NOHSA: buffer_store_byte |
| 62 | |
| 63 | ; GCN-HSA: flat_load_ubyte |
| 64 | ; GCN-HSA: flat_load_ubyte |
| 65 | ; GCN-HSA: flat_load_ubyte |
| 66 | ; GCN-HSA: flat_load_ubyte |
| 67 | ; GCN-HSA: flat_store_byte |
| 68 | ; GCN-HSA: flat_store_byte |
| 69 | ; GCN-HSA: flat_store_byte |
| 70 | ; GCN-HSA: flat_store_byte |
| 71 | define void @global_unaligned_load_store_i32(i32 addrspace(1)* %p, i32 addrspace(1)* %r) #0 { |
| David Blaikie | a79ac14 | 2015-02-27 21:17:42 +0000 | [diff] [blame] | 72 | %v = load i32, i32 addrspace(1)* %p, align 1 |
| Tom Stellard | c6b299c | 2015-02-02 18:02:28 +0000 | [diff] [blame] | 73 | store i32 %v, i32 addrspace(1)* %r, align 1 |
| 74 | ret void |
| 75 | } |
| 76 | |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 77 | ; FUNC-LABEL: {{^}}global_align2_load_store_i32: |
| 78 | ; GCN-NOHSA: buffer_load_ushort |
| 79 | ; GCN-NOHSA: buffer_load_ushort |
| 80 | ; GCN-NOHSA: buffer_store_short |
| 81 | ; GCN-NOHSA: buffer_store_short |
| 82 | |
| 83 | ; GCN-HSA: flat_load_ushort |
| 84 | ; GCN-HSA: flat_load_ushort |
| 85 | ; GCN-HSA: flat_store_short |
| 86 | ; GCN-HSA: flat_store_short |
| 87 | define void @global_align2_load_store_i32(i32 addrspace(1)* %p, i32 addrspace(1)* %r) #0 { |
| Matt Arsenault | bcdfee7 | 2016-05-02 20:13:51 +0000 | [diff] [blame] | 88 | %v = load i32, i32 addrspace(1)* %p, align 2 |
| 89 | store i32 %v, i32 addrspace(1)* %r, align 2 |
| 90 | ret void |
| 91 | } |
| 92 | |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 93 | ; FUNC-LABEL: {{^}}local_align2_load_store_i32: |
| 94 | ; GCN: ds_read_u16 |
| 95 | ; GCN: ds_read_u16 |
| 96 | ; GCN: ds_write_b16 |
| 97 | ; GCN: ds_write_b16 |
| 98 | define void @local_align2_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) #0 { |
| Matt Arsenault | bcdfee7 | 2016-05-02 20:13:51 +0000 | [diff] [blame] | 99 | %v = load i32, i32 addrspace(3)* %p, align 2 |
| 100 | store i32 %v, i32 addrspace(3)* %r, align 2 |
| 101 | ret void |
| 102 | } |
| 103 | |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 104 | ; FUNC-LABEL: {{^}}local_unaligned_load_store_i64: |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 105 | ; SI: ds_read_u8 |
| 106 | ; SI: ds_read_u8 |
| 107 | ; SI: ds_read_u8 |
| 108 | ; SI: ds_read_u8 |
| 109 | ; SI: ds_read_u8 |
| 110 | ; SI: ds_read_u8 |
| 111 | ; SI: ds_read_u8 |
| 112 | ; SI: ds_read_u8 |
| Matt Arsenault | bcdfee7 | 2016-05-02 20:13:51 +0000 | [diff] [blame] | 113 | |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 114 | ; SI-NOT: v_or_b32 |
| 115 | ; SI-NOT: v_lshl |
| 116 | ; SI: ds_write_b8 |
| 117 | ; SI-NOT: v_or_b32 |
| 118 | ; SI-NOT: v_lshl |
| Matt Arsenault | bcdfee7 | 2016-05-02 20:13:51 +0000 | [diff] [blame] | 119 | |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 120 | ; SI: ds_write_b8 |
| 121 | ; SI-NOT: v_or_b32 |
| 122 | ; SI-NOT: v_lshl |
| Matt Arsenault | bcdfee7 | 2016-05-02 20:13:51 +0000 | [diff] [blame] | 123 | |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 124 | ; SI: ds_write_b8 |
| 125 | ; SI-NOT: v_or_b32 |
| 126 | ; SI-NOT: v_lshl |
| Matt Arsenault | bcdfee7 | 2016-05-02 20:13:51 +0000 | [diff] [blame] | 127 | |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 128 | ; SI: ds_write_b8 |
| 129 | ; SI-NOT: v_or_b32 |
| 130 | ; SI-NOT: v_lshl |
| Matt Arsenault | bcdfee7 | 2016-05-02 20:13:51 +0000 | [diff] [blame] | 131 | |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 132 | ; SI: ds_write_b8 |
| 133 | ; SI-NOT: v_or_b32 |
| 134 | ; SI-NOT: v_lshl |
| Matt Arsenault | bcdfee7 | 2016-05-02 20:13:51 +0000 | [diff] [blame] | 135 | |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 136 | ; SI: ds_write_b8 |
| 137 | ; SI-NOT: v_or_b32 |
| 138 | ; SI-NOT: v_lshl |
| Matt Arsenault | bcdfee7 | 2016-05-02 20:13:51 +0000 | [diff] [blame] | 139 | |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 140 | ; SI: ds_write_b8 |
| 141 | ; SI-NOT: v_or_b32 |
| 142 | ; SI-NOT: v_lshl |
| 143 | ; SI: ds_write_b8 |
| 144 | ; SI: s_endpgm |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 145 | define void @local_unaligned_load_store_i64(i64 addrspace(3)* %p, i64 addrspace(3)* %r) { |
| David Blaikie | a79ac14 | 2015-02-27 21:17:42 +0000 | [diff] [blame] | 146 | %v = load i64, i64 addrspace(3)* %p, align 1 |
| Tom Stellard | c6b299c | 2015-02-02 18:02:28 +0000 | [diff] [blame] | 147 | store i64 %v, i64 addrspace(3)* %r, align 1 |
| 148 | ret void |
| 149 | } |
| 150 | |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 151 | ; SI-LABEL: {{^}}local_unaligned_load_store_v2i32: |
| 152 | ; SI: ds_read_u8 |
| 153 | ; SI: ds_read_u8 |
| 154 | ; SI: ds_read_u8 |
| 155 | ; SI: ds_read_u8 |
| 156 | ; SI: ds_read_u8 |
| 157 | ; SI: ds_read_u8 |
| 158 | ; SI: ds_read_u8 |
| 159 | ; SI: ds_read_u8 |
| Matt Arsenault | bcdfee7 | 2016-05-02 20:13:51 +0000 | [diff] [blame] | 160 | |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 161 | ; SI-NOT: v_or_b32 |
| 162 | ; SI-NOT: v_lshl |
| 163 | ; SI: ds_write_b8 |
| 164 | ; SI-NOT: v_or_b32 |
| 165 | ; SI-NOT: v_lshl |
| Matt Arsenault | bcdfee7 | 2016-05-02 20:13:51 +0000 | [diff] [blame] | 166 | |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 167 | ; SI: ds_write_b8 |
| 168 | ; SI-NOT: v_or_b32 |
| 169 | ; SI-NOT: v_lshl |
| Matt Arsenault | bcdfee7 | 2016-05-02 20:13:51 +0000 | [diff] [blame] | 170 | |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 171 | ; SI: ds_write_b8 |
| 172 | ; SI-NOT: v_or_b32 |
| 173 | ; SI-NOT: v_lshl |
| Matt Arsenault | bcdfee7 | 2016-05-02 20:13:51 +0000 | [diff] [blame] | 174 | |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 175 | ; SI: ds_write_b8 |
| 176 | ; SI-NOT: v_or_b32 |
| 177 | ; SI-NOT: v_lshl |
| Matt Arsenault | bcdfee7 | 2016-05-02 20:13:51 +0000 | [diff] [blame] | 178 | |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 179 | ; SI: ds_write_b8 |
| 180 | ; SI-NOT: v_or_b32 |
| 181 | ; SI-NOT: v_lshl |
| Matt Arsenault | bcdfee7 | 2016-05-02 20:13:51 +0000 | [diff] [blame] | 182 | |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 183 | ; SI: ds_write_b8 |
| 184 | ; SI-NOT: v_or_b32 |
| 185 | ; SI-NOT: v_lshl |
| Matt Arsenault | bcdfee7 | 2016-05-02 20:13:51 +0000 | [diff] [blame] | 186 | |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 187 | ; SI: ds_write_b8 |
| 188 | ; SI-NOT: v_or_b32 |
| 189 | ; SI-NOT: v_lshl |
| 190 | ; SI: ds_write_b8 |
| 191 | ; SI: s_endpgm |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 192 | define void @local_unaligned_load_store_v2i32(<2 x i32> addrspace(3)* %p, <2 x i32> addrspace(3)* %r) { |
| Matt Arsenault | bcdfee7 | 2016-05-02 20:13:51 +0000 | [diff] [blame] | 193 | %v = load <2 x i32>, <2 x i32> addrspace(3)* %p, align 1 |
| 194 | store <2 x i32> %v, <2 x i32> addrspace(3)* %r, align 1 |
| 195 | ret void |
| 196 | } |
| 197 | |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 198 | ; SI-LABEL: {{^}}global_align2_load_store_i64: |
| 199 | ; SI: buffer_load_ushort |
| 200 | ; SI: buffer_load_ushort |
| Matt Arsenault | bcdfee7 | 2016-05-02 20:13:51 +0000 | [diff] [blame] | 201 | |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 202 | ; SI-NOT: v_or_ |
| 203 | ; SI-NOT: v_lshl |
| Matt Arsenault | bcdfee7 | 2016-05-02 20:13:51 +0000 | [diff] [blame] | 204 | |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 205 | ; SI: buffer_load_ushort |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 206 | |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 207 | ; SI-NOT: v_or_ |
| 208 | ; SI-NOT: v_lshl |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 209 | |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 210 | ; SI: buffer_load_ushort |
| 211 | |
| 212 | ; SI-NOT: v_or_ |
| 213 | ; SI-NOT: v_lshl |
| 214 | |
| 215 | ; SI: buffer_store_short |
| 216 | ; SI: buffer_store_short |
| 217 | ; SI: buffer_store_short |
| 218 | ; SI: buffer_store_short |
| 219 | define void @global_align2_load_store_i64(i64 addrspace(1)* %p, i64 addrspace(1)* %r) { |
| 220 | %v = load i64, i64 addrspace(1)* %p, align 2 |
| 221 | store i64 %v, i64 addrspace(1)* %r, align 2 |
| 222 | ret void |
| 223 | } |
| 224 | |
| 225 | ; SI-LABEL: {{^}}unaligned_load_store_i64_global: |
| 226 | ; SI: buffer_load_ubyte |
| 227 | ; SI: buffer_load_ubyte |
| 228 | ; SI: buffer_load_ubyte |
| 229 | ; SI: buffer_load_ubyte |
| 230 | ; SI: buffer_load_ubyte |
| 231 | ; SI: buffer_load_ubyte |
| 232 | ; SI: buffer_load_ubyte |
| 233 | ; SI: buffer_load_ubyte |
| 234 | |
| 235 | ; SI-NOT: v_or_ |
| 236 | ; SI-NOT: v_lshl |
| 237 | |
| 238 | ; SI: buffer_store_byte |
| 239 | ; SI: buffer_store_byte |
| 240 | ; SI: buffer_store_byte |
| 241 | ; SI: buffer_store_byte |
| 242 | ; SI: buffer_store_byte |
| 243 | ; SI: buffer_store_byte |
| 244 | ; SI: buffer_store_byte |
| 245 | ; SI: buffer_store_byte |
| 246 | define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) { |
| David Blaikie | a79ac14 | 2015-02-27 21:17:42 +0000 | [diff] [blame] | 247 | %v = load i64, i64 addrspace(1)* %p, align 1 |
| Tom Stellard | c6b299c | 2015-02-02 18:02:28 +0000 | [diff] [blame] | 248 | store i64 %v, i64 addrspace(1)* %r, align 1 |
| 249 | ret void |
| 250 | } |
| 251 | |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 252 | ; FUNC-LABEL: {{^}}local_unaligned_load_store_v4i32: |
| 253 | ; GCN: ds_read_u8 |
| 254 | ; GCN: ds_read_u8 |
| 255 | ; GCN: ds_read_u8 |
| 256 | ; GCN: ds_read_u8 |
| Matt Arsenault | bd22342 | 2015-01-14 01:35:17 +0000 | [diff] [blame] | 257 | |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 258 | ; GCN: ds_read_u8 |
| 259 | ; GCN: ds_read_u8 |
| 260 | ; GCN: ds_read_u8 |
| 261 | ; GCN: ds_read_u8 |
| Matt Arsenault | bd22342 | 2015-01-14 01:35:17 +0000 | [diff] [blame] | 262 | |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 263 | ; GCN: ds_read_u8 |
| 264 | ; GCN: ds_read_u8 |
| 265 | ; GCN: ds_read_u8 |
| 266 | ; GCN: ds_read_u8 |
| Matt Arsenault | bd22342 | 2015-01-14 01:35:17 +0000 | [diff] [blame] | 267 | |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 268 | ; GCN: ds_read_u8 |
| 269 | ; GCN: ds_read_u8 |
| 270 | ; GCN: ds_read_u8 |
| 271 | ; GCN: ds_read_u8 |
| Matt Arsenault | bd22342 | 2015-01-14 01:35:17 +0000 | [diff] [blame] | 272 | |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 273 | ; GCN: ds_write_b8 |
| 274 | ; GCN: ds_write_b8 |
| 275 | ; GCN: ds_write_b8 |
| 276 | ; GCN: ds_write_b8 |
| Tom Stellard | c7e448c | 2015-02-04 20:49:51 +0000 | [diff] [blame] | 277 | |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 278 | ; GCN: ds_write_b8 |
| 279 | ; GCN: ds_write_b8 |
| 280 | ; GCN: ds_write_b8 |
| 281 | ; GCN: ds_write_b8 |
| Tom Stellard | c7e448c | 2015-02-04 20:49:51 +0000 | [diff] [blame] | 282 | |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 283 | ; GCN: ds_write_b8 |
| 284 | ; GCN: ds_write_b8 |
| 285 | ; GCN: ds_write_b8 |
| 286 | ; GCN: ds_write_b8 |
| Tom Stellard | c7e448c | 2015-02-04 20:49:51 +0000 | [diff] [blame] | 287 | |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 288 | ; GCN: ds_write_b8 |
| 289 | ; GCN: ds_write_b8 |
| 290 | ; GCN: ds_write_b8 |
| 291 | ; GCN: ds_write_b8 |
| 292 | ; GCN: s_endpgm |
| 293 | define void @local_unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) #0 { |
| David Blaikie | a79ac14 | 2015-02-27 21:17:42 +0000 | [diff] [blame] | 294 | %v = load <4 x i32>, <4 x i32> addrspace(3)* %p, align 1 |
| Matt Arsenault | 2ba54c3 | 2013-10-30 23:30:05 +0000 | [diff] [blame] | 295 | store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1 |
| 296 | ret void |
| 297 | } |
| Matt Arsenault | 6f2a526 | 2014-07-27 17:46:40 +0000 | [diff] [blame] | 298 | |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 299 | ; SI-LABEL: {{^}}global_unaligned_load_store_v4i32: |
| 300 | ; SI: buffer_load_ubyte |
| 301 | ; SI: buffer_load_ubyte |
| 302 | ; SI: buffer_load_ubyte |
| 303 | ; SI: buffer_load_ubyte |
| 304 | ; SI: buffer_load_ubyte |
| 305 | ; SI: buffer_load_ubyte |
| 306 | ; SI: buffer_load_ubyte |
| 307 | ; SI: buffer_load_ubyte |
| 308 | ; SI: buffer_load_ubyte |
| 309 | ; SI: buffer_load_ubyte |
| 310 | ; SI: buffer_load_ubyte |
| 311 | ; SI: buffer_load_ubyte |
| 312 | ; SI: buffer_load_ubyte |
| 313 | ; SI: buffer_load_ubyte |
| 314 | ; SI: buffer_load_ubyte |
| 315 | ; SI: buffer_load_ubyte |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 316 | |
| Matt Arsenault | 8af47a0 | 2016-07-01 22:55:55 +0000 | [diff] [blame^] | 317 | ; SI: buffer_store_byte |
| 318 | ; SI: buffer_store_byte |
| 319 | ; SI: buffer_store_byte |
| 320 | ; SI: buffer_store_byte |
| 321 | ; SI: buffer_store_byte |
| 322 | ; SI: buffer_store_byte |
| 323 | ; SI: buffer_store_byte |
| 324 | ; SI: buffer_store_byte |
| 325 | ; SI: buffer_store_byte |
| 326 | ; SI: buffer_store_byte |
| 327 | ; SI: buffer_store_byte |
| 328 | ; SI: buffer_store_byte |
| 329 | ; SI: buffer_store_byte |
| 330 | ; SI: buffer_store_byte |
| 331 | ; SI: buffer_store_byte |
| 332 | ; SI: buffer_store_byte |
| 333 | define void @global_unaligned_load_store_v4i32(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) nounwind { |
| David Blaikie | a79ac14 | 2015-02-27 21:17:42 +0000 | [diff] [blame] | 334 | %v = load <4 x i32>, <4 x i32> addrspace(1)* %p, align 1 |
| Tom Stellard | c6b299c | 2015-02-02 18:02:28 +0000 | [diff] [blame] | 335 | store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 1 |
| 336 | ret void |
| 337 | } |
| 338 | |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 339 | ; FUNC-LABEL: {{^}}local_load_i64_align_4: |
| 340 | ; GCN: ds_read2_b32 |
| 341 | define void @local_load_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 { |
| David Blaikie | a79ac14 | 2015-02-27 21:17:42 +0000 | [diff] [blame] | 342 | %val = load i64, i64 addrspace(3)* %in, align 4 |
| Matt Arsenault | 6f2a526 | 2014-07-27 17:46:40 +0000 | [diff] [blame] | 343 | store i64 %val, i64 addrspace(1)* %out, align 8 |
| 344 | ret void |
| 345 | } |
| 346 | |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 347 | ; FUNC-LABEL: {{^}}local_load_i64_align_4_with_offset: |
| 348 | ; GCN: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:8 offset1:9 |
| 349 | define void @local_load_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 { |
| David Blaikie | 79e6c74 | 2015-02-27 19:29:02 +0000 | [diff] [blame] | 350 | %ptr = getelementptr i64, i64 addrspace(3)* %in, i32 4 |
| David Blaikie | a79ac14 | 2015-02-27 21:17:42 +0000 | [diff] [blame] | 351 | %val = load i64, i64 addrspace(3)* %ptr, align 4 |
| Tom Stellard | f3fc555 | 2014-08-22 18:49:35 +0000 | [diff] [blame] | 352 | store i64 %val, i64 addrspace(1)* %out, align 8 |
| 353 | ret void |
| 354 | } |
| 355 | |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 356 | ; FUNC-LABEL: {{^}}local_load_i64_align_4_with_split_offset: |
| Tom Stellard | f3fc555 | 2014-08-22 18:49:35 +0000 | [diff] [blame] | 357 | ; This tests the case where the lo offset fits in 8 bits, but the hi offset needs 9 bits. |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 358 | ; GCN: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset1:1 |
| 359 | ; GCN: s_endpgm |
| 360 | define void @local_load_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 { |
| Tom Stellard | f3fc555 | 2014-08-22 18:49:35 +0000 | [diff] [blame] | 361 | %ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)* |
| David Blaikie | 79e6c74 | 2015-02-27 19:29:02 +0000 | [diff] [blame] | 362 | %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255 |
| Tom Stellard | f3fc555 | 2014-08-22 18:49:35 +0000 | [diff] [blame] | 363 | %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)* |
| David Blaikie | a79ac14 | 2015-02-27 21:17:42 +0000 | [diff] [blame] | 364 | %val = load i64, i64 addrspace(3)* %ptri64, align 4 |
| Tom Stellard | f3fc555 | 2014-08-22 18:49:35 +0000 | [diff] [blame] | 365 | store i64 %val, i64 addrspace(1)* %out, align 8 |
| 366 | ret void |
| 367 | } |
| 368 | |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 369 | ; FUNC-LABEL: {{^}}local_load_i64_align_1: |
| 370 | ; GCN: ds_read_u8 |
| 371 | ; GCN: ds_read_u8 |
| 372 | ; GCN: ds_read_u8 |
| 373 | ; GCN: ds_read_u8 |
| 374 | ; GCN: ds_read_u8 |
| 375 | ; GCN: ds_read_u8 |
| 376 | ; GCN: ds_read_u8 |
| 377 | ; GCN: ds_read_u8 |
| 378 | ; GCN: store_dwordx2 |
| 379 | define void @local_load_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 { |
| David Blaikie | a79ac14 | 2015-02-27 21:17:42 +0000 | [diff] [blame] | 380 | %val = load i64, i64 addrspace(3)* %in, align 1 |
| Tom Stellard | 4cd6dcd | 2015-02-02 18:02:23 +0000 | [diff] [blame] | 381 | store i64 %val, i64 addrspace(1)* %out, align 8 |
| 382 | ret void |
| 383 | } |
| Tom Stellard | f3fc555 | 2014-08-22 18:49:35 +0000 | [diff] [blame] | 384 | |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 385 | ; FUNC-LABEL: {{^}}local_store_i64_align_4: |
| 386 | ; GCN: ds_write2_b32 |
| 387 | define void @local_store_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 { |
| Tom Stellard | f3fc555 | 2014-08-22 18:49:35 +0000 | [diff] [blame] | 388 | store i64 %val, i64 addrspace(3)* %out, align 4 |
| 389 | ret void |
| 390 | } |
| 391 | |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 392 | ; FUNC-LABEL: {{^}}local_store_i64_align_4_with_offset: |
| 393 | ; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9 |
| 394 | ; GCN: s_endpgm |
| 395 | define void @local_store_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 { |
| David Blaikie | 79e6c74 | 2015-02-27 19:29:02 +0000 | [diff] [blame] | 396 | %ptr = getelementptr i64, i64 addrspace(3)* %out, i32 4 |
| Tom Stellard | f3fc555 | 2014-08-22 18:49:35 +0000 | [diff] [blame] | 397 | store i64 0, i64 addrspace(3)* %ptr, align 4 |
| 398 | ret void |
| 399 | } |
| 400 | |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 401 | ; FUNC-LABEL: {{^}}local_store_i64_align_4_with_split_offset: |
| Tom Stellard | f3fc555 | 2014-08-22 18:49:35 +0000 | [diff] [blame] | 402 | ; This tests the case where the lo offset fits in 8 bits, but the hi offset needs 9 bits. |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 403 | ; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1 |
| 404 | ; GCN: s_endpgm |
| 405 | define void @local_store_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 { |
| Tom Stellard | f3fc555 | 2014-08-22 18:49:35 +0000 | [diff] [blame] | 406 | %ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)* |
| David Blaikie | 79e6c74 | 2015-02-27 19:29:02 +0000 | [diff] [blame] | 407 | %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255 |
| Tom Stellard | f3fc555 | 2014-08-22 18:49:35 +0000 | [diff] [blame] | 408 | %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)* |
| 409 | store i64 0, i64 addrspace(3)* %ptri64, align 4 ; store through the +255 dword pointer so the split-offset case is what gets tested |
| 410 | ret void |
| 411 | } |
| Matt Arsenault | d1097a3 | 2016-06-02 19:54:26 +0000 | [diff] [blame] | 412 | |
| 413 | ; FUNC-LABEL: {{^}}constant_load_unaligned_i16: |
| 414 | ; GCN-NOHSA: buffer_load_ushort |
| 415 | ; GCN-HSA: flat_load_ushort |
| 416 | |
| 417 | ; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} |
| 418 | define void @constant_load_unaligned_i16(i32 addrspace(1)* %out, i16 addrspace(2)* %in) { |
| 419 | entry: |
| 420 | %tmp0 = getelementptr i16, i16 addrspace(2)* %in, i32 1 |
| 421 | %tmp1 = load i16, i16 addrspace(2)* %tmp0 |
| 422 | %tmp2 = zext i16 %tmp1 to i32 |
| 423 | store i32 %tmp2, i32 addrspace(1)* %out |
| 424 | ret void |
| 425 | } |
| 426 | |
| 427 | ; FUNC-LABEL: {{^}}constant_load_unaligned_i32: |
| 428 | ; GCN-NOHSA: buffer_load_ubyte |
| 429 | ; GCN-NOHSA: buffer_load_ubyte |
| 430 | ; GCN-NOHSA: buffer_load_ubyte |
| 431 | ; GCN-NOHSA: buffer_load_ubyte |
| 432 | |
| 433 | ; GCN-HSA: flat_load_ubyte |
| 434 | ; GCN-HSA: flat_load_ubyte |
| 435 | ; GCN-HSA: flat_load_ubyte |
| 436 | ; GCN-HSA: flat_load_ubyte |
| 437 | define void @constant_load_unaligned_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { |
| 438 | entry: |
| 439 | %tmp0 = load i32, i32 addrspace(2)* %in, align 1 |
| 440 | store i32 %tmp0, i32 addrspace(1)* %out |
| 441 | ret void |
| 442 | } |
| 443 | |
| 444 | ; FUNC-LABEL: {{^}}constant_load_unaligned_f32: |
| 445 | ; GCN-NOHSA: buffer_load_ubyte |
| 446 | ; GCN-NOHSA: buffer_load_ubyte |
| 447 | ; GCN-NOHSA: buffer_load_ubyte |
| 448 | ; GCN-NOHSA: buffer_load_ubyte |
| 449 | |
| 450 | ; GCN-HSA: flat_load_ubyte |
| 451 | ; GCN-HSA: flat_load_ubyte |
| 452 | ; GCN-HSA: flat_load_ubyte |
| 453 | ; GCN-HSA: flat_load_ubyte |
| 454 | define void @constant_load_unaligned_f32(float addrspace(1)* %out, float addrspace(2)* %in) { |
| 455 | %tmp1 = load float, float addrspace(2)* %in, align 1 |
| 456 | store float %tmp1, float addrspace(1)* %out |
| 457 | ret void |
| 458 | } |
| 459 | |
| 460 | attributes #0 = { nounwind } |