; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; 128 x i32 array in addrspace(3), internal linkage.
; NOTE(review): not referenced by any function visible in this file - confirm it is still needed.
@local_memory.local_mem = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4

; 512 x i32 array in addrspace(3); element 1 is read by @load_i32_local_const_ptr.
@lds = addrspace(3) global [512 x i32] undef, align 4

; On SI we need to make sure that the base offset is a register and
; not an immediate.

; Loads element 1 of @lds (byte offset 4 from a constant addrspace(3) address)
; and stores it to element 1 of %out. The %in argument is unused.
;
; The GCN checks require the base address to be materialized as a zero
; register, with the constant 4 folded into the ds_read offset field.
;
; The r600 check below uses the prefix that the redwood invocation actually
; passes to FileCheck; the former "R600" prefix was enabled by no invocation
; and was therefore never verified.

; FUNC-LABEL: {{^}}load_i32_local_const_ptr:
; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0
; GCN: ds_read_b32 v{{[0-9]+}}, v[[ZERO]] offset:4

; EG: LDS_READ_RET
define amdgpu_kernel void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) #0 {
entry:
  %tmp0 = getelementptr [512 x i32], [512 x i32] addrspace(3)* @lds, i32 0, i32 1
  %tmp1 = load i32, i32 addrspace(3)* %tmp0
  %tmp2 = getelementptr i32, i32 addrspace(1)* %out, i32 1
  store i32 %tmp1, i32 addrspace(1)* %tmp2
  ret void
}

; Test loading an i32 and a v2i32 value from the same base pointer.
;
; The scalar is read from %in itself; the vector is read from %in advanced by
; two <2 x i32> elements (16 bytes) with only 4-byte alignment. The scalar is
; inserted into lane 0 of a zero vector, added to the loaded vector, and the
; sum is stored to %out.
;
; The three r600 checks use the prefix that the redwood invocation actually
; passes to FileCheck; the former "R600" prefix was enabled by no invocation
; and was therefore never verified.

; FUNC-LABEL: {{^}}load_i32_v2i32_local:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; GCN-DAG: ds_read_b32
; GCN-DAG: ds_read2_b32
define amdgpu_kernel void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) #0 {
  %scalar = load i32, i32 addrspace(3)* %in
  %tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
  %vec_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(3)* %tmp0, i32 2
  %vec0 = load <2 x i32>, <2 x i32> addrspace(3)* %vec_ptr, align 4
  %vec1 = insertelement <2 x i32> <i32 0, i32 0>, i32 %scalar, i32 0
  %vec = add <2 x i32> %vec0, %vec1
  store <2 x i32> %vec, <2 x i32> addrspace(1)* %out
  ret void
}

; Attribute group #0, applied to both kernels in this file.
attributes #0 = { nounwind }