; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=CI %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=GFX9 %s

; Casting a group (LDS) pointer to flat requires the shared aperture for the
; high half; on CI it comes from the queue pointer, on GFX9 from a hwreg.
; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; CI: enable_sgpr_queue_ptr = 1
; GFX9: enable_sgpr_queue_ptr = 0

; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
; CI-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], -1
; CI-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc
; CI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; CI-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]

; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9-DAG: s_lshl_b32 [[SSRC_SHARED_BASE:s[0-9]+]], [[SSRC_SHARED]], 16
; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[SSRC_SHARED_BASE]]

; GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_shared_base
; GFX9: v_cmp_ne_u32_e64 vcc, [[PTR]], -1
; GFX9: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc
; GFX9-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]

; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]]

; At most 2 digits. Make sure src_shared_base is not counted as a high
; number SGPR.

; CI: NumSgprs: {{[0-9][0-9]+}}
; GFX9: NumSgprs: {{[0-9]+}}
define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 {
  %stof = addrspacecast i32 addrspace(3)* %ptr to i32*
  store volatile i32 7, i32* %stof
  ret void
}

; Same as the group cast above, but using the private aperture (queue ptr
; offset 0x11 on CI; low 16 bits of the hwreg on GFX9) and comparing the
; source against 0 instead of -1.
; HSA-LABEL: {{^}}use_private_to_flat_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; CI: enable_sgpr_queue_ptr = 1
; GFX9: enable_sgpr_queue_ptr = 0

; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}}
; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]

; CI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; CI-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], 0
; CI-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc
; CI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; CI-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]

; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
; GFX9-DAG: s_getreg_b32 [[SSRC_PRIVATE:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9-DAG: s_lshl_b32 [[SSRC_PRIVATE_BASE:s[0-9]+]], [[SSRC_PRIVATE]], 16
; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[SSRC_PRIVATE_BASE]]

; GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_private_base

; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; GFX9: v_cmp_ne_u32_e64 vcc, [[PTR]], 0
; GFX9: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc
; GFX9: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]

; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]]

; CI: NumSgprs: {{[0-9][0-9]+}}
; GFX9: NumSgprs: {{[0-9]+}}
define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #0 {
  %stof = addrspacecast i32 addrspace(5)* %ptr to i32*
  store volatile i32 7, i32* %stof
  ret void
}

; no-op
; Global and flat share the same address encoding, so no aperture or queue
; pointer is needed for this cast.
; HSA-LABEL: {{^}}use_global_to_flat_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; HSA: flat_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]]
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #0 {
  %stof = addrspacecast i32 addrspace(1)* %ptr to i32*
  store volatile i32 7, i32* %stof
  ret void
}

; no-op
; Constant and flat share the same address encoding; the cast lowers to a
; plain pointer copy. (Fixed label typo: was "HSA-LABEl", which FileCheck's
; case-sensitive matching silently ignored.)
; HSA-LABEL: {{^}}use_constant_to_flat_addrspacecast:
; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA: flat_load_dword v{{[0-9]+}}, v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #0 {
  %stof = addrspacecast i32 addrspace(4)* %ptr to i32*
  %ld = load volatile i32, i32* %stof
  ret void
}

; Flat-to-group narrows to the low 32 bits, mapping a null flat pointer to
; the LDS null value (-1).
; HSA-LABEL: {{^}}use_flat_to_group_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}
; HSA-DAG: v_cmp_ne_u64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
; HSA: ds_write_b32 [[CASTPTR]], v[[K]]
define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #0 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(3)*
  store volatile i32 0, i32 addrspace(3)* %ftos
  ret void
}

; Flat-to-private narrows to the low 32 bits; a null flat pointer maps to
; the private null value (0).
; HSA-LABEL: {{^}}use_flat_to_private_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}
; HSA-DAG: v_cmp_ne_u64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], 0, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
; HSA: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #0 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(5)*
  store volatile i32 0, i32 addrspace(5)* %ftos
  ret void
}

; Flat-to-global is a no-op cast; the 64-bit pointer is used as-is.
; HSA-LABEL: {{^}}use_flat_to_global_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}, s[4:5], 0x0
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0
; HSA: {{flat|global}}_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]]
define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #0 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(1)*
  store volatile i32 0, i32 addrspace(1)* %ftos
  ret void
}

; Flat-to-constant is a no-op cast; the load can go through scalar memory.
; HSA-LABEL: {{^}}use_flat_to_constant_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}, s[4:5], 0x0
; HSA: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, 0x0
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #0 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(4)*
  load volatile i32, i32 addrspace(4)* %ftos
  ret void
}

; Casting the group null pointer to flat produces the flat null pointer
; (low half 0), with the shared aperture still materialized for the high half.
; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:
; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9-DAG: s_lshl_b32 [[SSRC_SHARED_BASE:s[0-9]+]], [[SSRC_SHARED]], 16
; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SSRC_SHARED_BASE]]

; GFX9-XXX: v_mov_b32_e32 v[[HI:[0-9]+]], src_shared_base

; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(3)* null to i32*
  store volatile i32 7, i32* %cast
  ret void
}

; Flat null casts to the group null value, -1.
; HSA-LABEL: {{^}}cast_0_flat_to_group_addrspacecast:
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
define amdgpu_kernel void @cast_0_flat_to_group_addrspacecast() #0 {
  %cast = addrspacecast i32* null to i32 addrspace(3)*
  store volatile i32 7, i32 addrspace(3)* %cast
  ret void
}

; Group -1 (the group null value) casts to the flat null pointer, 0:0.
; HSA-LABEL: {{^}}cast_neg1_group_to_flat_addrspacecast:
; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32*
  store volatile i32 7, i32* %cast
  ret void
}

; Flat -1 truncates to group -1 when narrowed to the 32-bit group space.
; HSA-LABEL: {{^}}cast_neg1_flat_to_group_addrspacecast:
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
  %cast = addrspacecast i32* inttoptr (i64 -1 to i32*) to i32 addrspace(3)*
  store volatile i32 7, i32 addrspace(3)* %cast
  ret void
}

; FIXME: Shouldn't need to enable queue ptr
; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast:
; CI: enable_sgpr_queue_ptr = 1
; GFX9: enable_sgpr_queue_ptr = 0

; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(5)* null to i32*
  store volatile i32 7, i32* %cast
  ret void
}

; Flat null casts to private null (0), so the store uses a 0 offset with no
; register address (off).
; HSA-LABEL: {{^}}cast_0_flat_to_private_addrspacecast:
; HSA: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: buffer_store_dword [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 {
  %cast = addrspacecast i32* null to i32 addrspace(5)*
  store volatile i32 7, i32 addrspace(5)* %cast
  ret void
}

; Disable optimizations in case there are optimizations added that
; specialize away generic pointer accesses.

; HSA-LABEL: {{^}}branch_use_flat_i32:
; HSA: {{flat|global}}_store_dword {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}
; HSA: s_endpgm
define amdgpu_kernel void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 {
entry:
  %cmp = icmp ne i32 %c, 0
  br i1 %cmp, label %local, label %global

local:
  %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32*
  br label %end

global:
  %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32*
  br label %end

end:
  %fptr = phi i32* [ %flat_local, %local ], [ %flat_global, %global ]
  store volatile i32 %x, i32* %fptr, align 4
; %val = load i32, i32* %fptr, align 4
; store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Check for prologue initializing special SGPRs pointing to scratch.
; HSA-LABEL: {{^}}store_flat_scratch:
; CI-DAG: s_mov_b32 flat_scratch_lo, s9
; CI-DAG: s_add_u32 [[ADD:s[0-9]+]], s8, s11
; CI: s_lshr_b32 flat_scratch_hi, [[ADD]], 8

; GFX9: s_add_u32 flat_scratch_lo, s6, s9
; GFX9: s_addc_u32 flat_scratch_hi, s7, 0

; HSA: {{flat|global}}_store_dword
; HSA: s_barrier
; HSA: {{flat|global}}_load_dword
define amdgpu_kernel void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
  %alloca = alloca i32, i32 9, align 4, addrspace(5)
  %x = call i32 @llvm.amdgcn.workitem.id.x() #2
  %pptr = getelementptr i32, i32 addrspace(5)* %alloca, i32 %x
  %fptr = addrspacecast i32 addrspace(5)* %pptr to i32*
  store volatile i32 %x, i32* %fptr
; Dummy call
  call void @llvm.amdgcn.s.barrier() #1
  %reload = load volatile i32, i32* %fptr, align 4
  store volatile i32 %reload, i32 addrspace(1)* %out, align 4
  ret void
}

declare void @llvm.amdgcn.s.barrier() #1
declare i32 @llvm.amdgcn.workitem.id.x() #2

attributes #0 = { nounwind }
attributes #1 = { nounwind convergent }
attributes #2 = { nounwind readnone }