Matt Arsenault | caf0ed4 | 2017-11-30 00:52:40 +0000 | [diff] [blame] | 1 | ; RUN: llc -march=amdgcn -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SICIVI %s |
| 2 | ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SICIVI %s |
| 3 | ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 4 | |
| 5 | ; Test addressing modes when the scratch base is not a frame index. |
| 6 | |
; i8 store to a constant private (scratch) address. The constant address 8
; fits in the MUBUF immediate offset field (max 4095, see the max_offset
; tests below), so it is selected as "off ... offset:8" with no vgpr address.
| 7 | ; GCN-LABEL: {{^}}store_private_offset_i8: |
Geoff Berry | 4e38e02 | 2017-08-17 04:04:11 +0000 | [diff] [blame] | 8 | ; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s2 offset:8 |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 9 | define amdgpu_kernel void @store_private_offset_i8() #0 { |
Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame] | 10 | store volatile i8 5, i8 addrspace(5)* inttoptr (i32 8 to i8 addrspace(5)*) |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 11 | ret void |
| 12 | } |
| 13 | |
; Same as the i8 case, for i16: constant address folds into the immediate,
; and the 16-bit type selects buffer_store_short.
| 14 | ; GCN-LABEL: {{^}}store_private_offset_i16: |
Geoff Berry | 4e38e02 | 2017-08-17 04:04:11 +0000 | [diff] [blame] | 15 | ; GCN: buffer_store_short v{{[0-9]+}}, off, s[4:7], s2 offset:8 |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 16 | define amdgpu_kernel void @store_private_offset_i16() #0 { |
Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame] | 17 | store volatile i16 5, i16 addrspace(5)* inttoptr (i32 8 to i16 addrspace(5)*) |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 18 | ret void |
| 19 | } |
| 20 | |
; i32 store to a constant private address: selected as a single
; buffer_store_dword with the address folded into the immediate offset.
| 21 | ; GCN-LABEL: {{^}}store_private_offset_i32: |
Geoff Berry | 4e38e02 | 2017-08-17 04:04:11 +0000 | [diff] [blame] | 22 | ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[4:7], s2 offset:8 |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 23 | define amdgpu_kernel void @store_private_offset_i32() #0 { |
Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame] | 24 | store volatile i32 5, i32 addrspace(5)* inttoptr (i32 8 to i32 addrspace(5)*) |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 25 | ret void |
| 26 | } |
| 27 | |
; <2 x i32> store stays a single buffer_store_dwordx2 (not split into two
; dword stores) because the RUN lines enable +max-private-element-size-16.
| 28 | ; GCN-LABEL: {{^}}store_private_offset_v2i32: |
Geoff Berry | 4e38e02 | 2017-08-17 04:04:11 +0000 | [diff] [blame] | 29 | ; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8 |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 30 | define amdgpu_kernel void @store_private_offset_v2i32() #0 { |
Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame] | 31 | store volatile <2 x i32> <i32 5, i32 10>, <2 x i32> addrspace(5)* inttoptr (i32 8 to <2 x i32> addrspace(5)*) |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 32 | ret void |
| 33 | } |
| 34 | |
; <4 x i32> store stays a single buffer_store_dwordx4 — the widest case
; permitted by +max-private-element-size-16 on the RUN lines.
| 35 | ; GCN-LABEL: {{^}}store_private_offset_v4i32: |
Geoff Berry | 4e38e02 | 2017-08-17 04:04:11 +0000 | [diff] [blame] | 36 | ; GCN: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8 |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 37 | define amdgpu_kernel void @store_private_offset_v4i32() #0 { |
Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame] | 38 | store volatile <4 x i32> <i32 5, i32 10, i32 15, i32 0>, <4 x i32> addrspace(5)* inttoptr (i32 8 to <4 x i32> addrspace(5)*) |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 39 | ret void |
| 40 | } |
| 41 | |
; i8 load from a constant private address. The result is unused; volatile
; keeps the otherwise-dead load from being deleted before selection.
| 42 | ; GCN-LABEL: {{^}}load_private_offset_i8: |
Geoff Berry | 4e38e02 | 2017-08-17 04:04:11 +0000 | [diff] [blame] | 43 | ; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], s2 offset:8 |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 44 | define amdgpu_kernel void @load_private_offset_i8() #0 { |
Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame] | 45 | %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 8 to i8 addrspace(5)*) |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 46 | ret void |
| 47 | } |
| 48 | |
; A sign-extended i8 load (sext of the loaded value) folds the extension
; into buffer_load_sbyte; the immediate offset is still used.
; NOTE(review): the check expects the scratch wave offset in s8 here (s2 in
; the no-arg kernels above) — presumably because this kernel takes a kernarg;
; register numbers come straight from llc output.
| 49 | ; GCN-LABEL: {{^}}sextload_private_offset_i8: |
Geoff Berry | 4e38e02 | 2017-08-17 04:04:11 +0000 | [diff] [blame] | 50 | ; GCN: buffer_load_sbyte v{{[0-9]+}}, off, s[4:7], s8 offset:8 |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 51 | define amdgpu_kernel void @sextload_private_offset_i8(i32 addrspace(1)* %out) #0 { |
Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame] | 52 | %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 8 to i8 addrspace(5)*) |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 53 | %sextload = sext i8 %load to i32 |
| 54 | store i32 %sextload, i32 addrspace(1)* undef |
| 55 | ret void |
| 56 | } |
| 57 | |
; A zero-extended i8 load folds the extension into buffer_load_ubyte.
; The global store to undef gives the extended value a use.
| 58 | ; GCN-LABEL: {{^}}zextload_private_offset_i8: |
Geoff Berry | 4e38e02 | 2017-08-17 04:04:11 +0000 | [diff] [blame] | 59 | ; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], s8 offset:8 |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 60 | define amdgpu_kernel void @zextload_private_offset_i8(i32 addrspace(1)* %out) #0 { |
Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame] | 61 | %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 8 to i8 addrspace(5)*) |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 62 | %zextload = zext i8 %load to i32 |
| 63 | store i32 %zextload, i32 addrspace(1)* undef |
| 64 | ret void |
| 65 | } |
| 66 | |
; Plain (anyext) i16 load from a constant private address selects
; buffer_load_ushort with the folded immediate offset.
| 67 | ; GCN-LABEL: {{^}}load_private_offset_i16: |
Geoff Berry | 4e38e02 | 2017-08-17 04:04:11 +0000 | [diff] [blame] | 68 | ; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], s2 offset:8 |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 69 | define amdgpu_kernel void @load_private_offset_i16() #0 { |
Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame] | 70 | %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 8 to i16 addrspace(5)*) |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 71 | ret void |
| 72 | } |
| 73 | |
; Sign-extended i16 load folds the sext into buffer_load_sshort.
| 74 | ; GCN-LABEL: {{^}}sextload_private_offset_i16: |
Geoff Berry | 4e38e02 | 2017-08-17 04:04:11 +0000 | [diff] [blame] | 75 | ; GCN: buffer_load_sshort v{{[0-9]+}}, off, s[4:7], s8 offset:8 |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 76 | define amdgpu_kernel void @sextload_private_offset_i16(i32 addrspace(1)* %out) #0 { |
Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame] | 77 | %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 8 to i16 addrspace(5)*) |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 78 | %sextload = sext i16 %load to i32 |
| 79 | store i32 %sextload, i32 addrspace(1)* undef |
| 80 | ret void |
| 81 | } |
| 82 | |
; Zero-extended i16 load folds the zext into buffer_load_ushort.
| 83 | ; GCN-LABEL: {{^}}zextload_private_offset_i16: |
Geoff Berry | 4e38e02 | 2017-08-17 04:04:11 +0000 | [diff] [blame] | 84 | ; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], s8 offset:8 |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 85 | define amdgpu_kernel void @zextload_private_offset_i16(i32 addrspace(1)* %out) #0 { |
Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame] | 86 | %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 8 to i16 addrspace(5)*) |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 87 | %zextload = zext i16 %load to i32 |
| 88 | store i32 %zextload, i32 addrspace(1)* undef |
| 89 | ret void |
| 90 | } |
| 91 | |
; i32 load from a constant private address: single buffer_load_dword with
; the address folded into the immediate offset.
| 92 | ; GCN-LABEL: {{^}}load_private_offset_i32: |
Geoff Berry | 4e38e02 | 2017-08-17 04:04:11 +0000 | [diff] [blame] | 93 | ; GCN: buffer_load_dword v{{[0-9]+}}, off, s[4:7], s2 offset:8 |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 94 | define amdgpu_kernel void @load_private_offset_i32() #0 { |
Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame] | 95 | %load = load volatile i32, i32 addrspace(5)* inttoptr (i32 8 to i32 addrspace(5)*) |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 96 | ret void |
| 97 | } |
| 98 | |
; <2 x i32> load stays a single buffer_load_dwordx2 thanks to
; +max-private-element-size-16 on the RUN lines.
| 99 | ; GCN-LABEL: {{^}}load_private_offset_v2i32: |
Geoff Berry | 4e38e02 | 2017-08-17 04:04:11 +0000 | [diff] [blame] | 100 | ; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8 |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 101 | define amdgpu_kernel void @load_private_offset_v2i32() #0 { |
Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame] | 102 | %load = load volatile <2 x i32>, <2 x i32> addrspace(5)* inttoptr (i32 8 to <2 x i32> addrspace(5)*) |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 103 | ret void |
| 104 | } |
| 105 | |
; <4 x i32> load stays a single buffer_load_dwordx4 thanks to
; +max-private-element-size-16 on the RUN lines.
| 106 | ; GCN-LABEL: {{^}}load_private_offset_v4i32: |
Geoff Berry | 4e38e02 | 2017-08-17 04:04:11 +0000 | [diff] [blame] | 107 | ; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8 |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 108 | define amdgpu_kernel void @load_private_offset_v4i32() #0 { |
Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame] | 109 | %load = load volatile <4 x i32>, <4 x i32> addrspace(5)* inttoptr (i32 8 to <4 x i32> addrspace(5)*) |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 110 | ret void |
| 111 | } |
| 112 | |
; Boundary case: 4095 is the largest constant address that still fits in
; the MUBUF immediate offset field (contrast with the plus1 test below).
| 113 | ; GCN-LABEL: {{^}}store_private_offset_i8_max_offset: |
Geoff Berry | 4e38e02 | 2017-08-17 04:04:11 +0000 | [diff] [blame] | 114 | ; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s2 offset:4095 |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 115 | define amdgpu_kernel void @store_private_offset_i8_max_offset() #0 { |
Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame] | 116 | store volatile i8 5, i8 addrspace(5)* inttoptr (i32 4095 to i8 addrspace(5)*) |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 117 | ret void |
| 118 | } |
| 119 | |
; 4096 does not fit in the immediate offset field, so the address is
; materialized in a vgpr (v_mov of 0x1000) and the store uses "offen"
; with no immediate offset.
| 120 | ; GCN-LABEL: {{^}}store_private_offset_i8_max_offset_plus1: |
| 121 | ; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000 |
Geoff Berry | 4e38e02 | 2017-08-17 04:04:11 +0000 | [diff] [blame] | 122 | ; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s2 offen{{$}} |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 123 | define amdgpu_kernel void @store_private_offset_i8_max_offset_plus1() #0 { |
Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame] | 124 | store volatile i8 5, i8 addrspace(5)* inttoptr (i32 4096 to i8 addrspace(5)*) |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 125 | ret void |
| 126 | } |
| 127 | |
; 4097 is split: the out-of-range part (0x1000) goes in the vgpr address
; and the remainder (1) is kept as the immediate — "offen offset:1".
| 128 | ; GCN-LABEL: {{^}}store_private_offset_i8_max_offset_plus2: |
| 129 | ; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000 |
Geoff Berry | 4e38e02 | 2017-08-17 04:04:11 +0000 | [diff] [blame] | 130 | ; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s2 offen offset:1{{$}} |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 131 | define amdgpu_kernel void @store_private_offset_i8_max_offset_plus2() #0 { |
Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame] | 132 | store volatile i8 5, i8 addrspace(5)* inttoptr (i32 4097 to i8 addrspace(5)*) |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 133 | ret void |
| 134 | } |
| 135 | |
Matt Arsenault | caf0ed4 | 2017-11-30 00:52:40 +0000 | [diff] [blame] | 136 | ; MUBUF used for stack access has bounds checking enabled before gfx9, |
| 137 | ; so a possibly negative base index can't be used for the vgpr offset. |
| 138 | |
; Store through a runtime-variable index into a stack array. The +8 element
; index scales to a +32 byte offset; gfx9 folds it into the immediate
; (offen offset:32), while pre-gfx9 (SICIVI) must add it into the vgpr
; address with two v_add instructions, per the file-header comment above.
| 139 | ; GCN-LABEL: {{^}}store_private_unknown_bits_vaddr: |
| 140 | ; SICIVI: v_add_{{i|u}}32_e32 [[ADDR0:v[0-9]+]], vcc, 4 |
| 141 | ; SICIVI: v_add_{{i|u}}32_e32 [[ADDR1:v[0-9]+]], vcc, 32, [[ADDR0]] |
| 142 | ; SICIVI: buffer_store_dword v{{[0-9]+}}, [[ADDR1]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} |
| 143 | |
Matt Arsenault | 84445dd | 2017-11-30 22:51:26 +0000 | [diff] [blame] | 144 | ; GFX9: v_add_u32_e32 [[ADDR:v[0-9]+]], 4, |
Matt Arsenault | caf0ed4 | 2017-11-30 00:52:40 +0000 | [diff] [blame] | 145 | ; GFX9: buffer_store_dword v{{[0-9]+}}, [[ADDR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:32 |
| 146 | define amdgpu_kernel void @store_private_unknown_bits_vaddr() #0 { |
Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame] | 147 | %alloca = alloca [16 x i32], align 4, addrspace(5) |
Matt Arsenault | caf0ed4 | 2017-11-30 00:52:40 +0000 | [diff] [blame] | 148 | %vaddr = load volatile i32, i32 addrspace(1)* undef |
| 149 | %vaddr.off = add i32 %vaddr, 8 |
Yaxun Liu | 2a22c5d | 2018-02-02 16:07:16 +0000 | [diff] [blame] | 150 | %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %vaddr.off |
| 151 | store volatile i32 9, i32 addrspace(5)* %gep |
Matt Arsenault | caf0ed4 | 2017-11-30 00:52:40 +0000 | [diff] [blame] | 152 | ret void |
| 153 | } |
| 154 | |
Matt Arsenault | 0774ea2 | 2017-04-24 19:40:59 +0000 | [diff] [blame] | 155 | attributes #0 = { nounwind } |