; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; XUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
| 3 | |
; FIXME: The fiji (VI) run line above is disabled. It is broken on VI because
; flat instructions need to be emitted instead of the addr64 equivalents of
; the _OFFSET variants.
| 6 | |
; Check that moving the pointer out of the resource descriptor and into
; vaddr works for atomics, both when the result is used and when it is not.
| 9 | |
; AMDGPU intrinsic called at the start of each kernel in this file; going by
; its name it yields the calling work-item's id in the x dimension. Its i32
; result is used to index %in and to form the branch condition.
declare i32 @llvm.amdgcn.workitem.id.x() #1
Matt Arsenault | a40450c | 2015-11-05 02:46:56 +0000 | [diff] [blame] | 11 | |
; Signed 32-bit atomic max whose returned (old) value is used.
;
; Each work-item volatile-loads a global pointer from %in, indexed by its
; work-item id. Every work-item whose id is not 1 then performs an
; `atomicrmw max` with %y on element 100 of that pointer. Element 100 of an
; i32 array is byte offset 100 * 4 = 400, which is the `offset:400` expected
; below. The old value is stored to %out, and the expected instruction
; carries `glc`. %x is not used by the body.
; GCN-LABEL: {{^}}atomic_max_i32:
; GCN: buffer_atomic_smax v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:400 glc{{$}}
define amdgpu_kernel void @atomic_max_i32(i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(1)* %in, i32 addrspace(1)* %x, i32 %y) #0 {
entry:
  ; Fetch this work-item's own pointer from the table at %in.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in, i32 %lane
  %base = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %slot

  ; (lane ^ 1) != 0 holds for every work-item except the one with id 1, so
  ; the atomic sits behind a branch that depends on the work-item id.
  %flipped = xor i32 %lane, 1
  %do.rmw = icmp ne i32 %flipped, 0
  br i1 %do.rmw, label %rmw, label %done

rmw:
  ; Constant element index 100 is the source of the 400-byte offset.
  %elt = getelementptr i32, i32 addrspace(1)* %base, i32 100
  %old = atomicrmw max i32 addrspace(1)* %elt, i32 %y seq_cst
  ; Storing the old value keeps the atomic's result live.
  store i32 %old, i32 addrspace(1)* %out
  br label %done

done:
  ret void
}
| 31 | |
; Signed 32-bit atomic max whose returned (old) value is ignored.
;
; Each work-item volatile-loads a global pointer from %in, indexed by its
; work-item id. Every work-item whose id is not 1 then performs an
; `atomicrmw max` with %y on element 100 of that pointer. Element 100 of an
; i32 array is byte offset 100 * 4 = 400, which is the `offset:400` expected
; below. The result of the atomic is never used, and the expected
; instruction ends right after the offset, with no `glc`. %out and %x are
; not used by the body.
; GCN-LABEL: {{^}}atomic_max_i32_noret:
; GCN: buffer_atomic_smax v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:400{{$}}
define amdgpu_kernel void @atomic_max_i32_noret(i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(1)* %in, i32 addrspace(1)* %x, i32 %y) #0 {
entry:
  ; Fetch this work-item's own pointer from the table at %in.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in, i32 %lane
  %base = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %slot

  ; (lane ^ 1) != 0 holds for every work-item except the one with id 1, so
  ; the atomic sits behind a branch that depends on the work-item id.
  %flipped = xor i32 %lane, 1
  %do.rmw = icmp ne i32 %flipped, 0
  br i1 %do.rmw, label %rmw, label %done

rmw:
  ; Constant element index 100 is the source of the 400-byte offset.
  %elt = getelementptr i32, i32 addrspace(1)* %base, i32 100
  ; %old is intentionally left unused: this is the no-return form.
  %old = atomicrmw max i32 addrspace(1)* %elt, i32 %y seq_cst
  br label %done

done:
  ret void
}
| 50 | |
; Attribute groups referenced in this file: #0 is attached to the two kernel
; definitions, #1 to the work-item id intrinsic declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }