; RUN: llc -march=amdgcn -disable-promote-alloca-to-vector -verify-machineinstrs < %s | FileCheck %s

; Intrinsics shared by the kernels in this file.

; Returns the base pointer of the kernel argument segment (address space 4).
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #1

; Returns the x component of the workitem id.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; Only available on CI and newer; called from @target_bonaire.
declare void @llvm.amdgcn.s.dcache.inv.vol() #0

; Only available on VI and newer; called from @target_fiji.
declare void @llvm.amdgcn.s.dcache.wb() #0
| 12 | |
; Kernel compiled without any "target-cpu" attribute (group #0 is plain
; nounwind). It loads a global i32 pointer from byte offset 1024 of the kernarg
; segment and stores 0 to the element indexed by the workitem id. The expected
; output materializes the offset (0x400) in an SGPR before the scalar load and
; uses an addr64 buffer store.
; CHECK-LABEL: {{^}}target_none:
; CHECK: s_movk_i32 [[OFFSETREG:s[0-9]+]], 0x400
; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, [[OFFSETREG]]
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
define amdgpu_kernel void @target_none() #0 {
  ; Address of the pointer slot: kernarg base + 1024 bytes.
  %karg.base = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  %karg.slot = getelementptr inbounds i8, i8 addrspace(4)* %karg.base, i64 1024
  %karg.slot.typed = bitcast i8 addrspace(4)* %karg.slot to i32 addrspace(1)* addrspace(4)*
  %dst.base = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %karg.slot.typed
  ; Per-workitem element of the loaded global buffer.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.i64 = sext i32 %tid to i64
  %dst = getelementptr inbounds i32, i32 addrspace(1)* %dst.base, i64 %tid.i64
  store i32 0, i32 addrspace(1)* %dst
  ret void
}
| 28 | |
; Kernel compiled with "target-cpu"="tahiti" (attribute group #2). It loads a
; global i32 pointer from byte offset 1024 of the kernarg segment and stores 0
; to the element indexed by the workitem id.
;
; This kernel previously referenced group #1 (nounwind readnone), which is the
; group meant for the side-effect-free intrinsic declarations: it never
; selected tahiti, left group #2 unused, and marked a kernel that writes memory
; as readnone. It now uses #2 as the test name intends.
; NOTE(review): the expected output is assumed to be unchanged because the same
; SGPR-offset pattern is expected for the kernel without a target-cpu; confirm
; by running the test.
; CHECK-LABEL: {{^}}target_tahiti:
; CHECK: s_movk_i32 [[OFFSETREG:s[0-9]+]], 0x400
; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, [[OFFSETREG]]
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
define amdgpu_kernel void @target_tahiti() #2 {
  ; Address of the pointer slot: kernarg base + 1024 bytes.
  %kernargs = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  %kernargs.gep = getelementptr inbounds i8, i8 addrspace(4)* %kernargs, i64 1024
  %kernargs.gep.cast = bitcast i8 addrspace(4)* %kernargs.gep to i32 addrspace(1)* addrspace(4)*
  %ptr = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %kernargs.gep.cast
  ; Per-workitem element of the loaded global buffer.
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %id.ext = sext i32 %id to i64
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i64 %id.ext
  store i32 0, i32 addrspace(1)* %gep
  ret void
}
| 44 | |
; Kernel compiled with "target-cpu"="bonaire" (attribute group #3). Same
; load/store sequence as the other target_* kernels, followed by a call to the
; CI+ s.dcache.inv.vol intrinsic. The expected output folds the kernarg offset
; into the scalar load as the immediate 0x100 (presumably the 1024-byte offset
; expressed in dwords) and contains s_dcache_inv_vol.
; CHECK-LABEL: {{^}}target_bonaire:
; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x100
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
; CHECK: s_dcache_inv_vol
define amdgpu_kernel void @target_bonaire() #3 {
  ; Address of the pointer slot: kernarg base + 1024 bytes.
  %karg.base = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  %karg.slot = getelementptr inbounds i8, i8 addrspace(4)* %karg.base, i64 1024
  %karg.slot.typed = bitcast i8 addrspace(4)* %karg.slot to i32 addrspace(1)* addrspace(4)*
  %dst.base = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %karg.slot.typed
  ; Per-workitem element of the loaded global buffer.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.i64 = sext i32 %tid to i64
  %dst = getelementptr inbounds i32, i32 addrspace(1)* %dst.base, i64 %tid.i64
  store i32 0, i32 addrspace(1)* %dst
  ; Only selectable when the subtarget is CI or newer.
  call void @llvm.amdgcn.s.dcache.inv.vol()
  ret void
}
| 61 | |
; Kernel compiled with "target-cpu"="fiji" (attribute group #4). Same
; load/store sequence as the other target_* kernels, followed by a call to the
; VI+ s.dcache.wb intrinsic. The expected output uses the immediate 0x400 (the
; 1024-byte offset) on the scalar load, a flat store, and a bare s_dcache_wb.
; CHECK-LABEL: {{^}}target_fiji:
; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x400
; CHECK: flat_store_dword
; CHECK: s_dcache_wb{{$}}
define amdgpu_kernel void @target_fiji() #4 {
  ; Address of the pointer slot: kernarg base + 1024 bytes.
  %karg.base = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  %karg.slot = getelementptr inbounds i8, i8 addrspace(4)* %karg.base, i64 1024
  %karg.slot.typed = bitcast i8 addrspace(4)* %karg.slot to i32 addrspace(1)* addrspace(4)*
  %dst.base = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %karg.slot.typed
  ; Per-workitem element of the loaded global buffer.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.i64 = sext i32 %tid to i64
  %dst = getelementptr inbounds i32, i32 addrspace(1)* %dst.base, i64 %tid.i64
  store i32 0, i32 addrspace(1)* %dst
  ; Only selectable when the subtarget is VI or newer.
  call void @llvm.amdgcn.s.dcache.wb()
  ret void
}
| 78 | |
; Kernel with "+promote-alloca" in its target features (attribute group #5).
; A dynamically indexed [5 x i32] private array is read and the value is
; written to %out. The expected output reads it with an LDS instruction
; (ds_read_b32) and reports 5120 bytes of LDS (presumably 20 bytes per workitem
; times a 256-workitem group; confirm against the backend defaults).
; CHECK-LABEL: {{^}}promote_alloca_enabled:
; CHECK: ds_read_b32
; CHECK: ; LDSByteSize: 5120
define amdgpu_kernel void @promote_alloca_enabled(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #5 {
entry:
  %stack = alloca [5 x i32], align 4, addrspace(5)
  ; The index comes from memory, so it is not a compile-time constant.
  %idx = load i32, i32 addrspace(1)* %in, align 4
  %elt.ptr = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %idx
  %elt = load i32, i32 addrspace(5)* %elt.ptr
  store i32 %elt, i32 addrspace(1)* %out
  ret void
}
| 91 | |
; Kernel with "-promote-alloca" in its target features (attribute group #6).
; Same body as @promote_alloca_enabled. The expected output references the
; scratch resource descriptor symbols and reports a scratch size of 24.
; CHECK-LABEL: {{^}}promote_alloca_disabled:
; CHECK: SCRATCH_RSRC_DWORD0
; CHECK: SCRATCH_RSRC_DWORD1
; CHECK: ScratchSize: 24
define amdgpu_kernel void @promote_alloca_disabled(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #6 {
entry:
  %stack = alloca [5 x i32], align 4, addrspace(5)
  ; The index comes from memory, so it is not a compile-time constant.
  %idx = load i32, i32 addrspace(1)* %in, align 4
  %elt.ptr = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %idx
  %elt = load i32, i32 addrspace(5)* %elt.ptr
  store i32 %elt, i32 addrspace(1)* %out
  ret void
}
| 105 | |
; Attribute groups referenced by the declarations and kernels in this file.

; No target selection: plain nounwind, and nounwind + readnone.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

; One group per explicitly selected processor.
attributes #2 = { nounwind "target-cpu"="tahiti" }
attributes #3 = { nounwind "target-cpu"="bonaire" }
attributes #4 = { nounwind "target-cpu"="fiji" }

; Alloca promotion switched on (#5) and off (#6), both with a waves-per-EU
; range of 1,3.
attributes #5 = { nounwind "target-features"="+promote-alloca" "amdgpu-waves-per-eu"="1,3" }
attributes #6 = { nounwind "target-features"="-promote-alloca" "amdgpu-waves-per-eu"="1,3" }