Matt Arsenault | 3b2e2a5 | 2016-06-06 20:03:31 +0000 | [diff] [blame] | 1 | ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s |
| 2 | |
; Declaration of the memcpy intrinsic overload that is called by
; @memcpy_constant_cast_group_gv_gep_to_flat below: addrspace(1) destination,
; addrspace(4) source, i32 byte count, trailing i1 flag. Uses attribute
; group #0.
Daniel Neilson | 1e68724 | 2018-01-19 17:13:12 +0000 | [diff] [blame] | 3 | declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrspace(4)* nocapture, i32, i1) #0 |
Matt Arsenault | 3b2e2a5 | 2016-06-06 20:03:31 +0000 | [diff] [blame] | 4 | |
; Module-level variables used as the sources of the constant casts below:
; a scalar i32 and a 256-element i32 array in addrspace(3) (the "lds" names),
; and the same pair in addrspace(1) (the "global" names). All four are
; undef-initialized and 4-byte aligned.
| 5 | @lds.i32 = unnamed_addr addrspace(3) global i32 undef, align 4 |
| 6 | @lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4 |
| 7 | |
| 8 | @global.i32 = unnamed_addr addrspace(1) global i32 undef, align 4 |
| 9 | @global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4 |
| 10 | |
; Stores through a constant-expression addrspacecast of a null addrspace(4)
; pointer to addrspace(3) -- the "flat to group" direction in this file's
; naming. The expectation below is that the kernel stays on attribute
; group #1, i.e. nothing is added to its attributes.
| 11 | ; HSA: @store_cast_0_flat_to_group_addrspacecast() #1 |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 12 | define amdgpu_kernel void @store_cast_0_flat_to_group_addrspacecast() #1 { |
Matt Arsenault | 3b2e2a5 | 2016-06-06 20:03:31 +0000 | [diff] [blame] | 13 | store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*) |
| 14 | ret void |
| 15 | } |
| 16 | |
; Reverse direction of the previous kernel: stores through a constant
; addrspacecast of a null addrspace(3) pointer to addrspace(4) ("group to
; flat"). The expectation below is that the kernel moves from attribute
; group #1 to #2, which the expected attribute list at the end of the file
; defines as nounwind plus "amdgpu-queue-ptr".
| 17 | ; HSA: @store_cast_0_group_to_flat_addrspacecast() #2 |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 18 | define amdgpu_kernel void @store_cast_0_group_to_flat_addrspacecast() #1 { |
Matt Arsenault | 3b2e2a5 | 2016-06-06 20:03:31 +0000 | [diff] [blame] | 19 | store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*) |
| 20 | ret void |
| 21 | } |
| 22 | |
; Stores through a constant addrspacecast of the addrspace(3) variable
; @lds.i32 to addrspace(4). Expected to end up on attribute group #2.
; Unlike the other expectations in this file, this one also matches the
; leading "define amdgpu_kernel void" text, not just the name and group.
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 23 | ; HSA: define amdgpu_kernel void @store_constant_cast_group_gv_to_flat() #2 |
| 24 | define amdgpu_kernel void @store_constant_cast_group_gv_to_flat() #1 { |
Matt Arsenault | 3b2e2a5 | 2016-06-06 20:03:31 +0000 | [diff] [blame] | 25 | store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*) |
| 26 | ret void |
| 27 | } |
| 28 | |
; Same idea as the scalar case, but the addrspace(3)-to-addrspace(4) cast is
; nested one level down: it is the base operand of a constant getelementptr
; (element 8 of @lds.arr), and the store goes through the GEP result.
; Expected to end up on attribute group #2.
| 29 | ; HSA: @store_constant_cast_group_gv_gep_to_flat() #2 |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 30 | define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat() #1 { |
Matt Arsenault | 3b2e2a5 | 2016-06-06 20:03:31 +0000 | [diff] [blame] | 31 | store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) |
| 32 | ret void |
| 33 | } |
| 34 | |
; Counterpart of the @lds.i32 case with an addrspace(1) source: stores
; through a constant addrspacecast of @global.i32 to addrspace(4).
; The expectation below is that the kernel stays on attribute group #1.
| 35 | ; HSA: @store_constant_cast_global_gv_to_flat() #1 |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 36 | define amdgpu_kernel void @store_constant_cast_global_gv_to_flat() #1 { |
Matt Arsenault | 3b2e2a5 | 2016-06-06 20:03:31 +0000 | [diff] [blame] | 37 | store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global.i32 to i32 addrspace(4)*) |
| 38 | ret void |
| 39 | } |
| 40 | |
; Counterpart of the @lds.arr GEP case with an addrspace(1) source: the
; cast of @global.arr to addrspace(4) is the base of a constant
; getelementptr (element 8). The expectation below is that the kernel
; stays on attribute group #1.
| 41 | ; HSA: @store_constant_cast_global_gv_gep_to_flat() #1 |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 42 | define amdgpu_kernel void @store_constant_cast_global_gv_gep_to_flat() #1 { |
Matt Arsenault | 3b2e2a5 | 2016-06-06 20:03:31 +0000 | [diff] [blame] | 43 | store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) |
| 44 | ret void |
| 45 | } |
| 46 | |
; The cast-then-GEP constant expression on @lds.arr is used as the pointer
; operand of a load instead of a store; the loaded value is written to %out
; so it is not dead. Expected to end up on attribute group #2.
| 47 | ; HSA: @load_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #2 |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 48 | define amdgpu_kernel void @load_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 { |
Matt Arsenault | 3b2e2a5 | 2016-06-06 20:03:31 +0000 | [diff] [blame] | 49 | %val = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) |
| 50 | store i32 %val, i32 addrspace(1)* %out |
| 51 | ret void |
| 52 | } |
| 53 | |
; The cast-then-GEP constant expression on @lds.arr is used as the pointer
; operand of a seq_cst atomicrmw add; the returned value is written to %out.
; Expected to end up on attribute group #2.
| 54 | ; HSA: @atomicrmw_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #2 |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 55 | define amdgpu_kernel void @atomicrmw_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 { |
Matt Arsenault | 3b2e2a5 | 2016-06-06 20:03:31 +0000 | [diff] [blame] | 56 | %val = atomicrmw add i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst |
| 57 | store i32 %val, i32 addrspace(1)* %out |
| 58 | ret void |
| 59 | } |
| 60 | |
; The cast-then-GEP constant expression on @lds.arr is used as the pointer
; operand of a seq_cst/seq_cst cmpxchg (compare 0, new value 1). Field 0 of
; the { i32, i1 } result is extracted and written to %out.
; Expected to end up on attribute group #2.
| 61 | ; HSA: @cmpxchg_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #2 |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 62 | define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 { |
Matt Arsenault | 3b2e2a5 | 2016-06-06 20:03:31 +0000 | [diff] [blame] | 63 | %val = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst |
| 64 | %val0 = extractvalue { i32, i1 } %val, 0 |
| 65 | store i32 %val0, i32 addrspace(1)* %out |
| 66 | ret void |
| 67 | } |
| 68 | |
; The cast-then-GEP constant expression on @lds.arr is passed as the source
; argument of the memcpy intrinsic, i.e. it is a call operand rather than
; the pointer operand of a memory instruction. The call copies 32 bytes to
; %out with both pointers marked align 4.
; Expected to end up on attribute group #2.
| 69 | ; HSA: @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #2 |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 70 | define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 { |
Daniel Neilson | 1e68724 | 2018-01-19 17:13:12 +0000 | [diff] [blame] | 71 | call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 %out, i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false) |
Matt Arsenault | 3b2e2a5 | 2016-06-06 20:03:31 +0000 | [diff] [blame] | 72 | ret void |
| 73 | } |
| 74 | |
; Here the cast-then-GEP constant expression on @lds.arr is the VALUE being
; stored, not the address: the store's pointer operand is the plain %out
; argument. Expected to end up on attribute group #2 all the same.
| 75 | ; Can't just search the pointer value |
| 76 | ; HSA: @store_value_constant_cast_lds_gv_gep_to_flat(i32 addrspace(4)* addrspace(1)* %out) #2 |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 77 | define amdgpu_kernel void @store_value_constant_cast_lds_gv_gep_to_flat(i32 addrspace(4)* addrspace(1)* %out) #1 { |
Matt Arsenault | 3b2e2a5 | 2016-06-06 20:03:31 +0000 | [diff] [blame] | 78 | store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* %out |
| 79 | ret void |
| 80 | } |
| 81 | |
; The cast-then-GEP constant expression on @lds.arr is wrapped in a
; ptrtoint, so the stored value has integer type (i64) and no operand of
; the store instruction itself is an addrspace(4) pointer.
; Expected to end up on attribute group #2 all the same.
| 82 | ; Can't just search pointer types |
| 83 | ; HSA: @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(i64 addrspace(1)* %out) #2 |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 84 | define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(i64 addrspace(1)* %out) #1 { |
Matt Arsenault | 3b2e2a5 | 2016-06-06 20:03:31 +0000 | [diff] [blame] | 85 | store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* %out |
| 86 | ret void |
| 87 | } |
| 88 | |
; Round trip inside one constant expression: @lds.arr is cast from
; addrspace(3) to addrspace(4), indexed with a GEP, and the result is cast
; back to addrspace(3) before being used as the store address. The final
; pointer is therefore addrspace(3), but the inner cast is still present.
; Expected to end up on attribute group #2.
| 89 | ; Cast group to flat, do GEP, cast back to group |
| 90 | ; HSA: @store_constant_cast_group_gv_gep_to_flat_to_group() #2 |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 91 | define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat_to_group() #1 { |
Matt Arsenault | 3b2e2a5 | 2016-06-06 20:03:31 +0000 | [diff] [blame] | 92 | store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*) |
| 93 | ret void |
| 94 | } |
| 95 | |
; Same round-trip constant expression as the preceding store test, but used
; as the operand of a ret. This is the only definition in the file without
; the amdgpu_kernel calling convention, and it returns an addrspace(3)
; pointer instead of void. Expected to end up on attribute group #2.
| 96 | ; HSA: @ret_constant_cast_group_gv_gep_to_flat_to_group() #2 |
| 97 | define i32 addrspace(3)* @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 { |
| 98 | ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*) |
| 99 | } |
| 100 | |
; Expected attribute groups in the pass output, followed by the attribute
; groups of this input file. The input defines only #0 and #1; group #2
; appears only in the expectations, as the { nounwind } set of #1 with
; "amdgpu-queue-ptr" added.
| 101 | ; HSA: attributes #0 = { argmemonly nounwind } |
| 102 | ; HSA: attributes #1 = { nounwind } |
| 103 | ; HSA: attributes #2 = { nounwind "amdgpu-queue-ptr" } |
| 104 | |
| 105 | attributes #0 = { argmemonly nounwind } |
| 106 | attributes #1 = { nounwind } |