; RUN: opt -S -amdgpu-lower-intrinsics %s | FileCheck -check-prefix=OPT %s

declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i1) #1
declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i1) #1

declare void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i1) #1
declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture, i8, i64, i1) #1

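; The -amdgpu-lower-intrinsics pass expands memcpy, memmove and memset
; intrinsics into explicit load/store loops when the length is not a
; sufficiently small compile-time constant. In these tests a static size of
; 1024 bytes is left as an intrinsic call, while 1025 bytes or a variable
; length is expanded.
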
; Test the upper bound of sizes that are left unexpanded as intrinsic calls.
; OPT-LABEL: @max_size_small_static_memcpy_caller0(
; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false)
define amdgpu_kernel void @max_size_small_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false)
  ret void
}

; Smallest static size that is expanded into a load/store loop.
; OPT-LABEL: @min_size_large_static_memcpy_caller0(
; OPT-NOT: call
; OPT: br label %load-store-loop
; OPT: [[T1:%[0-9]+]] = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %loop-index
; OPT-NEXT: [[T2:%[0-9]+]] = load i8, i8 addrspace(1)* [[T1]]
; OPT-NEXT: [[T3:%[0-9]+]] = getelementptr inbounds i8, i8 addrspace(1)* %dst, i64 %loop-index
; OPT-NEXT: store i8 [[T2]], i8 addrspace(1)* [[T3]]
; OPT-NEXT: [[T4:%[0-9]+]] = add i64 %loop-index, 1
; OPT-NEXT: [[T5:%[0-9]+]] = icmp ult i64 [[T4]], 1025
; OPT-NEXT: br i1 [[T5]], label %load-store-loop, label %memcpy-split
define amdgpu_kernel void @min_size_large_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i1 false)
  ret void
}

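; memmove with a static size at the 1024-byte threshold is also left as a call.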
; OPT-LABEL: @max_size_small_static_memmove_caller0(
; OPT: call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false)
define amdgpu_kernel void @max_size_small_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
  call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false)
  ret void
}

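; memmove of 1025 bytes is expanded into a load/store loop.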
; OPT-LABEL: @min_size_large_static_memmove_caller0(
; OPT-NOT: call
; OPT: getelementptr
; OPT-NEXT: load i8
; OPT: getelementptr
; OPT-NEXT: store i8
define amdgpu_kernel void @min_size_large_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
  call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i1 false)
  ret void
}

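; memset with a static size at the 1024-byte threshold is left as a call.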
; OPT-LABEL: @max_size_small_static_memset_caller0(
; OPT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i1 false)
define amdgpu_kernel void @max_size_small_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 {
  call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i1 false)
  ret void
}

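; memset of 1025 bytes is expanded into a store loop.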
; OPT-LABEL: @min_size_large_static_memset_caller0(
; OPT-NOT: call
; OPT: getelementptr
; OPT: store i8
define amdgpu_kernel void @min_size_large_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 {
  call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1025, i1 false)
  ret void
}

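; A memcpy with a non-constant length is always expanded into a loop.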
; OPT-LABEL: @variable_memcpy_caller0(
; OPT-NOT: call
; OPT: phi
define amdgpu_kernel void @variable_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i1 false)
  ret void
}

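; A second, independent variable-length caller gets the same loop expansion.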
; OPT-LABEL: @variable_memcpy_caller1(
; OPT-NOT: call
; OPT: phi
define amdgpu_kernel void @variable_memcpy_caller1(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i1 false)
  ret void
}

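; Both variable-length memcpys within a single function are expanded.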
; OPT-LABEL: @memcpy_multi_use_one_function(
; OPT-NOT: call
; OPT: phi
; OPT-NOT: call
; OPT: phi
; OPT-NOT: call
define amdgpu_kernel void @memcpy_multi_use_one_function(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n, i64 %m) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i1 false)
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %m, i1 false)
  ret void
}

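; Expansion also handles a source pointer in a different address space
; (local memory, addrspace(3)) with an i32 length.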
; OPT-LABEL: @memcpy_alt_type(
; OPT: phi
; OPT: getelementptr inbounds i8, i8 addrspace(3)*
; OPT: load i8, i8 addrspace(3)*
; OPT: getelementptr inbounds i8, i8 addrspace(1)*
; OPT: store i8
define amdgpu_kernel void @memcpy_alt_type(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n) #0 {
  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n, i1 false)
  ret void
}

; One of the uses in the function should be expanded, the other left alone.
; OPT-LABEL: @memcpy_multi_use_one_function_keep_small(
; OPT: getelementptr inbounds i8, i8 addrspace(1)*
; OPT: load i8, i8 addrspace(1)*
; OPT: getelementptr inbounds i8, i8 addrspace(1)*
; OPT: store i8

; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i1 false)
define amdgpu_kernel void @memcpy_multi_use_one_function_keep_small(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i1 false)
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i1 false)
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }