Matt Arsenault | 0699ef3 | 2017-02-09 22:00:42 +0000 | [diff] [blame] | 1 | ; RUN: opt -S -amdgpu-lower-intrinsics %s | FileCheck -check-prefix=OPT %s |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 2 | ; RUN: opt -S -amdgpu-lower-intrinsics -use-wide-memcpy-loop-lowering=true %s | FileCheck -check-prefix=WOPT %s |
Matt Arsenault | 0699ef3 | 2017-02-09 22:00:42 +0000 | [diff] [blame] | 3 | |
; Declarations of the memory intrinsics called by the test kernels in this file:
;   - memcpy  addrspace(1) <- addrspace(1), i64 length
;   - memcpy  addrspace(1) <- addrspace(3), i32 length
;   - memmove addrspace(1) <- addrspace(1), i64 length
;   - memset  addrspace(1), i8 fill value, i64 length
; All four use attribute group #1.
; NOTE(review): the trailing i32 / i1 operands are presumably the alignment and
; volatile flag of the older intrinsic signature - confirm against the LangRef.
| 4 | declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i32, i1) #1 |
| 5 | declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i32, i1) #1 |
| 6 | |
| 7 | declare void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i32, i1) #1 |
| 8 | declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture, i8, i64, i32, i1) #1 |
| 9 | |
; Kernel with a single memcpy of constant length 1024. The check line below
; expects the intrinsic call to still be present, unchanged, in the output.
| 10 | ; Test the upper bound for sizes to leave as an unexpanded intrinsic call |
| 11 | ; OPT-LABEL: @max_size_small_static_memcpy_caller0( |
| 12 | ; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false) |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 13 | define amdgpu_kernel void @max_size_small_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { |
Matt Arsenault | 0699ef3 | 2017-02-09 22:00:42 +0000 | [diff] [blame] | 14 | call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false) |
| 15 | ret void |
| 16 | } |
| 17 | |
; Kernel with a single memcpy of constant length 1025 (one more than the
; 1024-byte case that is left alone).
; First RUN line: no call may remain; a getelementptr immediately followed by an
; i8 load, then a getelementptr immediately followed by an i8 store, must appear.
; Second RUN line (-use-wide-memcpy-loop-lowering=true): the loop is pinned
; instruction by instruction - branch to %load-store-loop, i8 load from %src and
; i8 store to %dst both indexed by %loop-index, index incremented by 1, unsigned
; compare against 1025, and a conditional branch back to the loop or on to
; %memcpy-split.
| 18 | ; Smallest static size which will be expanded |
| 19 | ; OPT-LABEL: @min_size_large_static_memcpy_caller0( |
| 20 | ; OPT-NOT: call |
| 21 | ; OPT: getelementptr |
| 22 | ; OPT-NEXT: load i8 |
| 23 | ; OPT: getelementptr |
| 24 | ; OPT-NEXT: store i8 |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 25 | |
| 26 | ; WOPT-LABEL: @min_size_large_static_memcpy_caller0( |
| 27 | ; WOPT-NOT: call |
| 28 | ; WOPT: br label %load-store-loop |
| 29 | ; WOPT: [[T1:%[0-9]+]] = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %loop-index |
| 30 | ; WOPT-NEXT: [[T2:%[0-9]+]] = load i8, i8 addrspace(1)* [[T1]] |
| 31 | ; WOPT-NEXT: [[T3:%[0-9]+]] = getelementptr inbounds i8, i8 addrspace(1)* %dst, i64 %loop-index |
| 32 | ; WOPT-NEXT: store i8 [[T2]], i8 addrspace(1)* [[T3]] |
| 33 | ; WOPT-NEXT: [[T4:%[0-9]+]] = add i64 %loop-index, 1 |
| 34 | ; WOPT-NEXT: [[T5:%[0-9]+]] = icmp ult i64 [[T4]], 1025 |
| 35 | ; WOPT-NEXT: br i1 [[T5]], label %load-store-loop, label %memcpy-split |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 36 | define amdgpu_kernel void @min_size_large_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { |
Matt Arsenault | 0699ef3 | 2017-02-09 22:00:42 +0000 | [diff] [blame] | 37 | call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i32 1, i1 false) |
| 38 | ret void |
| 39 | } |
| 40 | |
; Kernel with a single memmove of constant length 1024. The check line expects
; the memmove intrinsic call to still be present, unchanged, in the output.
| 41 | ; OPT-LABEL: @max_size_small_static_memmove_caller0( |
| 42 | ; OPT: call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false) |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 43 | define amdgpu_kernel void @max_size_small_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { |
Matt Arsenault | 0699ef3 | 2017-02-09 22:00:42 +0000 | [diff] [blame] | 44 | call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false) |
| 45 | ret void |
| 46 | } |
| 47 | |
; Kernel with a single memmove of constant length 1025. The checks expect no
; call to remain and a byte-wise sequence to appear: getelementptr immediately
; followed by an i8 load, then getelementptr immediately followed by an i8 store.
| 48 | ; OPT-LABEL: @min_size_large_static_memmove_caller0( |
| 49 | ; OPT-NOT: call |
| 50 | ; OPT: getelementptr |
| 51 | ; OPT-NEXT: load i8 |
| 52 | ; OPT: getelementptr |
| 53 | ; OPT-NEXT: store i8 |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 54 | define amdgpu_kernel void @min_size_large_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { |
Matt Arsenault | 0699ef3 | 2017-02-09 22:00:42 +0000 | [diff] [blame] | 55 | call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i32 1, i1 false) |
| 56 | ret void |
| 57 | } |
| 58 | |
; Kernel with a single memset of constant length 1024 using fill byte %val.
; The check line expects the memset intrinsic call to still be present,
; unchanged, in the output.
| 59 | ; OPT-LABEL: @max_size_small_static_memset_caller0( |
| 60 | ; OPT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i32 1, i1 false) |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 61 | define amdgpu_kernel void @max_size_small_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 { |
Matt Arsenault | 0699ef3 | 2017-02-09 22:00:42 +0000 | [diff] [blame] | 62 | call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i32 1, i1 false) |
| 63 | ret void |
| 64 | } |
| 65 | |
; Kernel with a single memset of constant length 1025 using fill byte %val.
; The checks expect no call to remain and a getelementptr followed (not
; necessarily immediately) by an i8 store.
| 66 | ; OPT-LABEL: @min_size_large_static_memset_caller0( |
| 67 | ; OPT-NOT: call |
| 68 | ; OPT: getelementptr |
| 69 | ; OPT: store i8 |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 70 | define amdgpu_kernel void @min_size_large_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 { |
Matt Arsenault | 0699ef3 | 2017-02-09 22:00:42 +0000 | [diff] [blame] | 71 | call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1025, i32 1, i1 false) |
| 72 | ret void |
| 73 | } |
| 74 | |
; Kernel with a single memcpy whose length is the kernel argument %n rather than
; a constant. The checks expect no call to remain and a phi node to appear.
| 75 | ; OPT-LABEL: @variable_memcpy_caller0( |
| 76 | ; OPT-NOT: call |
| 77 | ; OPT: phi |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 78 | define amdgpu_kernel void @variable_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 { |
Matt Arsenault | 0699ef3 | 2017-02-09 22:00:42 +0000 | [diff] [blame] | 79 | call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false) |
| 80 | ret void |
| 81 | } |
| 82 | |
; Second kernel with the same variable-length (%n) memcpy body as
; @variable_memcpy_caller0; the checks again expect no call and a phi node.
; NOTE(review): presumably this duplicate exists so the same intrinsic
; declaration has users in more than one function - confirm intent.
| 83 | ; OPT-LABEL: @variable_memcpy_caller1( |
| 84 | ; OPT-NOT: call |
| 85 | ; OPT: phi |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 86 | define amdgpu_kernel void @variable_memcpy_caller1(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 { |
Matt Arsenault | 0699ef3 | 2017-02-09 22:00:42 +0000 | [diff] [blame] | 87 | call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false) |
| 88 | ret void |
| 89 | } |
| 90 | |
; Kernel with two variable-length memcpy calls from the same %src: %n bytes to
; %dst0 and %m bytes to %dst1. The checks expect two phi nodes and no call
; before, between, or after them.
| 91 | ; OPT-LABEL: @memcpy_multi_use_one_function( |
| 92 | ; OPT-NOT: call |
| 93 | ; OPT: phi |
| 94 | ; OPT-NOT: call |
| 95 | ; OPT: phi |
| 96 | ; OPT-NOT: call |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 97 | define amdgpu_kernel void @memcpy_multi_use_one_function(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n, i64 %m) #0 { |
Matt Arsenault | 0699ef3 | 2017-02-09 22:00:42 +0000 | [diff] [blame] | 98 | call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false) |
| 99 | call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %m, i32 1, i1 false) |
| 100 | ret void |
| 101 | } |
| 102 | |
; Kernel using the other memcpy overload: source in addrspace(3), destination in
; addrspace(1), and an i32 (not i64) variable length %n. The checks expect a phi
; node, a getelementptr and i8 load on an addrspace(3) pointer, a getelementptr
; on an addrspace(1) pointer, and an i8 store.
| 103 | ; OPT-LABEL: @memcpy_alt_type( |
| 104 | ; OPT: phi |
| 105 | ; OPT: getelementptr inbounds i8, i8 addrspace(3)* |
| 106 | ; OPT: load i8, i8 addrspace(3)* |
| 107 | ; OPT: getelementptr inbounds i8, i8 addrspace(1)* |
| 108 | ; OPT: store i8 |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 109 | define amdgpu_kernel void @memcpy_alt_type(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n) #0 { |
Matt Arsenault | 0699ef3 | 2017-02-09 22:00:42 +0000 | [diff] [blame] | 110 | call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n, i32 1, i1 false) |
| 111 | ret void |
| 112 | } |
| 113 | |
; Kernel with two memcpy calls from the same %src: a variable-length (%n) copy
; to %dst0 and a constant 102-byte copy to %dst1. The checks expect the
; byte-wise getelementptr/load/getelementptr/store sequence first, and
; afterwards the 102-byte call to %dst1 still present with its original operands.
| 114 | ; One of the uses in the function should be expanded, the other left alone. |
| 115 | ; OPT-LABEL: @memcpy_multi_use_one_function_keep_small( |
| 116 | ; OPT: getelementptr inbounds i8, i8 addrspace(1)* |
| 117 | ; OPT: load i8, i8 addrspace(1)* |
| 118 | ; OPT: getelementptr inbounds i8, i8 addrspace(1)* |
| 119 | ; OPT: store i8 |
| 120 | |
| 121 | ; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i32 1, i1 false) |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 122 | define amdgpu_kernel void @memcpy_multi_use_one_function_keep_small(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n) #0 { |
Matt Arsenault | 0699ef3 | 2017-02-09 22:00:42 +0000 | [diff] [blame] | 123 | call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false) |
| 124 | call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i32 1, i1 false) |
| 125 | ret void |
| 126 | } |
| 127 | |
; Attribute groups referenced in this file:
;   #0 (nounwind)            - attached to every amdgpu_kernel definition.
;   #1 (argmemonly nounwind) - attached to the memcpy/memmove/memset declarations.
| 128 | attributes #0 = { nounwind } |
| 129 | attributes #1 = { argmemonly nounwind } |