; RUN: opt -S -amdgpu-lower-intrinsics %s | FileCheck -check-prefix=OPT %s
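; The lowering pass is expected to leave small static memcpy/memmove/memset
; intrinsics (1024 bytes or less in these tests) as calls and to expand
; larger or variable-sized ones inline.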

declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i32, i1) #1
declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i32, i1) #1

declare void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i32, i1) #1
declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture, i8, i64, i32, i1) #1

; Test the largest static size that is left as an intrinsic call.
; OPT-LABEL: @max_size_small_static_memcpy_caller0(
; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false)
define amdgpu_kernel void @max_size_small_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false)
  ret void
}

; Smallest static size that will be expanded.
; OPT-LABEL: @min_size_large_static_memcpy_caller0(
; OPT-NOT: call
; OPT: getelementptr
; OPT-NEXT: load i8
; OPT: getelementptr
; OPT-NEXT: store i8
define amdgpu_kernel void @min_size_large_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i32 1, i1 false)
  ret void
}

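; memmove should use the same threshold: 1024 bytes is left as a call,
; 1025 bytes is expanded.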
; OPT-LABEL: @max_size_small_static_memmove_caller0(
; OPT: call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false)
define amdgpu_kernel void @max_size_small_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
  call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false)
  ret void
}

; OPT-LABEL: @min_size_large_static_memmove_caller0(
; OPT-NOT: call
; OPT: getelementptr
; OPT-NEXT: load i8
; OPT: getelementptr
; OPT-NEXT: store i8
define amdgpu_kernel void @min_size_large_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
  call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i32 1, i1 false)
  ret void
}

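; memset should follow the same pattern: left alone at 1024 bytes, expanded
; into stores at 1025 bytes.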
; OPT-LABEL: @max_size_small_static_memset_caller0(
; OPT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i32 1, i1 false)
define amdgpu_kernel void @max_size_small_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 {
  call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i32 1, i1 false)
  ret void
}

; OPT-LABEL: @min_size_large_static_memset_caller0(
; OPT-NOT: call
; OPT: getelementptr
; OPT: store i8
define amdgpu_kernel void @min_size_large_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 {
  call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1025, i32 1, i1 false)
  ret void
}

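; A memcpy with a variable length has no static size to compare against the
; threshold, so it should always be expanded into a copy loop (the loop
; counter shows up as a phi).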
; OPT-LABEL: @variable_memcpy_caller0(
; OPT-NOT: call
; OPT: phi
define amdgpu_kernel void @variable_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false)
  ret void
}

; OPT-LABEL: @variable_memcpy_caller1(
; OPT-NOT: call
; OPT: phi
define amdgpu_kernel void @variable_memcpy_caller1(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false)
  ret void
}

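; Two variable-sized copies in one function should each get their own
; expansion loop.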
; OPT-LABEL: @memcpy_multi_use_one_function(
; OPT-NOT: call
; OPT: phi
; OPT-NOT: call
; OPT: phi
; OPT-NOT: call
define amdgpu_kernel void @memcpy_multi_use_one_function(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n, i64 %m) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false)
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %m, i32 1, i1 false)
  ret void
}

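; The expansion should also handle a source in a different address space (3)
; and an i32 length argument.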
; OPT-LABEL: @memcpy_alt_type(
; OPT: phi
; OPT: getelementptr inbounds i8, i8 addrspace(3)*
; OPT: load i8, i8 addrspace(3)*
; OPT: getelementptr inbounds i8, i8 addrspace(1)*
; OPT: store i8
define amdgpu_kernel void @memcpy_alt_type(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n) #0 {
  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n, i32 1, i1 false)
  ret void
}

; The variable-sized memcpy should be expanded, while the small static one
; (102 bytes) should be left alone.
; OPT-LABEL: @memcpy_multi_use_one_function_keep_small(
; OPT: getelementptr inbounds i8, i8 addrspace(1)*
; OPT: load i8, i8 addrspace(1)*
; OPT: getelementptr inbounds i8, i8 addrspace(1)*
; OPT: store i8

; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i32 1, i1 false)
define amdgpu_kernel void @memcpy_multi_use_one_function_keep_small(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false)
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i32 1, i1 false)
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }