; RUN: opt -S -amdgpu-lower-intrinsics %s | FileCheck -check-prefix=OPT %s

; Exercises the -amdgpu-lower-intrinsics pass on memcpy / memmove / memset
; calls. The checks below expect static sizes up to 1024 to stay as intrinsic
; calls, and larger or non-constant sizes to be replaced by load/store code.

declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i32, i1) #1
declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i32, i1) #1

declare void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i32, i1) #1
declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture, i8, i64, i32, i1) #1

; Test the upper bound for static sizes that are left alone: a memcpy of
; exactly 1024 bytes is expected to remain an intrinsic call after the pass.
; OPT-LABEL: @max_size_small_static_memcpy_caller0(
; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false)
define amdgpu_kernel void @max_size_small_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false)
  ret void
}

; Smallest static size which will be expanded (1025, one more than the size
; the test above expects to be kept). No call may remain; the checks expect a
; byte-wise load/store loop over %loop-index that exits to %memcpy-split.
; OPT-LABEL: @min_size_large_static_memcpy_caller0(
; OPT-NOT: call
; OPT: br label %load-store-loop
; OPT: [[T1:%[0-9]+]] = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %loop-index
; OPT-NEXT: [[T2:%[0-9]+]] = load i8, i8 addrspace(1)* [[T1]]
; OPT-NEXT: [[T3:%[0-9]+]] = getelementptr inbounds i8, i8 addrspace(1)* %dst, i64 %loop-index
; OPT-NEXT: store i8 [[T2]], i8 addrspace(1)* [[T3]]
; OPT-NEXT: [[T4:%[0-9]+]] = add i64 %loop-index, 1
; OPT-NEXT: [[T5:%[0-9]+]] = icmp ult i64 [[T4]], 1025
; OPT-NEXT: br i1 [[T5]], label %load-store-loop, label %memcpy-split
define amdgpu_kernel void @min_size_large_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i32 1, i1 false)
  ret void
}

; A static memmove of 1024 bytes is expected to remain an intrinsic call.
; OPT-LABEL: @max_size_small_static_memmove_caller0(
; OPT: call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false)
define amdgpu_kernel void @max_size_small_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
  call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false)
  ret void
}

; A static memmove of 1025 bytes is expected to be expanded: no call remains,
; and the output contains i8 loads and stores addressed through getelementptr.
; OPT-LABEL: @min_size_large_static_memmove_caller0(
; OPT-NOT: call
; OPT: getelementptr
; OPT-NEXT: load i8
; OPT: getelementptr
; OPT-NEXT: store i8
define amdgpu_kernel void @min_size_large_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
  call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i32 1, i1 false)
  ret void
}

; A static memset of 1024 bytes is expected to remain an intrinsic call.
; OPT-LABEL: @max_size_small_static_memset_caller0(
; OPT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i32 1, i1 false)
define amdgpu_kernel void @max_size_small_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 {
  call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i32 1, i1 false)
  ret void
}

; A static memset of 1025 bytes is expected to be expanded: no call remains,
; and the output contains an i8 store addressed through getelementptr.
; OPT-LABEL: @min_size_large_static_memset_caller0(
; OPT-NOT: call
; OPT: getelementptr
; OPT: store i8
define amdgpu_kernel void @min_size_large_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 {
  call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1025, i32 1, i1 false)
  ret void
}

; memcpy whose length is the runtime value %n: the checks expect the call to
; be gone and a phi (loop induction) to appear in its place.
; OPT-LABEL: @variable_memcpy_caller0(
; OPT-NOT: call
; OPT: phi
define amdgpu_kernel void @variable_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false)
  ret void
}

; Second kernel with the same variable-length memcpy, so the intrinsic has
; users in more than one function; the same expansion is expected here.
; OPT-LABEL: @variable_memcpy_caller1(
; OPT-NOT: call
; OPT: phi
define amdgpu_kernel void @variable_memcpy_caller1(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false)
  ret void
}

; Two variable-length memcpy calls (lengths %n and %m) in one function: the
; checks expect two phis and no call before, between, or after them.
; OPT-LABEL: @memcpy_multi_use_one_function(
; OPT-NOT: call
; OPT: phi
; OPT-NOT: call
; OPT: phi
; OPT-NOT: call
define amdgpu_kernel void @memcpy_multi_use_one_function(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n, i64 %m) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false)
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %m, i32 1, i1 false)
  ret void
}

; memcpy with a different signature: addrspace(3) source, addrspace(1)
; destination and an i32 length. The expansion is expected to load through
; the addrspace(3) pointer and store through the addrspace(1) pointer.
; OPT-LABEL: @memcpy_alt_type(
; OPT: phi
; OPT: getelementptr inbounds i8, i8 addrspace(3)*
; OPT: load i8, i8 addrspace(3)*
; OPT: getelementptr inbounds i8, i8 addrspace(1)*
; OPT: store i8
define amdgpu_kernel void @memcpy_alt_type(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n) #0 {
  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n, i32 1, i1 false)
  ret void
}

; One of the uses in the function should be expanded, the other left alone:
; the variable-length copy into %dst0 becomes load/store code, while the
; static 102-byte copy into %dst1 is expected to stay an intrinsic call.
; OPT-LABEL: @memcpy_multi_use_one_function_keep_small(
; OPT: getelementptr inbounds i8, i8 addrspace(1)*
; OPT: load i8, i8 addrspace(1)*
; OPT: getelementptr inbounds i8, i8 addrspace(1)*
; OPT: store i8

; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i32 1, i1 false)
define amdgpu_kernel void @memcpy_multi_use_one_function_keep_small(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false)
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i32 1, i1 false)
  ret void
}

; Attribute groups: #0 is used by the test kernels, #1 by the intrinsic
; declarations.
attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }