; RUN: opt -S -amdgpu-lower-intrinsics %s | FileCheck -check-prefix=OPT %s
; RUN: opt -S -amdgpu-lower-intrinsics -use-wide-memcpy-loop-lowering=true %s | FileCheck -check-prefix=WOPT %s

; Declarations of the memory intrinsics called by the test functions in this
; file: two memcpy variants (addrspace(1) <- addrspace(1) with an i64 length,
; and addrspace(1) <- addrspace(3) with an i32 length), one memmove and one
; memset.
declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i32, i1) #1
declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i32, i1) #1

declare void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i32, i1) #1
declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture, i8, i64, i32, i1) #1

; Test the upper bound for sizes to leave unexpanded: a memcpy with a constant
; length of 1024 is expected to stay as a call to the intrinsic.
; OPT-LABEL: @max_size_small_static_memcpy_caller0(
; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false)
define amdgpu_kernel void @max_size_small_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false)
  ret void
}

; Smallest static size which will be expanded: a memcpy with a constant length
; of 1025. The default run expects no call before an i8 load/store pair, each
; immediately preceded by a getelementptr.
; OPT-LABEL: @min_size_large_static_memcpy_caller0(
; OPT-NOT: call
; OPT: getelementptr
; OPT-NEXT: load i8
; OPT: getelementptr
; OPT-NEXT: store i8

; The wide-loop run expects a block named load-store-loop that copies one i8
; per iteration, increments the index by 1, compares it against 1025 and
; branches either back to the loop or on to memcpy-split.
; WOPT-LABEL: @min_size_large_static_memcpy_caller0(
; WOPT-NOT: call
; WOPT: br label %load-store-loop
; WOPT: [[T1:%[0-9]+]] = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %loop-index
; WOPT-NEXT: [[T2:%[0-9]+]] = load i8, i8 addrspace(1)* [[T1]]
; WOPT-NEXT: [[T3:%[0-9]+]] = getelementptr inbounds i8, i8 addrspace(1)* %dst, i64 %loop-index
; WOPT-NEXT: store i8 [[T2]], i8 addrspace(1)* [[T3]]
; WOPT-NEXT: [[T4:%[0-9]+]] = add i64 %loop-index, 1
; WOPT-NEXT: [[T5:%[0-9]+]] = icmp ult i64 [[T4]], 1025
; WOPT-NEXT: br i1 [[T5]], label %load-store-loop, label %memcpy-split
define amdgpu_kernel void @min_size_large_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i32 1, i1 false)
  ret void
}

; A memmove with a constant length of 1024 is expected to stay as a call to
; the intrinsic.
; OPT-LABEL: @max_size_small_static_memmove_caller0(
; OPT: call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false)
define amdgpu_kernel void @max_size_small_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
  call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false)
  ret void
}

; A memmove with a constant length of 1025 is expected to be expanded: no call
; before an i8 load/store pair, each immediately preceded by a getelementptr.
; OPT-LABEL: @min_size_large_static_memmove_caller0(
; OPT-NOT: call
; OPT: getelementptr
; OPT-NEXT: load i8
; OPT: getelementptr
; OPT-NEXT: store i8
define amdgpu_kernel void @min_size_large_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
  call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i32 1, i1 false)
  ret void
}

; A memset with a constant length of 1024 is expected to stay as a call to the
; intrinsic.
; OPT-LABEL: @max_size_small_static_memset_caller0(
; OPT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i32 1, i1 false)
define amdgpu_kernel void @max_size_small_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 {
  call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i32 1, i1 false)
  ret void
}

; A memset with a constant length of 1025 is expected to be expanded: no call
; before a getelementptr, followed later by an i8 store.
; OPT-LABEL: @min_size_large_static_memset_caller0(
; OPT-NOT: call
; OPT: getelementptr
; OPT: store i8
define amdgpu_kernel void @min_size_large_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 {
  call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1025, i32 1, i1 false)
  ret void
}

; A memcpy whose length is the runtime argument %n: the output is expected to
; contain a phi with no call before it.
; OPT-LABEL: @variable_memcpy_caller0(
; OPT-NOT: call
; OPT: phi
define amdgpu_kernel void @variable_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false)
  ret void
}

; Same body and expectations as @variable_memcpy_caller0, in a second function
; (presumably to cover the same intrinsic being used from more than one
; function; confirm against the pass).
; OPT-LABEL: @variable_memcpy_caller1(
; OPT-NOT: call
; OPT: phi
define amdgpu_kernel void @variable_memcpy_caller1(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false)
  ret void
}

; Two variable-length memcpys (lengths %n and %m) in one function: the output
; is expected to contain two phis, with no call before, between or after them.
; OPT-LABEL: @memcpy_multi_use_one_function(
; OPT-NOT: call
; OPT: phi
; OPT-NOT: call
; OPT: phi
; OPT-NOT: call
define amdgpu_kernel void @memcpy_multi_use_one_function(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n, i64 %m) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false)
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %m, i32 1, i1 false)
  ret void
}

; A variable-length memcpy with an i32 length, copying from addrspace(3) to
; addrspace(1): the expanded code is expected to load through an addrspace(3)
; pointer and address the destination through an addrspace(1) pointer.
; OPT-LABEL: @memcpy_alt_type(
; OPT: phi
; OPT: getelementptr inbounds i8, i8 addrspace(3)*
; OPT: load i8, i8 addrspace(3)*
; OPT: getelementptr inbounds i8, i8 addrspace(1)*
; OPT: store i8
define amdgpu_kernel void @memcpy_alt_type(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n) #0 {
  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n, i32 1, i1 false)
  ret void
}

; One of the uses in the function should be expanded, the other left alone:
; the variable-length copy into %dst0 becomes an i8 load/store sequence, while
; the constant 102-length copy into %dst1 stays as a call to the intrinsic.
; OPT-LABEL: @memcpy_multi_use_one_function_keep_small(
; OPT: getelementptr inbounds i8, i8 addrspace(1)*
; OPT: load i8, i8 addrspace(1)*
; OPT: getelementptr inbounds i8, i8 addrspace(1)*
; OPT: store i8

; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i32 1, i1 false)
define amdgpu_kernel void @memcpy_multi_use_one_function_keep_small(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n) #0 {
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false)
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i32 1, i1 false)
  ret void
}

; Attribute groups: #0 is applied to the test kernels, #1 to the intrinsic
; declarations.
attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }