[Memcpy Loop Lowering] Remove the fixed int8 lowering.
Switch over to the lowering that uses target-supplied operand types.
Differential Revision: https://reviews.llvm.org/D41201
llvm-svn: 320989
diff --git a/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll
index e1a2af6..7784672 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll
@@ -1,5 +1,4 @@
; RUN: opt -S -amdgpu-lower-intrinsics %s | FileCheck -check-prefix=OPT %s
-; RUN: opt -S -amdgpu-lower-intrinsics -use-wide-memcpy-loop-lowering=true %s | FileCheck -check-prefix=WOPT %s
declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i32, i1) #1
declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i32, i1) #1
@@ -18,21 +17,14 @@
; Smallest static size which will be expanded
; OPT-LABEL: @min_size_large_static_memcpy_caller0(
; OPT-NOT: call
-; OPT: getelementptr
-; OPT-NEXT: load i8
-; OPT: getelementptr
-; OPT-NEXT: store i8
-
-; WOPT-LABEL: @min_size_large_static_memcpy_caller0(
-; WOPT-NOT: call
-; WOPT: br label %load-store-loop
-; WOPT: [[T1:%[0-9]+]] = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %loop-index
-; WOPT-NEXT: [[T2:%[0-9]+]] = load i8, i8 addrspace(1)* [[T1]]
-; WOPT-NEXT: [[T3:%[0-9]+]] = getelementptr inbounds i8, i8 addrspace(1)* %dst, i64 %loop-index
-; WOPT-NEXT: store i8 [[T2]], i8 addrspace(1)* [[T3]]
-; WOPT-NEXT: [[T4:%[0-9]+]] = add i64 %loop-index, 1
-; WOPT-NEXT: [[T5:%[0-9]+]] = icmp ult i64 [[T4]], 1025
-; WOPT-NEXT: br i1 [[T5]], label %load-store-loop, label %memcpy-split
+; OPT: br label %load-store-loop
+; OPT: [[T1:%[0-9]+]] = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %loop-index
+; OPT-NEXT: [[T2:%[0-9]+]] = load i8, i8 addrspace(1)* [[T1]]
+; OPT-NEXT: [[T3:%[0-9]+]] = getelementptr inbounds i8, i8 addrspace(1)* %dst, i64 %loop-index
+; OPT-NEXT: store i8 [[T2]], i8 addrspace(1)* [[T3]]
+; OPT-NEXT: [[T4:%[0-9]+]] = add i64 %loop-index, 1
+; OPT-NEXT: [[T5:%[0-9]+]] = icmp ult i64 [[T4]], 1025
+; OPT-NEXT: br i1 [[T5]], label %load-store-loop, label %memcpy-split
define amdgpu_kernel void @min_size_large_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i32 1, i1 false)
ret void