[InstCombine] Transform bcopy to memmove

bcopy is still widely used mainly for network apps. Sadly, LLVM has no optimizations for bcopy, but there are some for memmove. 
Since bcopy == memmove, it is profitable to transform bcopy to memmove and use current optimizations for memmove for free here.

llvm-svn: 373537
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index b722c47..88c2ef7 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -181,6 +181,7 @@
   Value *optimizeMemSet(CallInst *CI, IRBuilder<> &B);
   Value *optimizeRealloc(CallInst *CI, IRBuilder<> &B);
   Value *optimizeWcslen(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeBCopy(CallInst *CI, IRBuilder<> &B);
   // Wrapper for all String/Memory Library Call Optimizations
   Value *optimizeStringMemoryLibCall(CallInst *CI, IRBuilder<> &B);
 
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 3af754a..1fb4f28 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -2792,6 +2792,12 @@
   return nullptr;
 }
 
+Value *LibCallSimplifier::optimizeBCopy(CallInst *CI, IRBuilder<> &B) {
+  // bcopy(src, dst, n) -> llvm.memmove(dst, src, n)
+  return B.CreateMemMove(CI->getArgOperand(1), 1, CI->getArgOperand(0), 1,
+                         CI->getArgOperand(2));
+}
+
 bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) {
   LibFunc Func;
   SmallString<20> FloatFuncName = FuncName;
@@ -2870,6 +2876,8 @@
       return optimizeRealloc(CI, Builder);
     case LibFunc_wcslen:
       return optimizeWcslen(CI, Builder);
+    case LibFunc_bcopy:
+      return optimizeBCopy(CI, Builder);
     default:
       break;
     }
diff --git a/llvm/test/Transforms/InstCombine/bcopy.ll b/llvm/test/Transforms/InstCombine/bcopy.ll
new file mode 100644
index 0000000..6a53bad
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/bcopy.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare void @bcopy(i8* nocapture readonly, i8* nocapture, i32)
+
+define void @bcopy_memmove(i8* nocapture readonly %a, i8* nocapture %b) {
+; CHECK-LABEL: @bcopy_memmove(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[A:%.*]] to i64*
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[B:%.*]] to i64*
+; CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; CHECK-NEXT:    store i64 [[TMP3]], i64* [[TMP2]], align 1
+; CHECK-NEXT:    ret void
+;
+  tail call void @bcopy(i8* %a, i8* %b, i32 8)
+  ret void
+}
+
+define void @bcopy_memmove2(i8* nocapture readonly %a, i8* nocapture %b, i32 %len) {
+; CHECK-LABEL: @bcopy_memmove2(
+; CHECK-NEXT:    call void @llvm.memmove.p0i8.p0i8.i32(i8* align 1 [[B:%.*]], i8* align 1 [[A:%.*]], i32 [[LEN:%.*]], i1 false)
+; CHECK-NEXT:    ret void
+;
+  tail call void @bcopy(i8* %a, i8* %b, i32 %len)
+  ret void
+}