Change memcpy/memset/memmove to have dest and source alignments.

Note, this was reviewed (and more details are in) http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html

These intrinsics currently have an explicit alignment argument which is
required to be a constant integer.  It represents the alignment of the
source and dest, and so must be the minimum of those.

This change allows source and dest to each have their own alignments
by using the alignment attribute on their arguments.  The alignment
argument itself is removed.

There are a few places in the code for which the code needs to be
checked by an expert as to whether using only src/dest alignment is
safe.  For those places, they currently take the minimum of src/dest
alignments which matches the current behaviour.

For example, code which used to read:
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 8, i1 false)
will now read:
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 500, i1 false)

For out of tree owners, I was able to strip alignment from calls using sed by replacing:
  (call.*llvm\.memset.*)i32\ [0-9]*\,\ i1 false\)
with:
  $1i1 false)

and similarly for memmove and memcpy.

I then added back in alignment to test cases which needed it.

A similar commit will be made to clang which actually has many differences in alignment as now
IRBuilder can generate different source/dest alignments on calls.

In IRBuilder itself, a new argument was added.  Instead of calling:
  CreateMemCpy(Dst, Src, getInt64(Size), DstAlign, /* isVolatile */ false)
you now call
  CreateMemCpy(Dst, Src, getInt64(Size), DstAlign, SrcAlign, /* isVolatile */ false)

There is a temporary class (IntegerAlignment) which takes the source alignment and rejects
implicit conversion from bool.  This is to prevent isVolatile here from passing its default
parameter to the source alignment.

Note, changes in future can now be made to codegen.  I didn't change anything here, but this
change should enable better memcpy code sequences.

Reviewed by Hal Finkel.

llvm-svn: 253511
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index cde26cc..2bc96cc 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -60,14 +60,18 @@
   return T;
 }
 
-Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
+Instruction *InstCombiner::SimplifyMemTransfer(MemTransferInst *MI) {
   unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, AC, DT);
   unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, AC, DT);
-  unsigned MinAlign = std::min(DstAlign, SrcAlign);
-  unsigned CopyAlign = MI->getAlignment();
+  unsigned CopyDestAlign = MI->getDestAlignment();
+  unsigned CopySrcAlign = MI->getSrcAlignment();
 
-  if (CopyAlign < MinAlign) {
-    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), MinAlign, false));
+  if (CopyDestAlign < DstAlign) {
+    MI->setDestAlignment(DstAlign);
+    return MI;
+  }
+  if (CopySrcAlign < SrcAlign) {
+    MI->setSrcAlignment(SrcAlign);
     return MI;
   }
 
@@ -135,8 +139,8 @@
 
   // If the memcpy/memmove provides better alignment info than we can
   // infer, use it.
-  SrcAlign = std::max(SrcAlign, CopyAlign);
-  DstAlign = std::max(DstAlign, CopyAlign);
+  SrcAlign = std::max(SrcAlign, CopySrcAlign);
+  DstAlign = std::max(DstAlign, CopyDestAlign);
 
   Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
   Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
@@ -156,9 +160,8 @@
 
 Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
   unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, AC, DT);
-  if (MI->getAlignment() < Alignment) {
-    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
-                                             Alignment, false));
+  if (MI->getDestAlignment() < Alignment) {
+    MI->setDestAlignment(Alignment);
     return MI;
   }
 
@@ -168,7 +171,7 @@
   if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
     return nullptr;
   uint64_t Len = LenC->getLimitedValue();
-  Alignment = MI->getAlignment();
+  Alignment = MI->getDestAlignment();
   assert(Len && "0-sized memory setting should be removed already.");
 
   // memset(s,c,n) -> store s, c (for n=1,2,4,8)
@@ -743,8 +746,8 @@
 
     // If we can determine a pointer alignment that is bigger than currently
     // set, update the alignment.
-    if (isa<MemTransferInst>(MI)) {
-      if (Instruction *I = SimplifyMemTransfer(MI))
+    if (auto *MTI = dyn_cast<MemTransferInst>(MI)) {
+      if (Instruction *I = SimplifyMemTransfer(MTI))
         return I;
     } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
       if (Instruction *I = SimplifyMemSet(MSI))
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 1bb3ad6..4b16702 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -558,7 +558,7 @@
   Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
   Instruction *MatchBSwap(BinaryOperator &I);
   bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
-  Instruction *SimplifyMemTransfer(MemIntrinsic *MI);
+  Instruction *SimplifyMemTransfer(MemTransferInst *MI);
   Instruction *SimplifyMemSet(MemSetInst *MI);
 
   Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned);
diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 21ef320..2a9749d 100644
--- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -1356,20 +1356,21 @@
   Value *LenShadow = IRB.CreateMul(
       I.getLength(),
       ConstantInt::get(I.getLength()->getType(), DFSF.DFS.ShadowWidth / 8));
-  Value *AlignShadow;
-  if (ClPreserveAlignment) {
-    AlignShadow = IRB.CreateMul(I.getAlignmentCst(),
-                                ConstantInt::get(I.getAlignmentCst()->getType(),
-                                                 DFSF.DFS.ShadowWidth / 8));
-  } else {
-    AlignShadow = ConstantInt::get(I.getAlignmentCst()->getType(),
-                                   DFSF.DFS.ShadowWidth / 8);
-  }
   Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx);
   DestShadow = IRB.CreateBitCast(DestShadow, Int8Ptr);
   SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr);
-  IRB.CreateCall(I.getCalledValue(), {DestShadow, SrcShadow, LenShadow,
-                                      AlignShadow, I.getVolatileCst()});
+  auto *MTI = cast<MemTransferInst>(IRB.CreateCall(I.getCalledValue(),
+                                                   { DestShadow, SrcShadow,
+                                                     LenShadow,
+                                                     I.getVolatileCst() }));
+
+  if (ClPreserveAlignment) {
+    MTI->setDestAlignment(I.getDestAlignment() * (DFSF.DFS.ShadowWidth / 8));
+    MTI->setSrcAlignment(I.getSrcAlignment() * (DFSF.DFS.ShadowWidth / 8));
+  } else {
+    MTI->setDestAlignment(DFSF.DFS.ShadowWidth / 8);
+    MTI->setSrcAlignment(DFSF.DFS.ShadowWidth / 8);
+  }
 }
 
 void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 218e3e9..836995f 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -1117,7 +1117,7 @@
               unsigned CopyAlign = std::min(ArgAlign, kShadowTLSAlignment);
               Value *Cpy = EntryIRB.CreateMemCpy(
                   getShadowPtr(V, EntryIRB.getInt8Ty(), EntryIRB), Base, Size,
-                  CopyAlign);
+                  CopyAlign, CopyAlign);
               DEBUG(dbgs() << "  ByValCpy: " << *Cpy << "\n");
               (void)Cpy;
             }
@@ -2482,7 +2482,7 @@
         unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment);
         Store = IRB.CreateMemCpy(ArgShadowBase,
                                  getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB),
-                                 Size, Alignment);
+                                 Size, Alignment, Alignment);
       } else {
         Size = DL.getTypeAllocSize(A->getType());
         if (ArgOffset + Size > kParamTLSSize) break;
@@ -2834,7 +2834,7 @@
         Value *Base = getShadowPtrForVAArgument(RealTy, IRB, OverflowOffset);
         OverflowOffset += RoundUpToAlignment(ArgSize, 8);
         IRB.CreateMemCpy(Base, MSV.getShadowPtr(A, IRB.getInt8Ty(), IRB),
-                         ArgSize, kShadowTLSAlignment);
+                         ArgSize, kShadowTLSAlignment, kShadowTLSAlignment);
       } else {
         ArgKind AK = classifyArgument(A);
         if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
@@ -2912,7 +2912,7 @@
         IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset),
                       VAArgOverflowSize);
       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
-      IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8);
+      IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8, 8);
     }
 
     // Instrument va_start.
@@ -2931,7 +2931,7 @@
       Value *RegSaveAreaShadowPtr =
         MSV.getShadowPtr(RegSaveAreaPtr, IRB.getInt8Ty(), IRB);
       IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy,
-                       AMD64FpEndOffset, 16);
+                       AMD64FpEndOffset, 16, 16);
 
       Value *OverflowArgAreaPtrPtr =
         IRB.CreateIntToPtr(
@@ -2943,7 +2943,8 @@
         MSV.getShadowPtr(OverflowArgAreaPtr, IRB.getInt8Ty(), IRB);
       Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
                                              AMD64FpEndOffset);
-      IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, 16);
+      IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize,
+                       16, 16);
     }
   }
 };
@@ -3029,7 +3030,7 @@
       // If there is a va_start in this function, make a backup copy of
       // va_arg_tls somewhere in the function entry block.
       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
-      IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8);
+      IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8, 8);
     }
 
     // Instrument va_start.
@@ -3044,7 +3045,7 @@
       Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
       Value *RegSaveAreaShadowPtr =
       MSV.getShadowPtr(RegSaveAreaPtr, IRB.getInt8Ty(), IRB);
-      IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, CopySize, 8);
+      IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, CopySize, 8, 8);
     }
   }
 };
diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index a13e552..f041a29 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -347,6 +347,8 @@
       // instruction, but only for one operand, save it. If we reach the
       // other operand through another assumption later, then we may
       // change the alignment at that point.
+      // FIXME: The above statement is no longer true.  Fix the code below
+      // to be able to reason about different dest/src alignments.
       if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
         unsigned NewSrcAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
           MTI->getSource(), SE);
@@ -376,20 +378,23 @@
         if (AltSrcAlignment <= std::max(NewDestAlignment, AltDestAlignment))
           NewAlignment = std::max(NewAlignment, AltSrcAlignment);
 
-        if (NewAlignment > MI->getAlignment()) {
-          MI->setAlignment(ConstantInt::get(Type::getInt32Ty(
-            MI->getParent()->getContext()), NewAlignment));
+        if (NewAlignment > MTI->getDestAlignment()) {
+          MTI->setDestAlignment(NewAlignment);
+          ++NumMemIntAlignChanged;
+        }
+
+        if (NewAlignment > MTI->getSrcAlignment()) {
+          MTI->setSrcAlignment(NewAlignment);
           ++NumMemIntAlignChanged;
         }
 
         NewDestAlignments.insert(std::make_pair(MTI, NewDestAlignment));
         NewSrcAlignments.insert(std::make_pair(MTI, NewSrcAlignment));
-      } else if (NewDestAlignment > MI->getAlignment()) {
+      } else if (NewDestAlignment > MI->getDestAlignment()) {
         assert((!isa<MemIntrinsic>(MI) || isa<MemSetInst>(MI)) &&
                "Unknown memory intrinsic");
 
-        MI->setAlignment(ConstantInt::get(Type::getInt32Ty(
-          MI->getParent()->getContext()), NewDestAlignment));
+        MI->setDestAlignment(NewDestAlignment);
         ++NumMemIntAlignChanged;
       }
     }
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 36ad0a5..b0e6d19 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -611,7 +611,7 @@
           // as any store/memset/memcpy is likely using vector instructions so
           // shortening it to not vector size is likely to be slower
           MemIntrinsic* DepIntrinsic = cast<MemIntrinsic>(DepWrite);
-          unsigned DepWriteAlign = DepIntrinsic->getAlignment();
+          unsigned DepWriteAlign = DepIntrinsic->getDestAlignment();
           if (llvm::isPowerOf2_64(InstWriteOffset) ||
               ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
 
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index c2fb8cd..aed43b3 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -414,8 +414,8 @@
     return false;
 
   return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
-                                 MSI->getAlignment(), MSI->getValue(), MSI, Ev,
-                                 BECount, /*NegStride=*/false);
+                                 MSI->getDestAlignment(), MSI->getValue(), MSI,
+                                 Ev, BECount, /*NegStride=*/false);
 }
 
 /// mayLoopAccessLocation - Return true if the specified loop might access the
@@ -700,7 +700,7 @@
 
   CallInst *NewCall =
       Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
-                           std::min(SI->getAlignment(), LI->getAlignment()));
+                           SI->getAlignment(), LI->getAlignment());
   NewCall->setDebugLoc(SI->getDebugLoc());
 
   DEBUG(dbgs() << "  Formed memcpy: " << *NewCall << "\n"
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index f80b07b..2db2b80 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -229,7 +229,8 @@
 
   void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
     int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
-    addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI);
+    addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlignment(),
+             MSI);
   }
 
   void addRange(int64_t Start, int64_t Size, Value *Ptr,
@@ -819,20 +820,17 @@
 
   // If all checks passed, then we can transform M.
 
-  // Make sure to use the lesser of the alignment of the source and the dest
-  // since we're changing where we're reading from, but don't want to increase
-  // the alignment past what can be read from or written to.
   // TODO: Is this worth it if we're creating a less aligned memcpy? For
   // example we could be moving from movaps -> movq on x86.
-  unsigned Align = std::min(MDep->getAlignment(), M->getAlignment());
-
   IRBuilder<> Builder(M);
   if (UseMemMove)
     Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(),
-                          Align, M->isVolatile());
+                          M->getDestAlignment(), MDep->getSrcAlignment(),
+                          M->isVolatile());
   else
     Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(),
-                         Align, M->isVolatile());
+                         M->getDestAlignment(), MDep->getSrcAlignment(),
+                         M->isVolatile());
 
   // Remove the instruction we're replacing.
   MD->removeInstruction(M);
@@ -878,7 +876,7 @@
   // If Dest is aligned, and SrcSize is constant, use the minimum alignment
   // of the sum.
   const unsigned DestAlign =
-      std::max(MemSet->getAlignment(), MemCpy->getAlignment());
+      std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment());
   if (DestAlign > 1)
     if (ConstantInt *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
       Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign);
@@ -935,7 +933,7 @@
 
   IRBuilder<> Builder(MemCpy);
   Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
-                       CopySize, MemCpy->getAlignment());
+                       CopySize, MemCpy->getDestAlignment());
   return true;
 }
 
@@ -961,7 +959,7 @@
       if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
         IRBuilder<> Builder(M);
         Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
-                             M->getAlignment(), false);
+                             M->getDestAlignment(), false);
         MD->removeInstruction(M);
         M->eraseFromParent();
         ++NumCpyToSet;
@@ -990,8 +988,11 @@
   //   d) memcpy from a just-memset'd source can be turned into memset.
   if (DepInfo.isClobber()) {
     if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
+      // FIXME: Can we pass in either of dest/src alignment here instead of
+      // convervatively taking the minimum?
+      unsigned Align = std::min(M->getDestAlignment(), M->getSrcAlignment());
       if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
-                               CopySize->getZExtValue(), M->getAlignment(),
+                               CopySize->getZExtValue(), Align,
                                C)) {
         MD->removeInstruction(M);
         M->eraseFromParent();
@@ -1108,7 +1109,11 @@
       getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
           *CS->getParent()->getParent());
   DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  if (MDep->getAlignment() < ByValAlign &&
+  // FIXME: Can we use either of dest/src alignment here instead of
+  // convervatively taking the minimum?
+  unsigned MinAlign = std::min(MDep->getDestAlignment(),
+                               MDep->getSrcAlignment());
+  if (MinAlign < ByValAlign &&
       getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL,
                                  CS.getInstruction(), &AC, &DT) < ByValAlign)
     return false;
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index bfd48ff..223c531 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2618,8 +2618,7 @@
       assert(!IsSplit);
       assert(NewBeginOffset == BeginOffset);
       II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
-      Type *CstTy = II.getAlignmentCst()->getType();
-      II.setAlignment(ConstantInt::get(CstTy, getSliceAlign()));
+      II.setDestAlignment(getSliceAlign());
 
       deleteIfTriviallyDead(OldPtr);
       return false;
@@ -2735,15 +2734,16 @@
     // update both source and dest of a single call.
     if (!IsSplittable) {
       Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
-      if (IsDest)
+      if (IsDest) {
         II.setDest(AdjustedPtr);
-      else
+
+        if (II.getDestAlignment() > SliceAlign)
+          II.setDestAlignment(MinAlign(II.getDestAlignment(), SliceAlign));
+      } else {
         II.setSource(AdjustedPtr);
 
-      if (II.getAlignment() > SliceAlign) {
-        Type *CstTy = II.getAlignmentCst()->getType();
-        II.setAlignment(
-            ConstantInt::get(CstTy, MinAlign(II.getAlignment(), SliceAlign)));
+        if (II.getSrcAlignment() > SliceAlign)
+          II.setSrcAlignment(MinAlign(II.getSrcAlignment(), SliceAlign));
       }
 
       DEBUG(dbgs() << "          to: " << II << "\n");
@@ -2796,8 +2796,10 @@
     // Compute the relative offset for the other pointer within the transfer.
     unsigned IntPtrWidth = DL.getPointerSizeInBits(OtherAS);
     APInt OtherOffset(IntPtrWidth, NewBeginOffset - BeginOffset);
-    unsigned OtherAlign = MinAlign(II.getAlignment() ? II.getAlignment() : 1,
-                                   OtherOffset.zextOrTrunc(64).getZExtValue());
+    unsigned OtherDestAlign = MinAlign(II.getDestAlignment() ? II.getDestAlignment() : 1,
+                                       OtherOffset.zextOrTrunc(64).getZExtValue());
+    unsigned OtherSrcAlign = MinAlign(II.getSrcAlignment() ? II.getSrcAlignment() : 1,
+                                       OtherOffset.zextOrTrunc(64).getZExtValue());
 
     if (EmitMemCpy) {
       // Compute the other pointer, folding as much as possible to produce
@@ -2809,9 +2811,11 @@
       Type *SizeTy = II.getLength()->getType();
       Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
 
-      CallInst *New = IRB.CreateMemCpy(
-          IsDest ? OurPtr : OtherPtr, IsDest ? OtherPtr : OurPtr, Size,
-          MinAlign(SliceAlign, OtherAlign), II.isVolatile());
+      CallInst *New = IRB.CreateMemCpy(IsDest ? OurPtr : OtherPtr,
+                                       IsDest ? OtherPtr : OurPtr, Size,
+                                       MinAlign(SliceAlign, OtherDestAlign),
+                                       MinAlign(SliceAlign, OtherSrcAlign),
+                                       II.isVolatile());
       (void)New;
       DEBUG(dbgs() << "          to: " << *New << "\n");
       return false;
@@ -2843,7 +2847,7 @@
 
     Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
                                    OtherPtr->getName() + ".");
-    unsigned SrcAlign = OtherAlign;
+    unsigned SrcAlign = OtherSrcAlign;
     Value *DstPtr = &NewAI;
     unsigned DstAlign = SliceAlign;
     if (!IsDest) {
diff --git a/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 114d22d..679e241 100644
--- a/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -716,7 +716,7 @@
         SrcPtr = Builder.CreateBitCast(SrcPtr, AIPTy);
 
         LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
-        SrcVal->setAlignment(MTI->getAlignment());
+        SrcVal->setAlignment(MTI->getSrcAlignment());
         Builder.CreateStore(SrcVal, NewAI);
       } else if (GetUnderlyingObject(MTI->getDest(), DL, 0) != OrigAI) {
         // Src must be OrigAI, change this to be a load from NewAI then a store
@@ -733,7 +733,7 @@
         Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), AIPTy);
 
         StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
-        NewStore->setAlignment(MTI->getAlignment());
+        NewStore->setAlignment(MTI->getDestAlignment());
       } else {
         // Noop transfer. Src == Dst
       }
@@ -2182,7 +2182,8 @@
   // that doesn't have anything to do with the alloca that we are promoting. For
   // memset, this Value* stays null.
   Value *OtherPtr = nullptr;
-  unsigned MemAlignment = MI->getAlignment();
+  unsigned DestMemAlignment = MI->getDestAlignment();
+  unsigned SrcMemAlignment = 0;
   if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy
     if (Inst == MTI->getRawDest())
       OtherPtr = MTI->getRawSource();
@@ -2190,6 +2191,7 @@
       assert(Inst == MTI->getRawSource());
       OtherPtr = MTI->getRawDest();
     }
+    SrcMemAlignment = MTI->getSrcAlignment();
   }
 
   // If there is an other pointer, we want to convert it to the same pointer
@@ -2235,7 +2237,8 @@
   for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
     // If this is a memcpy/memmove, emit a GEP of the other element address.
     Value *OtherElt = nullptr;
-    unsigned OtherEltAlign = MemAlignment;
+    unsigned OtherDestEltAlign = DestMemAlignment;
+    unsigned OtherSrcEltAlign = SrcMemAlignment;
 
     if (OtherPtr) {
       Value *Idx[2] = { Zero,
@@ -2258,7 +2261,8 @@
       // mem intrinsic and the alignment of the element.  If the alignment of
       // the memcpy (f.e.) is 32 but the element is at a 4-byte offset, then the
       // known alignment is just 4 bytes.
-      OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
+      OtherDestEltAlign = (unsigned)MinAlign(OtherDestEltAlign, EltOffset);
+      OtherSrcEltAlign = (unsigned)MinAlign(OtherSrcEltAlign, EltOffset);
     }
 
     Value *EltPtr = NewElts[i];
@@ -2269,12 +2273,13 @@
       if (isa<MemTransferInst>(MI)) {
         if (SROADest) {
           // From Other to Alloca.
-          Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI);
+          Value *Elt = new LoadInst(OtherElt, "tmp", false,
+                                    OtherSrcEltAlign, MI);
           new StoreInst(Elt, EltPtr, MI);
         } else {
           // From Alloca to Other.
           Value *Elt = new LoadInst(EltPtr, "tmp", MI);
-          new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI);
+          new StoreInst(Elt, OtherElt, false, OtherDestEltAlign, MI);
         }
         continue;
       }
@@ -2337,9 +2342,11 @@
       Value *Src = SROADest ? OtherElt : EltPtr;  // Src ptr
 
       if (isa<MemCpyInst>(MI))
-        Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,MI->isVolatile());
+        Builder.CreateMemCpy(Dst, Src, EltSize, OtherDestEltAlign,
+                             OtherSrcEltAlign, MI->isVolatile());
       else
-        Builder.CreateMemMove(Dst, Src, EltSize,OtherEltAlign,MI->isVolatile());
+        Builder.CreateMemMove(Dst, Src, EltSize, OtherDestEltAlign,
+                              OtherSrcEltAlign, MI->isVolatile());
     }
   }
   DeadInsts.push_back(MI);
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index a31131b..7cb7c3a 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -851,7 +851,7 @@
   // Always generate a memcpy of alignment 1 here because we don't know
   // the alignment of the src pointer.  Other optimizations can infer
   // better alignment.
-  Builder.CreateMemCpy(Dst, Src, Size, /*Align=*/1);
+  Builder.CreateMemCpy(Dst, Src, Size, /*DestAlign=*/1, /*SrcAlign=*/1);
 }
 
 /// When inlining a call site that has a byval argument,
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 144e235..8e2eeb9 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -238,7 +238,7 @@
   // concatenation for us.  Make a memcpy to copy the nul byte with align = 1.
   B.CreateMemCpy(CpyDst, Src,
                  ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1),
-                 1);
+                 1, 1);
   return Dst;
 }
 
@@ -471,7 +471,8 @@
   // We have enough information to now generate the memcpy call to do the
   // copy for us.  Make a memcpy to copy the nul byte with align = 1.
   B.CreateMemCpy(Dst, Src,
-                 ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), 1);
+                 ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), 1,
+                 1);
   return Dst;
 }
 
@@ -498,7 +499,7 @@
 
   // We have enough information to now generate the memcpy call to do the
   // copy for us.  Make a memcpy to copy the nul byte with align = 1.
-  B.CreateMemCpy(Dst, Src, LenV, 1);
+  B.CreateMemCpy(Dst, Src, LenV, 1, 1);
   return DstEnd;
 }
 
@@ -538,7 +539,7 @@
 
   Type *PT = Callee->getFunctionType()->getParamType(0);
   // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
-  B.CreateMemCpy(Dst, Src, ConstantInt::get(DL.getIntPtrType(PT), Len), 1);
+  B.CreateMemCpy(Dst, Src, ConstantInt::get(DL.getIntPtrType(PT), Len), 1, 1);
 
   return Dst;
 }
@@ -917,7 +918,7 @@
 
   // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
   B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
-                 CI->getArgOperand(2), 1);
+                 CI->getArgOperand(2), 1, 1);
   return CI->getArgOperand(0);
 }
 
@@ -929,7 +930,7 @@
 
   // memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
   B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
-                  CI->getArgOperand(2), 1);
+                  CI->getArgOperand(2), 1, 1);
   return CI->getArgOperand(0);
 }
 
@@ -1758,7 +1759,7 @@
     B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
                    ConstantInt::get(DL.getIntPtrType(CI->getContext()),
                                     FormatStr.size() + 1),
-                   1); // Copy the null byte.
+                   1, 1); // Copy the null byte.
     return ConstantInt::get(CI->getType(), FormatStr.size());
   }
 
@@ -1792,7 +1793,7 @@
       return nullptr;
     Value *IncLen =
         B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc");
-    B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1);
+    B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1, 1);
 
     // The sprintf result is the unincremented number of bytes in the string.
     return B.CreateIntCast(Len, CI->getType(), false);
@@ -2329,7 +2330,7 @@
 
   if (isFortifiedCallFoldable(CI, 3, 2, false)) {
     B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
-                   CI->getArgOperand(2), 1);
+                   CI->getArgOperand(2), 1, 1);
     return CI->getArgOperand(0);
   }
   return nullptr;
@@ -2343,7 +2344,7 @@
 
   if (isFortifiedCallFoldable(CI, 3, 2, false)) {
     B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
-                    CI->getArgOperand(2), 1);
+                    CI->getArgOperand(2), 1, 1);
     return CI->getArgOperand(0);
   }
   return nullptr;