[ARM][ParallelDSP] Replace SExt uses

As loads are combined and widened, we replaced the operands of their
sext users, whereas we should have been replacing the uses of the sexts.
I've added a load of tests; only a few of them originally caused
assertion failures, and the rest improve pattern coverage.

Differential Revision: https://reviews.llvm.org/D65740

llvm-svn: 368404
diff --git a/llvm/lib/Target/ARM/ARMParallelDSP.cpp b/llvm/lib/Target/ARM/ARMParallelDSP.cpp
index a4d3fff..6ef03c6 100644
--- a/llvm/lib/Target/ARM/ARMParallelDSP.cpp
+++ b/llvm/lib/Target/ARM/ARMParallelDSP.cpp
@@ -68,7 +68,7 @@
     }
 
     LoadInst *getBaseLoad() const {
-      return cast<LoadInst>(LHS);
+      return VecLd.front();
     }
   };
 
@@ -696,13 +696,15 @@
   // Loads[0] needs trunc while Loads[1] needs a lshr and trunc.
   // TODO: Support big-endian as well.
   Value *Bottom = IRB.CreateTrunc(WideLoad, Base->getType());
-  BaseSExt->setOperand(0, Bottom);
+  Value *NewBaseSExt = IRB.CreateSExt(Bottom, BaseSExt->getType());
+  BaseSExt->replaceAllUsesWith(NewBaseSExt);
 
   IntegerType *OffsetTy = cast<IntegerType>(Offset->getType());
   Value *ShiftVal = ConstantInt::get(LoadTy, OffsetTy->getBitWidth());
   Value *Top = IRB.CreateLShr(WideLoad, ShiftVal);
   Value *Trunc = IRB.CreateTrunc(Top, OffsetTy);
-  OffsetSExt->setOperand(0, Trunc);
+  Value *NewOffsetSExt = IRB.CreateSExt(Trunc, OffsetSExt->getType());
+  OffsetSExt->replaceAllUsesWith(NewOffsetSExt);
 
   WideLoads.emplace(std::make_pair(Base,
                                    make_unique<WidenedLoad>(Loads, WideLoad)));