Revert r311077: [LV] Using VPlan ...

This causes LLVM to fail an assertion on PPC64 and to crash or loop
infinitely in other cases. Filed http://llvm.org/PR34248 with a reproducer attached.

llvm-svn: 311304
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll
index eb12803..37a6d4e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll
@@ -26,9 +26,9 @@
 ; CHECK-NEXT:    br i1 [[TMP3]], label %[[PRED_UDIV_IF:.*]], label %[[PRED_UDIV_CONTINUE:.*]]
 ; CHECK:       [[PRED_UDIV_IF]]:
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw i64 [[TMP4]], %x
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = udiv i64 [[TMP6]], [[TMP5]]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = add nsw i64 [[TMP5]], %x
+; CHECK-NEXT:    [[TMP7:%.*]] = udiv i64 [[TMP4]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i64> undef, i64 [[TMP7]], i32 0
 ; CHECK-NEXT:    br label %[[PRED_UDIV_CONTINUE]]
 ; CHECK:       [[PRED_UDIV_CONTINUE]]:
@@ -37,9 +37,9 @@
 ; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_UDIV_IF1:.*]], label %[[PRED_UDIV_CONTINUE2]]
 ; CHECK:       [[PRED_UDIV_IF1]]:
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1
-; CHECK-NEXT:    [[TMP12:%.*]] = add nsw i64 [[TMP11]], %x
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1
-; CHECK-NEXT:    [[TMP14:%.*]] = udiv i64 [[TMP13]], [[TMP12]]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1
+; CHECK-NEXT:    [[TMP13:%.*]] = add nsw i64 [[TMP12]], %x
+; CHECK-NEXT:    [[TMP14:%.*]] = udiv i64 [[TMP11]], [[TMP13]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP14]], i32 1
 ; CHECK-NEXT:    br label %[[PRED_UDIV_CONTINUE2]]
 ; CHECK:       [[PRED_UDIV_CONTINUE2]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll
index b0ebb4e..3be0865 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll
@@ -18,8 +18,8 @@
 ; Cost of udiv:
 ;   (udiv(2) + extractelement(6) + insertelement(3)) / 2 = 5
 ;
-; CHECK: Scalarizing and predicating: %tmp4 = udiv i32 %tmp2, %tmp3
 ; CHECK: Found an estimated cost of 5 for VF 2 For instruction: %tmp4 = udiv i32 %tmp2, %tmp3
+; CHECK: Scalarizing and predicating: %tmp4 = udiv i32 %tmp2, %tmp3
 ;
 define i32 @predicated_udiv(i32* %a, i32* %b, i1 %c, i64 %n) {
 entry:
@@ -59,8 +59,8 @@
 ; Cost of store:
 ;   (store(4) + extractelement(3)) / 2 = 3
 ;
-; CHECK: Scalarizing and predicating: store i32 %tmp2, i32* %tmp0, align 4
 ; CHECK: Found an estimated cost of 3 for VF 2 For instruction: store i32 %tmp2, i32* %tmp0, align 4
+; CHECK: Scalarizing and predicating: store i32 %tmp2, i32* %tmp0, align 4
 ;
 define void @predicated_store(i32* %a, i1 %c, i32 %x, i64 %n) {
 entry:
@@ -98,10 +98,10 @@
 ; Cost of udiv:
 ;   (udiv(2) + extractelement(3) + insertelement(3)) / 2 = 4
 ;
-; CHECK: Scalarizing: %tmp3 = add nsw i32 %tmp2, %x
-; CHECK: Scalarizing and predicating: %tmp4 = udiv i32 %tmp2, %tmp3
 ; CHECK: Found an estimated cost of 2 for VF 2 For instruction: %tmp3 = add nsw i32 %tmp2, %x
 ; CHECK: Found an estimated cost of 4 for VF 2 For instruction: %tmp4 = udiv i32 %tmp2, %tmp3
+; CHECK: Scalarizing: %tmp3 = add nsw i32 %tmp2, %x
+; CHECK: Scalarizing and predicating: %tmp4 = udiv i32 %tmp2, %tmp3
 ;
 define i32 @predicated_udiv_scalarized_operand(i32* %a, i1 %c, i32 %x, i64 %n) {
 entry:
@@ -143,10 +143,10 @@
 ; Cost of store:
 ;   store(4) / 2 = 2
 ;
-; CHECK: Scalarizing: %tmp2 = add nsw i32 %tmp1, %x
-; CHECK: Scalarizing and predicating: store i32 %tmp2, i32* %tmp0, align 4
 ; CHECK: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = add nsw i32 %tmp1, %x
 ; CHECK: Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp2, i32* %tmp0, align 4
+; CHECK: Scalarizing: %tmp2 = add nsw i32 %tmp1, %x
+; CHECK: Scalarizing and predicating: store i32 %tmp2, i32* %tmp0, align 4
 ;
 define void @predicated_store_scalarized_operand(i32* %a, i1 %c, i32 %x, i64 %n) {
 entry:
@@ -192,16 +192,16 @@
 ; Cost of store:
 ;   store(4) / 2 = 2
 ;
-; CHECK-NOT: Scalarizing: %tmp2 = add i32 %tmp1, %x
-; CHECK:     Scalarizing and predicating: %tmp3 = sdiv i32 %tmp1, %tmp2
-; CHECK:     Scalarizing and predicating: %tmp4 = udiv i32 %tmp3, %tmp2
-; CHECK:     Scalarizing: %tmp5 = sub i32 %tmp4, %x
-; CHECK:     Scalarizing and predicating: store i32 %tmp5, i32* %tmp0, align 4
 ; CHECK:     Found an estimated cost of 1 for VF 2 For instruction: %tmp2 = add i32 %tmp1, %x
 ; CHECK:     Found an estimated cost of 5 for VF 2 For instruction: %tmp3 = sdiv i32 %tmp1, %tmp2
 ; CHECK:     Found an estimated cost of 5 for VF 2 For instruction: %tmp4 = udiv i32 %tmp3, %tmp2
 ; CHECK:     Found an estimated cost of 2 for VF 2 For instruction: %tmp5 = sub i32 %tmp4, %x
 ; CHECK:     Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp5, i32* %tmp0, align 4
+; CHECK-NOT: Scalarizing: %tmp2 = add i32 %tmp1, %x
+; CHECK:     Scalarizing and predicating: %tmp3 = sdiv i32 %tmp1, %tmp2
+; CHECK:     Scalarizing and predicating: %tmp4 = udiv i32 %tmp3, %tmp2
+; CHECK:     Scalarizing: %tmp5 = sub i32 %tmp4, %x
+; CHECK:     Scalarizing and predicating: store i32 %tmp5, i32* %tmp0, align 4
 ;
 define void @predication_multi_context(i32* %a, i1 %c, i32 %x, i64 %n) {
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll
index 9fdf22e..e7096c2 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/load-store-scalarization-cost.ll
@@ -24,10 +24,10 @@
 for.end:
   ret void
 
-; CHECK: LV: Scalarizing:  %tmp1 = load i32, i32* %tmp0, align 4
-; CHECK: LV: Scalarizing:  store i32 %tmp2, i32* %tmp0, align 4
-
 ; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction:   %tmp1 = load i32, i32* %tmp0, align 4
 ; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction:   store i32 %tmp2, i32* %tmp0, align 4
+
+; CHECK: LV: Scalarizing:  %tmp1 = load i32, i32* %tmp0, align 4
+; CHECK: LV: Scalarizing:  store i32 %tmp2, i32* %tmp0, align 4
 }
 
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
index bc9247f..3a8237f 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -467,6 +467,13 @@
 ; SINK-AFTER:   %[[VCONV:.+]] = sext <4 x i16> %[[VSHUF]] to <4 x i32>
 ; SINK-AFTER:   %[[VCONV3:.+]] = sext <4 x i16> %wide.load to <4 x i32>
 ; SINK-AFTER:   mul nsw <4 x i32> %[[VCONV3]], %[[VCONV]]
+; Check also that the sext sank after the load in the scalar loop.
+; SINK-AFTER: for.body
+; SINK-AFTER:   %scalar.recur = phi i16 [ %scalar.recur.init, %scalar.ph ], [ %[[LOAD:.+]], %for.body ]
+; SINK-AFTER:   %[[LOAD]] = load i16, i16* %arrayidx2
+; SINK-AFTER:   %[[CONV:.+]] = sext i16 %scalar.recur to i32
+; SINK-AFTER:   %[[CONV3:.+]] = sext i16 %[[LOAD]] to i32
+; SINK-AFTER:   %mul = mul nsw i32 %[[CONV3]], %[[CONV]]
 ;
 define void @sink_after(i16* %a, i32* %b, i64 %n) {
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll
index c654b79..9765fde 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll
@@ -209,9 +209,9 @@
 ; CHECK:   br i1 {{.*}}, label %[[IF0:.+]], label %[[CONT0:.+]]
 ; CHECK: [[IF0]]:
 ; CHECK:   %[[T00:.+]] = extractelement <2 x i32> %wide.load, i32 0
-; CHECK:   %[[T01:.+]] = add nsw i32 %[[T00]], %x
-; CHECK:   %[[T02:.+]] = extractelement <2 x i32> %wide.load, i32 0
-; CHECK:   %[[T03:.+]] = udiv i32 %[[T02]], %[[T01]]
+; CHECK:   %[[T01:.+]] = extractelement <2 x i32> %wide.load, i32 0
+; CHECK:   %[[T02:.+]] = add nsw i32 %[[T01]], %x
+; CHECK:   %[[T03:.+]] = udiv i32 %[[T00]], %[[T02]]
 ; CHECK:   %[[T04:.+]] = insertelement <2 x i32> undef, i32 %[[T03]], i32 0
 ; CHECK:   br label %[[CONT0]]
 ; CHECK: [[CONT0]]:
@@ -219,9 +219,9 @@
 ; CHECK:   br i1 {{.*}}, label %[[IF1:.+]], label %[[CONT1:.+]]
 ; CHECK: [[IF1]]:
 ; CHECK:   %[[T06:.+]] = extractelement <2 x i32> %wide.load, i32 1
-; CHECK:   %[[T07:.+]] = add nsw i32 %[[T06]], %x
-; CHECK:   %[[T08:.+]] = extractelement <2 x i32> %wide.load, i32 1
-; CHECK:   %[[T09:.+]] = udiv i32 %[[T08]], %[[T07]]
+; CHECK:   %[[T07:.+]] = extractelement <2 x i32> %wide.load, i32 1
+; CHECK:   %[[T08:.+]] = add nsw i32 %[[T07]], %x
+; CHECK:   %[[T09:.+]] = udiv i32 %[[T06]], %[[T08]]
 ; CHECK:   %[[T10:.+]] = insertelement <2 x i32> %[[T05]], i32 %[[T09]], i32 1
 ; CHECK:   br label %[[CONT1]]
 ; CHECK: [[CONT1]]: