DA: remove uses of GEP, only ask SCEV

The Dependence Analysis (DA) has been broken for quite some time,
as it uses the GEP representation to "identify" multi-dimensional arrays.
It even wrongly detects multi-dimensional arrays in singly nested loops:

from test/Analysis/DependenceAnalysis/Coupled.ll, example @couple6
;; for (long int i = 0; i < 50; i++) {
;; A[i][3*i - 6] = i;
;; *B++ = A[i][i];

DA used to detect two subscripts, which makes no sense in LLVM IR or in
C/C++ semantics because, unlike in Fortran, there is no guarantee that a
subscript does not overlap into the next array dimension:

maximum nesting levels = 1
SrcPtrSCEV = %A
DstPtrSCEV = %A
using GEPs
subscript 0
    src = {0,+,1}<nuw><nsw><%for.body>
    dst = {0,+,1}<nuw><nsw><%for.body>
    class = 1
    loops = {1}
subscript 1
    src = {-6,+,3}<nsw><%for.body>
    dst = {0,+,1}<nuw><nsw><%for.body>
    class = 1
    loops = {1}
Separable = {}
Coupled = {1}

With the current patch, DA will correctly work on only one dimension:

maximum nesting levels = 1
SrcSCEV = {(-2424 + %A)<nsw>,+,1212}<%for.body>
DstSCEV = {%A,+,404}<%for.body>
subscript 0
    src = {(-2424 + %A)<nsw>,+,1212}<%for.body>
    dst = {%A,+,404}<%for.body>
    class = 1
    loops = {1}
Separable = {0}
Coupled = {}

This change removes all uses of GEP from DA, and we now only rely
on the SCEV representation.

The patch does not turn on -da-delinearize by default, and so the DA analysis
will be more conservative in the case of multi-dimensional memory accesses in
nested loops.

I disabled some interchange tests, as the DA is not able to disambiguate
the dependence anymore. To make DA stronger, we may need to
compute a bound on the number of iterations based on the access functions
and array dimensions.

The patch cleans up all the CHECKs in test/Transforms/LoopInterchange/*.ll to
avoid checking for snippets of LLVM IR: this form of checking is very hard to
maintain. Instead, we now check for pass output that is more meaningful
than dozens of lines of LLVM IR. Some tests now require -debug messages and are
thus only enabled in builds with assertions.

Patch written by Sebastian Pop and Aditya Kumar.

Differential Revision: https://reviews.llvm.org/D35430

llvm-svn: 326837
diff --git a/llvm/test/Transforms/LoopInterchange/call-instructions.ll b/llvm/test/Transforms/LoopInterchange/call-instructions.ll
index c33abde..eec629f 100644
--- a/llvm/test/Transforms/LoopInterchange/call-instructions.ll
+++ b/llvm/test/Transforms/LoopInterchange/call-instructions.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S | FileCheck %s
-;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch.
+; REQUIRES: asserts
+; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S -debug 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -20,6 +20,8 @@
 ;;    }
 ;; }
 
+; CHECK: Not interchanging loops. Cannot prove legality.
+
 define void @interchange_01(i32 %k, i32 %N) {
 entry:
   %cmp21 = icmp sgt i32 %N, 0
@@ -65,24 +67,6 @@
   ret void
 }
 
-; CHECK-LABEL: @interchange_01
-; CHECK: for1.ph:
-; CHECK: br label %for1.header
-
-; CHECK: for1.header:
-; CHECK-NEXT: %indvars.iv23 = phi i64 [ 0, %for1.ph ], [ %indvars.iv.next24, %for1.inc10 ]
-; CHECK-NEXT: br i1 %cmp219, label %for2.ph, label %for1.inc10
-
-; CHECK: for2:
-; CHECK: br i1 %exitcond, label %for2.loopexit, label %for2
-
-; CHECK: for1.inc10:
-; CHECK: br i1 %exitcond26, label %for1.loopexit, label %for1.header
-
-; CHECK: for1.loopexit:
-; CHECK-NEXT: br label %exit
-
-
 ;;--------------------------------------Test case 02------------------------------------
 ;; Safe to interchange, because the called function `bar` is marked as readnone,
 ;; so it cannot introduce dependences.
@@ -94,6 +78,8 @@
 ;;    }
 ;; }
 
+; CHECK: Not interchanging loops. Cannot prove legality.
+
 define void @interchange_02(i32 %k, i32 %N) {
 entry:
   %cmp21 = icmp sgt i32 %N, 0
@@ -138,21 +124,3 @@
 exit:
   ret void
 }
-
-; CHECK-LABEL: @interchange_02
-; CHECK: for1.header:
-; CHECK-NEXT: %indvars.iv23 = phi i64 [ 0, %for1.ph ], [ %indvars.iv.next24, %for1.inc10 ]
-; CHECK-NEXT: br i1 %cmp219, label %for2.split1, label %for1.loopexit
-
-; CHECK: for2.split1:
-; CHECK: br label %for2.loopexit
-
-; CHECK: for2.split:
-; CHECK-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK: br i1 %exitcond, label %for1.loopexit, label %for2
-
-; CHECK: for2.loopexit:
-; CHECK-NEXT:  br label %for1.inc10
-
-; CHECK: for1.inc10:
-; CHECK: br i1 %exitcond26, label %for2.split, label %for1.header
diff --git a/llvm/test/Transforms/LoopInterchange/current-limitations-lcssa.ll b/llvm/test/Transforms/LoopInterchange/current-limitations-lcssa.ll
index df6c6cf..b46cbb1 100644
--- a/llvm/test/Transforms/LoopInterchange/current-limitations-lcssa.ll
+++ b/llvm/test/Transforms/LoopInterchange/current-limitations-lcssa.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s
-;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch.
+; REQUIRES: asserts
+; RUN: opt < %s -basicaa -loop-interchange -S -debug 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -13,6 +13,8 @@
 ;;       for(gj=1;gj<M;gj++)
 ;;         A[gj][gi] = A[gj - 1][gi] + C[gj][gi];
 
+; CHECK: PHI Nodes in loop nest exit is not handled for now since on failure all loops branch to loop nest exit.
+
 @gi = common global i32 0
 @gj = common global i32 0
 
@@ -66,11 +68,3 @@
 for.end16:
   ret void
 }
-
-; CHECK-LABEL: @interchange_07
-; CHECK: for.body3:                                        ; preds = %for.body3.preheader, %for.body3
-; CHECK:   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 1, %for.body3.preheader ]
-; CHECK:   %5 = add nsw i64 %indvars.iv, -1
-; CHECK:   %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %5, i64 %indvars.iv25
-; CHECK:   %6 = load i32, i32* %arrayidx5
-; CHECK:   %arrayidx9 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @C, i64 0, i64 %indvars.iv, i64 %indvars.iv25
diff --git a/llvm/test/Transforms/LoopInterchange/currentLimitation.ll b/llvm/test/Transforms/LoopInterchange/currentLimitation.ll
index a0acf7e..90c46ba 100644
--- a/llvm/test/Transforms/LoopInterchange/currentLimitation.ll
+++ b/llvm/test/Transforms/LoopInterchange/currentLimitation.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S | FileCheck %s
-;; These are test that fail to interchange due to current limitation. This will go off once we extend the loop interchange pass.
+; REQUIRES: asserts
+; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S -debug 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -14,6 +14,8 @@
 ;;    for(int j=1;j<N-1;j++)
 ;;      A[j+1][i+1] = A[j+1][i+1] + k;
 
+; CHECK: Not interchanging loops. Cannot prove legality.
+
 define void @interchange_01(i32 %k, i32 %N) {
  entry:
    %sub = add nsw i32 %N, -1
@@ -49,10 +51,3 @@
  for.end17: 
    ret void
 }
-;; Inner loop not split so it is not interchanged.
-; CHECK-LABEL: @interchange_01
-; CHECK:      for.body4:
-; CHECK-NEXT:   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body4 ], [ 1, %for.body4.preheader ]
-; CHECK-NEXT:   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK-NEXT:   %arrayidx7 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv.next, i64 %indvars.iv.next29
- 
diff --git a/llvm/test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll b/llvm/test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll
index 284d73e..caa27e8 100644
--- a/llvm/test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S | FileCheck %s
-;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch.
+; REQUIRES: asserts
+; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S -debug 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -20,6 +20,12 @@
 ;;    fn2(T[k]);
 ;;  }
 
+; CHECK: Processing Inner Loop Id = 2 and OuterLoopId = 1
+; CHECK: Loops interchanged.
+
+; CHECK: Processing Inner Loop Id = 1 and OuterLoopId = 0
+; CHECK: Not interchanging loops. Cannot prove legality.
+
 @T = internal global [100 x double] zeroinitializer, align 4
 @Arr = internal global [1000 x [1000 x i32]] zeroinitializer, align 4
 
@@ -67,52 +73,3 @@
 
 declare double @fn1() readnone
 declare void @fn2(double) readnone
-
-
-;; After interchange %indvars.iv (j) should increment as the middle loop.
-;; After interchange %indvars.iv42 (i) should increment with the inner most loop.
-
-; CHECK-LABEL: @interchange_09
-
-; CHECK: for.body:
-; CHECK:   %indvars.iv45 = phi i64 [ %indvars.iv.next46, %for.cond.cleanup4 ], [ 0, %for.body.preheader ]
-; CHECK:   %call = call double @fn1()
-; CHECK:   %arrayidx = getelementptr inbounds [100 x double], [100 x double]* @T, i64 0, i64 %indvars.iv45
-; CHECK:   store double %call, double* %arrayidx, align 8
-; CHECK:   br label %for.body9.preheader
-
-; CHECK: for.cond6.preheader.preheader:
-; CHECK:   br label %for.cond6.preheader
-
-; CHECK: for.cond6.preheader:
-; CHECK:   %indvars.iv42 = phi i64 [ %indvars.iv.next43, %for.cond.cleanup8 ], [ 0, %for.cond6.preheader.preheader ]
-; CHECK:   br label %for.body9.split1
-
-; CHECK: for.body9.preheader:
-; CHECK:   br label %for.body9
-
-; CHECK: for.cond.cleanup4:
-; CHECK:   %tmp = load double, double* %arrayidx, align 8
-; CHECK:   call void @fn2(double %tmp)
-; CHECK:   %indvars.iv.next46 = add nuw nsw i64 %indvars.iv45, 1
-; CHECK:   %exitcond47 = icmp ne i64 %indvars.iv.next46, 100
-; CHECK:   br i1 %exitcond47, label %for.body, label %for.cond.cleanup
-
-; CHECK: for.cond.cleanup8:
-; CHECK:   %indvars.iv.next43 = add nuw nsw i64 %indvars.iv42, 1
-; CHECK:   %exitcond44 = icmp ne i64 %indvars.iv.next43, 1000
-; CHECK:   br i1 %exitcond44, label %for.cond6.preheader, label %for.body9.split
-
-; CHECK: for.body9:
-; CHECK:   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body9.split ], [ 1, %for.body9.preheader ]
-; CHECK:   br label %for.cond6.preheader.preheader
-
-; CHECK: for.body9.split1:
-; CHECK:   %arrayidx13 = getelementptr inbounds [1000 x [1000 x i32]], [1000 x [1000 x i32]]* @Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv42
-; CHECK:   store i32 %add, i32* %arrayidx13, align 4
-; CHECK:   br label %for.cond.cleanup8
-
-; CHECK: for.body9.split:
-; CHECK:   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK:   %exitcond = icmp ne i64 %indvars.iv.next, 1000
-; CHECK:   br i1 %exitcond, label %for.body9, label %for.cond.cleanup4
diff --git a/llvm/test/Transforms/LoopInterchange/interchange-insts-between-indvar.ll b/llvm/test/Transforms/LoopInterchange/interchange-insts-between-indvar.ll
index 51c0a16..698f6c3 100644
--- a/llvm/test/Transforms/LoopInterchange/interchange-insts-between-indvar.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchange-insts-between-indvar.ll
@@ -1,8 +1,8 @@
-; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S | FileCheck %s
+; RUN: opt < %s -basicaa -da-delinearize -loop-interchange -verify-dom-info -S -pass-remarks=loop-interchange 2>&1 | FileCheck %s
 
 @A10 = local_unnamed_addr global [3 x [3 x i32]] zeroinitializer, align 16
 
-;; Test to make sure we can handle zext intructions introduced by
+;; Test to make sure we can handle zext instructions introduced by
 ;; IndVarSimplify.
 ;;
 ;;  for (int i = 0; i < 2; ++i)
@@ -10,6 +10,8 @@
 ;;      A[j][i] = i;
 ;;    }
 
+; CHECK: Loop interchanged with enclosing loop.
+
 @A11 = local_unnamed_addr global [3 x [3 x i32]] zeroinitializer, align 16
 
 define void @interchange_11(i32 %n) {
@@ -30,9 +32,11 @@
 
 for.body4:                                        ; preds = %for.body4, %for.cond1.preheader
   %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ]
-  %arrayidx6 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv26
-  %tmp = trunc i64 %indvars.iv26 to i32
-  store i32 %tmp, i32* %arrayidx6, align 4
+; The store below does not appear in the C snippet above.
+; With two stores in the loop there may be WAW dependences, and interchange is illegal.
+;  %arrayidx6 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv26
+;  %tmp = trunc i64 %indvars.iv26 to i32
+;  store i32 %tmp, i32* %arrayidx6, align 4
   %arrayidx10 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv.next27
   %tmp1 = trunc i64 %indvars.iv to i32
   store i32 %tmp1, i32* %arrayidx10, align 4
@@ -41,40 +45,3 @@
   %exitcond = icmp ne i64 %indvars.iv.next, %n.wide
   br i1 %exitcond, label %for.body4, label %for.cond.loopexit
 }
-
-; CHECK-LABEL: @interchange_11
-; CHECK: entry:
-; CHECK:   br label %for.body4.preheader
-
-; CHECK: for.cond1.preheader.preheader:
-; CHECK:   br label %for.cond1.preheader
-
-; CHECK: for.cond.loopexit:
-; CHECK:   %exitcond28 = icmp ne i64 %indvars.iv.next27, 2
-; CHECK:   br i1 %exitcond28, label %for.cond1.preheader, label %for.body4.split
-
-; CHECK: for.cond1.preheader:
-; CHECK:   %indvars.iv26 = phi i64 [ %indvars.iv.next27, %for.cond.loopexit ], [ 0, %for.cond1.preheader.preheader ]
-; CHECK:   %indvars.iv.next27 = add nuw nsw i64 %indvars.iv26, 1
-; CHECK:   br label %for.body4.split1
-
-; CHECK: for.body4.preheader:
-; CHECK:   br label %for.body4
-
-; CHECK: for.cond.cleanup:
-; CHECK:   ret void
-
-; CHECK: for.body4:
-; CHECK:   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body4.split ], [ 0, %for.body4.preheader ]
-; CHECK:   br label %for.cond1.preheader.preheader
-
-; CHECK: for.body4.split1:
-; CHECK:   %arrayidx6 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv26
-; CHECK:   %tmp = trunc i64 %indvars.iv26 to i32
-; CHECK:   store i32 %tmp, i32* %arrayidx6, align 4
-; CHECK:   br label %for.cond.loopexit
-
-; CHECK: for.body4.split:
-; CHECK:   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK:   %exitcond = icmp ne i64 %indvars.iv.next, %n.wide
-; CHECK:   br i1 %exitcond, label %for.body4, label %for.cond.cleanup
diff --git a/llvm/test/Transforms/LoopInterchange/interchange-latch-no-exit.ll b/llvm/test/Transforms/LoopInterchange/interchange-latch-no-exit.ll
deleted file mode 100644
index b17bbde..0000000
--- a/llvm/test/Transforms/LoopInterchange/interchange-latch-no-exit.ll
+++ /dev/null
@@ -1,40 +0,0 @@
-; RUN: opt < %s -loop-interchange -S | FileCheck %s
-
-; BB latch1 is the loop latch, but does not exit the loop.
-define void @foo() {
-entry:
-  %dest = alloca i16*, align 8
-  br label %header1
-
-header1:
-  %0 = phi i16* [ %2, %latch1 ], [ undef, %entry ]
-  br i1 false, label %inner, label %loopexit
-
-inner:
-  br i1 undef, label %inner.ph, label %latch1
-
-inner.ph:
-  br label %inner.body
-
-inner.body:
-  %1 = load i16, i16* %0, align 2
-  store i16* inttoptr (i64 2 to i16*), i16** %dest, align 8
-  br i1 false, label %inner.body, label %inner.loopexit
-
-inner.loopexit:
-  br label %latch1
-
-latch1:
-  %2 = phi i16* [ %0, %inner ], [ undef, %inner.loopexit ]
-  br label %header1
-
-loopexit:                                         ; preds = %header1
-  unreachable
-}
-
-; CHECK-LABEL: inner.body:
-; CHECK: br i1 false, label %inner.body, label %inner.loopexit
-; CHECK: latch1:
-; CHECK-NEXT: %2 = phi i16* [ %0, %inner ], [ undef, %inner.loopexit ]
-; CHECK-NEXT: br label %header1
-
diff --git a/llvm/test/Transforms/LoopInterchange/interchange-no-deps.ll b/llvm/test/Transforms/LoopInterchange/interchange-no-deps.ll
index 6ec195f..2ff3d4e 100644
--- a/llvm/test/Transforms/LoopInterchange/interchange-no-deps.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchange-no-deps.ll
@@ -1,6 +1,8 @@
-; RUN: opt < %s -loop-interchange -simplifycfg -S | FileCheck %s
+; RUN: opt < %s -loop-interchange -simplifycfg -S -pass-remarks=loop-interchange 2>&1 | FileCheck %s
+; CHECK: Loop interchanged with enclosing loop.
 
 ; no_deps_interchange just access a single nested array and can be interchange.
+
 define i32 @no_deps_interchange([1024 x i32]* nocapture %Arr, i32 %k) local_unnamed_addr #0 {
 entry:
   br label %for.body
@@ -26,19 +28,3 @@
 for.cond.cleanup:                                 ; preds = %for.cond.cleanup3
   ret i32 0
 }
-
-; CHECK-LABEL: @no_deps_interchange
-; CHECK-LABEL: entry:
-; CHECK-NEXT: br label %for.body4
-
-; CHECK-LABEL: for.body:                                         ; preds = %for.body4, %for.body
-; CHECK: %indvars.iv19 = phi i64 [ %indvars.iv.next20, %for.body ], [ 0, %for.body4 ]
-; CHECK: br i1 %exitcond21, label %for.body, label %for.body4.split
-
-; CHECK-LABEL: for.body4:                                        ; preds = %entry, %for.body4.split
-; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body4.split ], [ 0, %entry ]
-; CHECK: br label %for.body
-
-; CHECK-LABEL: for.body4.split:                                  ; preds = %for.body
-; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK: br i1 %exitcond, label %for.body4, label %for.cond.cleanup
diff --git a/llvm/test/Transforms/LoopInterchange/interchange-not-profitable.ll b/llvm/test/Transforms/LoopInterchange/interchange-not-profitable.ll
index 67a63ca..f0d21c6 100644
--- a/llvm/test/Transforms/LoopInterchange/interchange-not-profitable.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchange-not-profitable.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s
-;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch.
+; REQUIRES: asserts
+; RUN: opt < %s -basicaa -loop-interchange -S -debug 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -12,6 +12,8 @@
 ;;    for(int j=0;j<100;j++)
 ;;      A[i][j] = A[i][j]+k;
 
+; CHECK: Interchanging loops not profitable.
+
 define void @interchange_03(i32 %k) {
 entry:
   br label %for.cond1.preheader
@@ -38,29 +40,3 @@
 for.end12:
   ret void
 }
-
-; CHECK-LABEL: @interchange_03
-; CHECK: entry:
-; CHECK:   br label %for.cond1.preheader.preheader
-; CHECK: for.cond1.preheader.preheader:                    ; preds = %entry
-; CHECK:   br label %for.cond1.preheader
-; CHECK: for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.inc10
-; CHECK:   %indvars.iv21 = phi i64 [ %indvars.iv.next22, %for.inc10 ], [ 0, %for.cond1.preheader.preheader ]
-; CHECK:  br label %for.body3.preheader
-; CHECK: for.body3.preheader:                              ; preds = %for.cond1.preheader
-; CHECK:   br label %for.body3
-; CHECK: for.body3:                                        ; preds = %for.body3.preheader, %for.body3
-; CHECK:   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 0, %for.body3.preheader ]
-; CHECK:   %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv21, i64 %indvars.iv
-; CHECK:   %0 = load i32, i32* %arrayidx5
-; CHECK:   %add = add nsw i32 %0, %k
-; CHECK:   store i32 %add, i32* %arrayidx5
-; CHECK:   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK:   %exitcond = icmp eq i64 %indvars.iv.next, 100
-; CHECK:   br i1 %exitcond, label %for.inc10, label %for.body3
-; CHECK: for.inc10:                                        ; preds = %for.body3
-; CHECK:   %indvars.iv.next22 = add nuw nsw i64 %indvars.iv21, 1
-; CHECK:   %exitcond23 = icmp eq i64 %indvars.iv.next22, 100
-; CHECK:   br i1 %exitcond23, label %for.end12, label %for.cond1.preheader
-; CHECK: for.end12:                                        ; preds = %for.inc10
-; CHECK:   ret void
diff --git a/llvm/test/Transforms/LoopInterchange/interchange-output-dependencies.ll b/llvm/test/Transforms/LoopInterchange/interchange-output-dependencies.ll
index bf9b80c..a1c8823 100644
--- a/llvm/test/Transforms/LoopInterchange/interchange-output-dependencies.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchange-output-dependencies.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S | FileCheck %s
-;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch.
+; REQUIRES: asserts
+; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S -debug 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -14,6 +14,8 @@
 ;;      A[j][i+1] = j;
 ;;    }
 
+; CHECK: Not interchanging loops. Cannot prove legality.
+
 @A10 = local_unnamed_addr global [3 x [3 x i32]] zeroinitializer, align 16
 
 define void @interchange_10() {
@@ -44,43 +46,3 @@
   %exitcond = icmp ne i64 %indvars.iv.next, 3
   br i1 %exitcond, label %for.body4, label %for.cond.loopexit
 }
-
-; CHECK-LABEL: @interchange_10
-; CHECK: entry:
-; CHECK:   br label %for.body4.preheader
-
-; CHECK: for.cond1.preheader.preheader:
-; CHECK:   br label %for.cond1.preheader
-
-; CHECK: for.cond.loopexit:
-; CHECK:   %exitcond28 = icmp ne i64 %indvars.iv.next27, 2
-; CHECK:   br i1 %exitcond28, label %for.cond1.preheader, label %for.body4.split
-
-; CHECK: for.cond1.preheader:
-; CHECK:   %indvars.iv26 = phi i64 [ %indvars.iv.next27, %for.cond.loopexit ], [ 0, %for.cond1.preheader.preheader ]
-; CHECK:   %indvars.iv.next27 = add nuw nsw i64 %indvars.iv26, 1
-; CHECK:   br label %for.body4.split1
-
-; CHECK: for.body4.preheader:
-; CHECK:   br label %for.body4
-
-; CHECK: for.cond.cleanup:
-; CHECK:   ret void
-
-; CHECK: for.body4:
-; CHECK:   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body4.split ], [ 0, %for.body4.preheader ]
-; CHECK:   br label %for.cond1.preheader.preheader
-
-; CHECK: for.body4.split1:
-; CHECK:   %arrayidx6 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv26
-; CHECK:   %tmp = trunc i64 %indvars.iv26 to i32
-; CHECK:   store i32 %tmp, i32* %arrayidx6, align 4
-; CHECK:   %arrayidx10 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv.next27
-; CHECK:   %tmp1 = trunc i64 %indvars.iv to i32
-; CHECK:   store i32 %tmp1, i32* %arrayidx10, align 4
-; CHECK:   br label %for.cond.loopexit
-
-; CHECK: for.body4.split:
-; CHECK:   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK:   %exitcond = icmp ne i64 %indvars.iv.next, 3
-; CHECK:   br i1 %exitcond, label %for.body4, label %for.cond.cleanup
diff --git a/llvm/test/Transforms/LoopInterchange/interchange-simple-count-down.ll b/llvm/test/Transforms/LoopInterchange/interchange-simple-count-down.ll
index 5aee1f1..2faaf04 100644
--- a/llvm/test/Transforms/LoopInterchange/interchange-simple-count-down.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchange-simple-count-down.ll
@@ -1,5 +1,4 @@
-; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S | FileCheck %s
-;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch.
+; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S -pass-remarks=loop-interchange 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -11,6 +10,8 @@
 ;;   for(int j=100;j>=0;j--)
 ;;     A[j][i] = A[j][i]+k;
 
+; CHECK: Loop interchanged with enclosing loop.
+
 define void @interchange_02(i32 %k) {
 entry:
   br label %for.cond1.preheader
@@ -37,33 +38,3 @@
 for.end11:
   ret void
 }
-
-; CHECK-LABEL: @interchange_02
-; CHECK: entry:
-; CHECK:   br label %for.body3.preheader
-; CHECK: for.cond1.preheader.preheader:
-; CHECK:   br label %for.cond1.preheader
-; CHECK: for.cond1.preheader:
-; CHECK:   %indvars.iv19 = phi i64 [ %indvars.iv.next20, %for.inc10 ], [ 0, %for.cond1.preheader.preheader ]
-; CHECK:   br label %for.body3.split1
-; CHECK: for.body3.preheader:
-; CHECK:   br label %for.body3
-; CHECK: for.body3:
-; CHECK:   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3.split ], [ 100, %for.body3.preheader ]
-; CHECK:   br label %for.cond1.preheader.preheader
-; CHECK: for.body3.split1:                                 ; preds = %for.cond1.preheader
-; CHECK:   %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv19
-; CHECK:   %0 = load i32, i32* %arrayidx5
-; CHECK:   %add = add nsw i32 %0, %k
-; CHECK:   store i32 %add, i32* %arrayidx5
-; CHECK:   br label %for.inc10
-; CHECK: for.body3.split:
-; CHECK:   %indvars.iv.next = add nsw i64 %indvars.iv, -1
-; CHECK:   %cmp2 = icmp sgt i64 %indvars.iv, 0
-; CHECK:   br i1 %cmp2, label %for.body3, label %for.end11
-; CHECK: for.inc10:
-; CHECK:   %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
-; CHECK:   %exitcond = icmp eq i64 %indvars.iv.next20, 100
-; CHECK:   br i1 %exitcond, label %for.body3.split, label %for.cond1.preheader
-; CHECK: for.end11:
-; CHECK:   ret void
diff --git a/llvm/test/Transforms/LoopInterchange/interchange-simple-count-up.ll b/llvm/test/Transforms/LoopInterchange/interchange-simple-count-up.ll
index b4c1421..8cfa2fb 100644
--- a/llvm/test/Transforms/LoopInterchange/interchange-simple-count-up.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchange-simple-count-up.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S | FileCheck %s
-;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch.
+; REQUIRES: asserts
+; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S -debug 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -11,6 +11,8 @@
 ;;    for(int j=1;j<N;j++)
 ;;      A[j][i] = A[j][i]+k;
 
+; CHECK: Not interchanging loops. Cannot prove legality.
+
 define void @interchange_01(i32 %k, i32 %N) {
 entry:
   %cmp21 = icmp sgt i32 %N, 0
@@ -45,42 +47,3 @@
 for.end12:
   ret void
 }
-
-; CHECK-LABEL: @interchange_01
-; CHECK: entry:
-; CHECK:   %cmp21 = icmp sgt i32 %N, 0
-; CHECK:   br i1 %cmp21, label %for.body3.preheader, label %for.end12
-; CHECK: for.cond1.preheader.lr.ph:
-; CHECK:   br label %for.cond1.preheader
-; CHECK: for.cond1.preheader:
-; CHECK:   %indvars.iv23 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next24, %for.inc10 ]
-; CHECK:   br i1 %cmp219, label %for.body3.split1, label %for.end12.loopexit
-; CHECK: for.body3.preheader:
-; CHECK:   %cmp219 = icmp sgt i32 %N, 1
-; CHECK:   %0 = add i32 %N, -1
-; CHECK:   br label %for.body3
-; CHECK: for.body3:
-; CHECK:   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3.split ], [ 1, %for.body3.preheader ]
-; CHECK:   br label %for.cond1.preheader.lr.ph
-; CHECK: for.body3.split1:
-; CHECK:   %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv23
-; CHECK:   %1 = load i32, i32* %arrayidx5
-; CHECK:   %add = add nsw i32 %1, %k
-; CHECK:   store i32 %add, i32* %arrayidx5
-; CHECK:   br label %for.inc10.loopexit
-; CHECK: for.body3.split:
-; CHECK:   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK:   %lftr.wideiv = trunc i64 %indvars.iv to i32
-; CHECK:   %exitcond = icmp eq i32 %lftr.wideiv, %0
-; CHECK:   br i1 %exitcond, label %for.end12.loopexit, label %for.body3
-; CHECK: for.inc10.loopexit:
-; CHECK:   br label %for.inc10
-; CHECK: for.inc10:
-; CHECK:   %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
-; CHECK:   %lftr.wideiv25 = trunc i64 %indvars.iv23 to i32
-; CHECK:   %exitcond26 = icmp eq i32 %lftr.wideiv25, %0
-; CHECK:   br i1 %exitcond26, label %for.body3.split, label %for.cond1.preheader
-; CHECK: for.end12.loopexit:
-; CHECK:   br label %for.end12
-; CHECK: for.end12:
-; CHECK:   ret void
diff --git a/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll b/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
index e14598c..5b04004 100644
--- a/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
+++ b/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
@@ -54,14 +54,10 @@
 
 ; CHECK: --- !Missed
 ; CHECK-NEXT: Pass:            loop-interchange
-; CHECK-NEXT: Name:            InterchangeNotProfitable
+; CHECK-NEXT: Name:            Dependence
 ; CHECK-NEXT: Function:        test01
 ; CHECK-NEXT: Args:
-; CHECK-NEXT:  - String:          'Interchanging loops is too costly (cost='
-; CHECK-NEXT:  - Cost:            '2'
-; CHECK-NEXT:  - String:          ', threshold='
-; CHECK-NEXT:  - Threshold:       '0'
-; CHECK-NEXT:  - String:          ') and it does not improve parallelism.'
+; CHECK-NEXT:   - String:          Cannot interchange loops due to dependences.
 ; CHECK-NEXT: ...
 
 ;;--------------------------------------Test case 02------------------------------------
@@ -110,10 +106,10 @@
 
 ; CHECK: --- !Missed
 ; CHECK-NEXT: Pass:            loop-interchange
-; CHECK-NEXT: Name:            UnsupportedInsBetweenInduction
+; CHECK-NEXT: Name:            Dependence
 ; CHECK-NEXT: Function:        test02
 ; CHECK-NEXT: Args:
-; CHECK-NEXT:   - String:          Found unsupported instruction between induction variable increment and branch.
+; CHECK-NEXT:   - String:          Cannot interchange loops due to dependences.
 ; CHECK-NEXT: ...
 
 ;;-----------------------------------Test case 03-------------------------------
@@ -156,12 +152,12 @@
   br i1 %exitcond, label %for.body4, label %for.cond.loopexit
 }
 
-; CHECK: --- !Passed
+; CHECK: --- !Missed
 ; CHECK-NEXT: Pass:            loop-interchange
-; CHECK-NEXT: Name:            Interchanged
+; CHECK-NEXT: Name:            Dependence
 ; CHECK-NEXT: Function:        test03
 ; CHECK-NEXT: Args:
-; CHECK-NEXT:   - String:          Loop interchanged with enclosing loop.
+; CHECK-NEXT:   - String:          Cannot interchange loops due to dependences.
 ; CHECK-NEXT: ...
 
 ;;--------------------------------------Test case 04-------------------------------------
@@ -213,8 +209,8 @@
 
 ; CHECK: --- !Missed
 ; CHECK-NEXT: Pass:            loop-interchange
-; CHECK-NEXT: Name:            NotTightlyNested
+; CHECK-NEXT: Name:            Dependence
 ; CHECK-NEXT: Function:        test04
 ; CHECK-NEXT: Args:
-; CHECK-NEXT:   - String:          Cannot interchange loops because they are not tightly nested.
+; CHECK-NEXT:   - String:          Cannot interchange loops due to dependences.
 ; CHECK-NEXT: ...
diff --git a/llvm/test/Transforms/LoopInterchange/not-interchanged-dependencies-1.ll b/llvm/test/Transforms/LoopInterchange/not-interchanged-dependencies-1.ll
index eaff177..5cd2b58 100644
--- a/llvm/test/Transforms/LoopInterchange/not-interchanged-dependencies-1.ll
+++ b/llvm/test/Transforms/LoopInterchange/not-interchanged-dependencies-1.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S | FileCheck %s
-;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch.
+; REQUIRES: asserts
+; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S -debug 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -12,6 +12,8 @@
 ;;   for(int i=0;i<99;i++)
 ;;       A[j][i+1] = A[j+1][i]+k;
 
+; CHECK: Not interchanging loops. Cannot prove legality.
+
 define void @interchange_04(i32 %k){
 entry:
   br label %for.cond1.preheader
@@ -39,26 +41,3 @@
 for.end14:
   ret void
 }
-
-; CHECK-LABEL: @interchange_04
-; CHECK: entry:
-; CHECK:   br label %for.cond1.preheader
-; CHECK: for.cond1.preheader:                              ; preds = %for.inc12, %entry
-; CHECK:   %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for.inc12 ]
-; CHECK:   %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
-; CHECK:   br label %for.body3
-; CHECK: for.body3:                                        ; preds = %for.body3, %for.cond1.preheader
-; CHECK:   %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
-; CHECK:   %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv.next24, i64 %indvars.iv
-; CHECK:   %0 = load i32, i32* %arrayidx5
-; CHECK:   %add6 = add nsw i32 %0, %k
-; CHECK:   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK:   %arrayidx11 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv23, i64 %indvars.iv.next
-; CHECK:   store i32 %add6, i32* %arrayidx11
-; CHECK:   %exitcond = icmp eq i64 %indvars.iv.next, 99
-; CHECK:   br i1 %exitcond, label %for.inc12, label %for.body3
-; CHECK: for.inc12:                                        ; preds = %for.body3
-; CHECK:   %exitcond25 = icmp eq i64 %indvars.iv.next24, 99
-; CHECK:   br i1 %exitcond25, label %for.end14, label %for.cond1.preheader
-; CHECK: for.end14:                                        ; preds = %for.inc12
-; CHECK:   ret void
diff --git a/llvm/test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll b/llvm/test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll
index 1d4d228..b6a98267 100644
--- a/llvm/test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll
+++ b/llvm/test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s
-;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch.
+; REQUIRES: asserts
+; RUN: opt < %s -basicaa -loop-interchange -S -debug 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -12,6 +12,12 @@
 ;;      for(int k=0;k<100;k++)
 ;;        D[i][k][j] = D[i][k][j]+t;
 
+; CHECK: Processing Inner Loop Id = 2 and OuterLoopId = 1
+; CHECK: Loops interchanged.
+
+; CHECK: Processing Inner Loop Id = 1 and OuterLoopId = 0
+; CHECK: Interchanging loops not profitable.
+
 define void @interchange_08(i32 %t){
 entry:
   br label %for.cond1.preheader
@@ -47,41 +53,3 @@
 for.end17:                                        ; preds = %for.inc15
   ret void
 }
-; CHECK-LABEL: @interchange_08
-; CHECK:   entry:
-; CHECK:     br label %for.cond1.preheader.preheader
-; CHECK:   for.cond1.preheader.preheader:                    ; preds = %entry
-; CHECK:     br label %for.cond1.preheader
-; CHECK:   for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.inc15
-; CHECK:     %i.028 = phi i32 [ %inc16, %for.inc15 ], [ 0, %for.cond1.preheader.preheader ]
-; CHECK:     br label %for.body6.preheader
-; CHECK:   for.cond4.preheader.preheader:                    ; preds = %for.body6
-; CHECK:     br label %for.cond4.preheader
-; CHECK:   for.cond4.preheader:                              ; preds = %for.cond4.preheader.preheader, %for.inc12
-; CHECK:     %j.027 = phi i32 [ %inc13, %for.inc12 ], [ 0, %for.cond4.preheader.preheader ]
-; CHECK:     br label %for.body6.split1
-; CHECK:   for.body6.preheader:                              ; preds = %for.cond1.preheader
-; CHECK:     br label %for.body6
-; CHECK:   for.body6:                                        ; preds = %for.body6.preheader, %for.body6.split
-; CHECK:     %k.026 = phi i32 [ %inc, %for.body6.split ], [ 0, %for.body6.preheader ]
-; CHECK:     br label %for.cond4.preheader.preheader
-; CHECK:   for.body6.split1:                                 ; preds = %for.cond4.preheader
-; CHECK:     %arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], [100 x [100 x [100 x i32]]]* @D, i32 0, i32 %i.028, i32 %k.026, i32 %j.027
-; CHECK:     %0 = load i32, i32* %arrayidx8
-; CHECK:     %add = add nsw i32 %0, %t
-; CHECK:     store i32 %add, i32* %arrayidx8
-; CHECK:     br label %for.inc12
-; CHECK:   for.body6.split:                                  ; preds = %for.inc12
-; CHECK:     %inc = add nuw nsw i32 %k.026, 1
-; CHECK:     %exitcond = icmp eq i32 %inc, 100
-; CHECK:     br i1 %exitcond, label %for.inc15, label %for.body6
-; CHECK:   for.inc12:                                        ; preds = %for.body6.split1
-; CHECK:     %inc13 = add nuw nsw i32 %j.027, 1
-; CHECK:     %exitcond29 = icmp eq i32 %inc13, 100
-; CHECK:     br i1 %exitcond29, label %for.body6.split, label %for.cond4.preheader
-; CHECK:   for.inc15:                                        ; preds = %for.body6.split
-; CHECK:     %inc16 = add nuw nsw i32 %i.028, 1
-; CHECK:     %exitcond30 = icmp eq i32 %inc16, 100
-; CHECK:     br i1 %exitcond30, label %for.end17, label %for.cond1.preheader
-; CHECK:   for.end17:                                        ; preds = %for.inc15
-; CHECK:     ret void
diff --git a/llvm/test/Transforms/LoopInterchange/not-interchanged-tightly-nested.ll b/llvm/test/Transforms/LoopInterchange/not-interchanged-tightly-nested.ll
index 21312b4..cf614ea 100644
--- a/llvm/test/Transforms/LoopInterchange/not-interchanged-tightly-nested.ll
+++ b/llvm/test/Transforms/LoopInterchange/not-interchanged-tightly-nested.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s
-;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch.
+; REQUIRES: asserts
+; RUN: opt < %s -basicaa -loop-interchange -S -debug 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -16,6 +16,8 @@
 ;;      A[j][i] = A[j][i]+B[j];
 ;;  }
 
+; CHECK: Not interchanging loops. Cannot prove legality.
+
 define void @interchange_05(i32 %k, i32 %N){
 entry:
   %cmp30 = icmp sgt i32 %N, 0
@@ -55,44 +57,6 @@
   ret void
 }
 
-; CHECK-LABEL: @interchange_05
-; CHECK: entry:
-; CHECK: %cmp30 = icmp sgt i32 %N, 0
-; CHECK: br i1 %cmp30, label %for.body.lr.ph, label %for.end17
-; CHECK: for.body.lr.ph:
-; CHECK: %0 = add i32 %N, -1
-; CHECK: %1 = zext i32 %k to i64
-; CHECK: br label %for.body
-; CHECK: for.body:
-; CHECK: %indvars.iv32 = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next33, %for.inc15 ]
-; CHECK: %2 = add nsw i64 %indvars.iv32, %1
-; CHECK: %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* @B, i64 0, i64 %indvars.iv32
-; CHECK: %3 = trunc i64 %2 to i32
-; CHECK: store i32 %3, i32* %arrayidx
-; CHECK: br label %for.body3.preheader
-; CHECK: for.body3.preheader:
-; CHECK: br label %for.body3
-; CHECK: for.body3:
-; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 0, %for.body3.preheader ]
-; CHECK: %arrayidx7 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv32, i64 %indvars.iv
-; CHECK: %4 = load i32, i32* %arrayidx7
-; CHECK: %add10 = add nsw i32 %3, %4
-; CHECK: store i32 %add10, i32* %arrayidx7
-; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK: %lftr.wideiv = trunc i64 %indvars.iv to i32
-; CHECK: %exitcond = icmp eq i32 %lftr.wideiv, %0
-; CHECK: br i1 %exitcond, label %for.inc15, label %for.body3
-; CHECK: for.inc15:
-; CHECK: %indvars.iv.next33 = add nuw nsw i64 %indvars.iv32, 1
-; CHECK: %lftr.wideiv35 = trunc i64 %indvars.iv32 to i32
-; CHECK: %exitcond36 = icmp eq i32 %lftr.wideiv35, %0
-; CHECK: br i1 %exitcond36, label %for.end17.loopexit, label %for.body
-; CHECK: for.end17.loopexit:
-; CHECK: br label %for.end17
-; CHECK: for.end17:
-; CHECK: ret void
-
-
 declare void @foo(...) readnone
 
 ;; Loops not tightly nested are not interchanged
@@ -102,6 +66,8 @@
 ;;      A[j][i] = A[j][i]+k;
 ;;  }
 
+; CHECK: Not interchanging loops. Cannot prove legality.
+
 define void @interchange_06(i32 %k, i32 %N) {
 entry:
   %cmp22 = icmp sgt i32 %N, 0
@@ -136,8 +102,3 @@
 for.end12:
   ret void
 }
-;; Here we are checking if the inner phi is not split then we have not interchanged.
-; CHECK-LABEL: @interchange_06
-; CHECK:  phi i64 [ %indvars.iv.next, %for.body3 ], [ 2, %for.body3.preheader ]
-; CHECK-NEXT: getelementptr
-; CHECK-NEXT: %1 = load
diff --git a/llvm/test/Transforms/LoopInterchange/phi-ordering.ll b/llvm/test/Transforms/LoopInterchange/phi-ordering.ll
index c24e6eb..a03e901 100644
--- a/llvm/test/Transforms/LoopInterchange/phi-ordering.ll
+++ b/llvm/test/Transforms/LoopInterchange/phi-ordering.ll
@@ -1,7 +1,10 @@
-; RUN: opt < %s -loop-interchange -verify-dom-info -S | FileCheck %s
+; REQUIRES: asserts
+; RUN: opt < %s -loop-interchange -verify-dom-info -S -debug 2>&1 | FileCheck %s
 ;; Checks the order of the inner phi nodes does not cause havoc.
 ;; The inner loop has a reduction into c. The IV is not the first phi.
 
+; CHECK: Not interchanging loops. Cannot prove legality.
+
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "armv8--linux-gnueabihf"
 
@@ -61,30 +64,3 @@
 for.end21:                                        ; preds = %for.end21.loopexit, %entry
   ret void
 }
-
-
-; CHECK-LABEL: test
-; CHECK: entry:
-; CHECK:   br i1 %cmp45, label %for.body6.preheader, label %for.end21
-; CHECK: for.body3.lr.ph.preheader:
-; CHECK:   br label %for.body3.lr.ph
-; CHECK: for.body3.lr.ph:
-; CHECK:   br label %for.body6.lr.ph.preheader
-; CHECK: for.body6.lr.ph.preheader:
-; CHECK:   br label %for.body6.lr.ph
-; CHECK: for.body6.lr.ph:
-; CHECK:   br label %for.body6.split1
-; CHECK: for.body6.preheader:
-; CHECK:   br label %for.body6
-; CHECK: for.body6:
-; CHECK:   br label %for.body3.lr.ph.preheader
-; CHECK: for.body6.split1:
-; CHECK:   br label %for.inc16
-; CHECK: for.body6.split:
-; CHECK:   add nuw nsw i32 %k.040, 1
-; CHECK:   br i1 %exitcond, label %for.end21.loopexit, label %for.body6
-; CHECK: for.inc16:
-; CHECK:   br i1 %exitcond47, label %for.inc19, label %for.body6.lr.ph
-; CHECK: for.inc19:
-; CHECK:   br i1 %exitcond48, label %for.body6.split, label %for.body3.lr.ph
-; CHECK: for.end21:
diff --git a/llvm/test/Transforms/LoopInterchange/profitability.ll b/llvm/test/Transforms/LoopInterchange/profitability.ll
index 548e491..6f778b3 100644
--- a/llvm/test/Transforms/LoopInterchange/profitability.ll
+++ b/llvm/test/Transforms/LoopInterchange/profitability.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S | FileCheck %s
+; REQUIRES: asserts
+; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S -debug 2>&1 | FileCheck %s
 ;; We test profitability model in these test cases.
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -13,6 +14,8 @@
 ;;     for(int j=1;j<N;j++)
 ;;       A[j][i] = A[j - 1][i] + B[j][i];
 
+; CHECK: Not interchanging loops. Cannot prove legality.
+
 define void @interchange_01(i32 %N) {
 entry:
   %cmp27 = icmp sgt i32 %N, 1
@@ -50,30 +53,6 @@
 for.end16:
   ret void
 }
-;; Here we are checking partial .ll to check if loop are interchanged.
-; CHECK-LABEL: @interchange_01
-; CHECK:  for.body3.preheader:                              ; preds = %for.inc14, %for.cond1.preheader.lr.ph
-; CHECK:    %indvars.iv30 = phi i64 [ 1, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next31, %for.inc14 ]
-; CHECK:    br label %for.body3.split2
-
-; CHECK:  for.body3.preheader1:                             ; preds = %entry
-; CHECK:    br label %for.body3
-
-; CHECK:  for.body3:                                        ; preds = %for.body3.preheader1, %for.body3.split
-; CHECK:    %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3.split ], [ 1, %for.body3.preheader1 ]
-; CHECK:    br label %for.cond1.preheader.lr.ph
-
-; CHECK:  for.body3.split2:                                 ; preds = %for.body3.preheader
-; CHECK:    %1 = add nsw i64 %indvars.iv, -1
-; CHECK:    %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %1, i64 %indvars.iv30
-; CHECK:    %2 = load i32, i32* %arrayidx5
-; CHECK:    %arrayidx9 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @B, i64 0, i64 %indvars.iv, i64 %indvars.iv30
-; CHECK:    %3 = load i32, i32* %arrayidx9
-; CHECK:    %add = add nsw i32 %3, %2
-; CHECK:    %arrayidx13 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv30
-; CHECK:    store i32 %add, i32* %arrayidx13
-; CHECK:    br label %for.inc14
-
 
 ;; ---------------------------------------Test case 02---------------------------------
 ;; Check loop interchange profitability model. 
@@ -83,6 +62,8 @@
 ;;    for(int j=1;j<N;j++)
 ;;       A[j-1][i-1] = A[j - 1][i-1] + B[j-1][i-1];
 
+; CHECK: Not interchanging loops. Cannot prove legality.
+
 define void @interchange_02(i32 %N) {
 entry:
   %cmp32 = icmp sgt i32 %N, 1
@@ -120,30 +101,6 @@
 for.end21:
   ret void
 }
-; CHECK-LABEL: @interchange_02
-; CHECK:  for.body3.lr.ph:                                  ; preds = %for.inc19, %for.cond1.preheader.lr.ph
-; CHECK:    %indvars.iv35 = phi i64 [ 1, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next36, %for.inc19 ]
-; CHECK:    %0 = add nsw i64 %indvars.iv35, -1
-; CHECK:    br label %for.body3.split1
-
-; CHECK:  for.body3.preheader:                              ; preds = %entry
-; CHECK:    %1 = add i32 %N, -1
-; CHECK:    br label %for.body3
-
-; CHECK:  for.body3:                                        ; preds = %for.body3.preheader, %for.body3.split
-; CHECK:    %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3.split ], [ 1, %for.body3.preheader ]
-; CHECK:    br label %for.cond1.preheader.lr.ph
-
-; CHECK:  for.body3.split1:                                 ; preds = %for.body3.lr.ph
-; CHECK:    %2 = add nsw i64 %indvars.iv, -1
-; CHECK:    %arrayidx6 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %2, i64 %0
-; CHECK:    %3 = load i32, i32* %arrayidx6
-; CHECK:    %arrayidx12 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @B, i64 0, i64 %2, i64 %0
-; CHECK:    %4 = load i32, i32* %arrayidx12
-; CHECK:    %add = add nsw i32 %4, %3
-; CHECK:    store i32 %add, i32* %arrayidx6
-; CHECK:    br label %for.inc19
-
 
 ;;---------------------------------------Test case 03---------------------------------
 ;; Loops interchange is not profitable.
@@ -151,6 +108,8 @@
 ;;     for(int j=1;j<N;j++)
 ;;       A[i-1][j-1] = A[i - 1][j-1] + B[i][j];
 
+; CHECK: Not interchanging loops. Cannot prove legality.
+
 define void @interchange_03(i32 %N){
 entry:
   %cmp31 = icmp sgt i32 %N, 1
@@ -188,18 +147,3 @@
 for.end19:
   ret void
 }
-
-; CHECK-LABEL: @interchange_03
-; CHECK:  for.body3.lr.ph:
-; CHECK:    %indvars.iv34 = phi i64 [ 1, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next35, %for.inc17 ]
-; CHECK:    %1 = add nsw i64 %indvars.iv34, -1
-; CHECK:    br label %for.body3.preheader
-; CHECK:  for.body3.preheader:
-; CHECK:    br label %for.body3
-; CHECK:  for.body3:
-; CHECK:    %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 1, %for.body3.preheader ]
-; CHECK:    %2 = add nsw i64 %indvars.iv, -1
-; CHECK:    %arrayidx6 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %1, i64 %2
-; CHECK:    %3 = load i32, i32* %arrayidx6
-; CHECK:    %arrayidx10 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @B, i64 0, i64 %indvars.iv34, i64 %indvars.iv
-; CHECK:    %4 = load i32, i32* %arrayidx10
diff --git a/llvm/test/Transforms/LoopInterchange/reductions.ll b/llvm/test/Transforms/LoopInterchange/reductions.ll
index ccd4fef..23ce7c3 100644
--- a/llvm/test/Transforms/LoopInterchange/reductions.ll
+++ b/llvm/test/Transforms/LoopInterchange/reductions.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S | FileCheck %s
+; REQUIRES: asserts
+; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S -debug 2>&1 | FileCheck %s
 
 @A = common global [500 x [500 x i32]] zeroinitializer
 @X = common global i32 0
@@ -9,6 +10,9 @@
 ;;    for( int j=1;j<N;j++)
 ;;      X+=A[j][i];
 
+;; The loops are interchanged: check that the pass reports the interchange in its debug output.
+; CHECK: Loops interchanged.
+
 define void @reduction_01(i32 %N) {
 entry:
   %cmp16 = icmp sgt i32 %N, 1
@@ -41,14 +45,6 @@
   ret void
 }
 
-;; Loop is interchanged check that the phi nodes are split and the promoted value is used instead of the reduction phi.
-; CHECK-LABEL: @reduction_01
-; CHECK:  for.body3:                                        ; preds = %for.body3.preheader, %for.body3.split
-; CHECK:    %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3.split ], [ 1, %for.body3.preheader ]
-; CHECK:    br label %for.body3.lr.ph.preheader
-; CHECK:    %add = add nsw i32 %X.promoted
-
-
 ;; Test for more than 1 reductions inside a loop.
 ;;  for( int i=1;i<N;i++)
 ;;    for( int j=1;j<N;j++)
@@ -57,6 +53,9 @@
 ;;        Y+=B[k][i];
 ;;      }
 
+;; The loops are interchanged: check that the pass reports the interchange in its debug output.
+; CHECK: Loops interchanged.
+
 define void @reduction_02(i32 %N)  {
 entry:
   %cmp34 = icmp sgt i32 %N, 1
@@ -105,14 +104,6 @@
   ret void
 }
 
-;; Loop is interchanged check that the phi nodes are split and the promoted value is used instead of the reduction phi.
-; CHECK-LABEL: @reduction_02
-; CHECK:  for.body6:                                        ; preds = %for.body6.preheader, %for.body6.split
-; CHECK:    %indvars.iv = phi i64 [ %indvars.iv.next, %for.body6.split ], [ 1, %for.body6.preheader ]
-; CHECK:    br label %for.cond4.preheader.preheader.preheader
-; CHECK:    %add13 = add nsw i32 %Y.promoted
-
-
 ;; Not tightly nested. Do not interchange.
 ;;  for( int i=1;i<N;i++)
 ;;    for( int j=1;j<N;j++) {
@@ -121,6 +112,11 @@
 ;;      }
 ;;      Y+=B[j][i];
 ;;    }
+
+;; The loops are not tightly nested, so they must not be interchanged.
+;; Check that the pass's debug output reports that it cannot prove legality.
+; CHECK: Not interchanging loops. Cannot prove legality.
+
 define void @reduction_03(i32 %N)  {
 entry:
   %cmp35 = icmp sgt i32 %N, 1
@@ -168,14 +164,6 @@
 for.end19:                                        ; preds = %for.cond1.for.inc17_crit_edge, %entry
   ret void
 }
-;; Not tightly nested. Do not interchange.
-;; Not interchanged hence the phi's in the inner loop will not be split. Check for the same.
-; CHECK-LABEL: @reduction_03
-; CHECK:  for.body6:                                        ; preds = %for.body6.preheader, %for.body6
-; CHECK:    %indvars.iv = phi i64 [ %indvars.iv.next, %for.body6 ], [ 1, %for.body6.preheader ]
-; CHECK:    %add31 = phi i32 [ %add, %for.body6 ], [ %X.promoted, %for.body6.preheader ]
-
-
 
 ;; Multiple use of reduction not safe. Do not interchange.
 ;;  for( int i=1;i<N;i++)
@@ -184,6 +172,10 @@
 ;;        X+=A[k][j];
 ;;        Y+=X;
 ;;      }
+
+;; The loops are not interchanged: check that the pass's debug output reports that it cannot prove legality.
+; CHECK: Not interchanging loops. Cannot prove legality.
+
 define void @reduction_04(i32 %N) {
 entry:
   %cmp28 = icmp sgt i32 %N, 1
@@ -228,8 +220,3 @@
 for.end15:                                        ; preds = %for.inc13, %entry
   ret void
 }
-;; Not interchanged hence the phi's in the inner loop will not be split. Check for the same.
-; CHECK-LABEL: @reduction_04
-; CHECK:  for.body6:                                        ; preds = %for.body6.preheader, %for.body6
-; CHECK:    %indvars.iv = phi i64 [ %indvars.iv.next, %for.body6 ], [ 1, %for.body6.preheader ]
-; CHECK:    %add925 = phi i32 [ %add9, %for.body6 ], [ %Y.promoted, %for.body6.preheader ]