Re-apply r84295, with fixes to how the loop "top" and "bottom" blocks are
tracked. Instead of trying to manually keep track of these locations
while doing complex modifications, just recompute them when they're needed.
This fixes a bug in which the TopMBB and BotMBB were not correctly updated,
leading to invalid transformations.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@84598 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll b/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
index 4d25b0f..d7b9463 100644
--- a/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
+++ b/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep asm-printer | grep 84
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep asm-printer | grep 83
 ; rdar://6802189
 
 ; Test if linearscan is unfavoring registers for allocation to allow more reuse
diff --git a/test/CodeGen/X86/discontiguous-loops.ll b/test/CodeGen/X86/discontiguous-loops.ll
new file mode 100644
index 0000000..479c450
--- /dev/null
+++ b/test/CodeGen/X86/discontiguous-loops.ll
@@ -0,0 +1,72 @@
+; RUN: llc -verify-loop-info -verify-dom-info -march=x86-64 < %s
+; PR5243
+
+@.str96 = external constant [37 x i8], align 8    ; <[37 x i8]*> [#uses=1]
+
+define void @foo() nounwind {
+bb:
+  br label %ybb1
+
+ybb1:                                              ; preds = %yybb13, %xbb6, %bb
+  switch i32 undef, label %bb18 [
+    i32 150, label %ybb2
+    i32 151, label %bb17
+    i32 152, label %bb19
+    i32 157, label %ybb8
+  ]
+
+ybb2:                                              ; preds = %ybb1
+  %tmp = icmp eq i8** undef, null                 ; <i1> [#uses=1]
+  br i1 %tmp, label %bb3, label %xbb6
+
+bb3:                                              ; preds = %ybb2
+  unreachable
+
+xbb4:                                              ; preds = %xbb6
+  store i32 0, i32* undef, align 8
+  br i1 undef, label %xbb6, label %bb5
+
+bb5:                                              ; preds = %xbb4
+  call fastcc void @decl_mode_check_failed() nounwind
+  unreachable
+
+xbb6:                                              ; preds = %xbb4, %ybb2
+  %tmp7 = icmp slt i32 undef, 0                   ; <i1> [#uses=1]
+  br i1 %tmp7, label %xbb4, label %ybb1
+
+ybb8:                                              ; preds = %ybb1
+  %tmp9 = icmp eq i8** undef, null                ; <i1> [#uses=1]
+  br i1 %tmp9, label %bb10, label %ybb12
+
+bb10:                                             ; preds = %ybb8
+  %tmp11 = load i8** undef, align 8               ; <i8*> [#uses=1]
+  call void (i8*, ...)* @fatal(i8* getelementptr inbounds ([37 x i8]* @.str96, i64 0, i64 0), i8* %tmp11) nounwind
+  unreachable
+
+ybb12:                                             ; preds = %ybb8
+  br i1 undef, label %bb15, label %ybb13
+
+ybb13:                                             ; preds = %ybb12
+  %tmp14 = icmp sgt i32 undef, 0                  ; <i1> [#uses=1]
+  br i1 %tmp14, label %bb16, label %ybb1
+
+bb15:                                             ; preds = %ybb12
+  call void (i8*, ...)* @fatal(i8* getelementptr inbounds ([37 x i8]* @.str96, i64 0, i64 0), i8* undef) nounwind
+  unreachable
+
+bb16:                                             ; preds = %ybb13
+  unreachable
+
+bb17:                                             ; preds = %ybb1
+  unreachable
+
+bb18:                                             ; preds = %ybb1
+  unreachable
+
+bb19:                                             ; preds = %ybb1
+  unreachable
+}
+
+declare void @fatal(i8*, ...)
+
+declare fastcc void @decl_mode_check_failed() nounwind
diff --git a/test/CodeGen/X86/loop-blocks.ll b/test/CodeGen/X86/loop-blocks.ll
new file mode 100644
index 0000000..af50bd9
--- /dev/null
+++ b/test/CodeGen/X86/loop-blocks.ll
@@ -0,0 +1,207 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -asm-verbose=false | FileCheck %s
+
+; These tests check for loop branching structure, and that the loop align
+; directive is placed in the expected place.
+
+; CodeGen should insert a branch into the middle of the loop in
+; order to avoid a branch within the loop.
+
+; CHECK: simple:
+;      CHECK:   jmp   .LBB1_1
+; CHECK-NEXT:   align
+; CHECK-NEXT: .LBB1_2:
+; CHECK-NEXT:   call loop_latch
+; CHECK-NEXT: .LBB1_1:
+; CHECK-NEXT:   call loop_header
+
+define void @simple() nounwind {
+entry:
+  br label %loop
+
+loop:
+  call void @loop_header()
+  %t0 = tail call i32 @get()
+  %t1 = icmp slt i32 %t0, 0
+  br i1 %t1, label %done, label %bb
+
+bb:
+  call void @loop_latch()
+  br label %loop
+
+done:
+  call void @exit()
+  ret void
+}
+
+; CodeGen should move block_a to the top of the loop so that it
+; falls through into the loop, avoiding a branch within the loop.
+
+; CHECK: slightly_more_involved:
+;      CHECK:   jmp .LBB2_1
+; CHECK-NEXT:   align
+; CHECK-NEXT: .LBB2_4:
+; CHECK-NEXT:   call bar99
+; CHECK-NEXT: .LBB2_1:
+; CHECK-NEXT:   call body
+
+define void @slightly_more_involved() nounwind {
+entry:
+  br label %loop
+
+loop:
+  call void @body()
+  %t0 = call i32 @get()
+  %t1 = icmp slt i32 %t0, 2
+  br i1 %t1, label %block_a, label %bb
+
+bb:
+  %t2 = call i32 @get()
+  %t3 = icmp slt i32 %t2, 99
+  br i1 %t3, label %exit, label %loop
+
+block_a:
+  call void @bar99()
+  br label %loop
+
+exit:
+  call void @exit()
+  ret void
+}
+
+; Same as slightly_more_involved, but block_a is now a CFG diamond with
+; fallthrough edges which should be preserved.
+
+; CHECK: yet_more_involved:
+;      CHECK:   jmp .LBB3_1
+; CHECK-NEXT:   align
+; CHECK-NEXT: .LBB3_7:
+; CHECK-NEXT:   call block_a_true_func
+; CHECK-NEXT:   jmp .LBB3_4
+; CHECK-NEXT: .LBB3_2:
+; CHECK-NEXT:   call bar99
+; CHECK-NEXT:   call get
+; CHECK-NEXT:   cmpl $2999, %eax
+; CHECK-NEXT:   jle .LBB3_7
+; CHECK-NEXT:   call block_a_false_func
+; CHECK-NEXT: .LBB3_4:
+; CHECK-NEXT:   call block_a_merge_func
+; CHECK-NEXT: .LBB3_1:
+; CHECK-NEXT:   call body
+
+define void @yet_more_involved() nounwind {
+entry:
+  br label %loop
+
+loop:
+  call void @body()
+  %t0 = call i32 @get()
+  %t1 = icmp slt i32 %t0, 2
+  br i1 %t1, label %block_a, label %bb
+
+bb:
+  %t2 = call i32 @get()
+  %t3 = icmp slt i32 %t2, 99
+  br i1 %t3, label %exit, label %loop
+
+block_a:
+  call void @bar99()
+  %z0 = call i32 @get()
+  %z1 = icmp slt i32 %z0, 3000
+  br i1 %z1, label %block_a_true, label %block_a_false
+
+block_a_true:
+  call void @block_a_true_func()
+  br label %block_a_merge
+
+block_a_false:
+  call void @block_a_false_func()
+  br label %block_a_merge
+
+block_a_merge:
+  call void @block_a_merge_func()
+  br label %loop
+
+exit:
+  call void @exit()
+  ret void
+}
+
+; CodeGen should move the CFG islands that are part of the loop but don't
+; conveniently fit anywhere so that they are at least contiguous with the
+; loop.
+
+; CHECK: cfg_islands:
+;      CHECK:   jmp     .LBB4_1
+; CHECK-NEXT:   align
+; CHECK-NEXT: .LBB4_7:
+; CHECK-NEXT:   call    bar100
+; CHECK-NEXT:   jmp     .LBB4_1
+; CHECK-NEXT: .LBB4_8:
+; CHECK-NEXT:   call    bar101
+; CHECK-NEXT:   jmp     .LBB4_1
+; CHECK-NEXT: .LBB4_9:
+; CHECK-NEXT:   call    bar102
+; CHECK-NEXT:   jmp     .LBB4_1
+; CHECK-NEXT: .LBB4_5:
+; CHECK-NEXT:   call    loop_latch
+; CHECK-NEXT: .LBB4_1:
+; CHECK-NEXT:   call    loop_header
+
+define void @cfg_islands() nounwind {
+entry:
+  br label %loop
+
+loop:
+  call void @loop_header()
+  %t0 = call i32 @get()
+  %t1 = icmp slt i32 %t0, 100
+  br i1 %t1, label %block100, label %bb
+
+bb:
+  %t2 = call i32 @get()
+  %t3 = icmp slt i32 %t2, 101
+  br i1 %t3, label %block101, label %bb1
+
+bb1:
+  %t4 = call i32 @get()
+  %t5 = icmp slt i32 %t4, 102
+  br i1 %t5, label %block102, label %bb2
+
+bb2:
+  %t6 = call i32 @get()
+  %t7 = icmp slt i32 %t6, 103
+  br i1 %t7, label %exit, label %bb3
+
+bb3:
+  call void @loop_latch()
+  br label %loop
+
+exit:
+  call void @exit()
+  ret void
+
+block100:
+  call void @bar100()
+  br label %loop
+
+block101:
+  call void @bar101()
+  br label %loop
+
+block102:
+  call void @bar102()
+  br label %loop
+}
+
+declare void @bar99() nounwind
+declare void @bar100() nounwind
+declare void @bar101() nounwind
+declare void @bar102() nounwind
+declare void @body() nounwind
+declare void @exit() nounwind
+declare void @loop_header() nounwind
+declare void @loop_latch() nounwind
+declare i32 @get() nounwind
+declare void @block_a_true_func() nounwind
+declare void @block_a_false_func() nounwind
+declare void @block_a_merge_func() nounwind