[Hexagon] Simplify CFG after atomic expansion

This will remove suboptimal branching from the generated ll/sc loops.
The extra simplification pass affects a lot of testcases, which have
been modified to accommodate this change: either by modifying the
test to become immune to the CFG simplification, or (less preferablt)
by adding option -hexagon-initial-cfg-clenaup=0.

llvm-svn: 338774
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-concat-vectors-bool.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-concat-vectors-bool.ll
index 81e4199..1721c99 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/isel-concat-vectors-bool.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-concat-vectors-bool.ll
@@ -6,6 +6,8 @@
 target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
 target triple = "hexagon"
 
+@g0 = global <8 x i32> zeroinitializer, align 8
+
 define void @fred(<8 x float>* %a0, <8 x float>* %a1) #0 {
 b0:
   %v0 = load <8 x float>, <8 x float>* %a1, align 8
@@ -14,8 +16,8 @@
   %v3 = fcmp olt <8 x float> %v2, zeroinitializer
   %v4 = and <8 x i1> %v1, %v3
   %v5 = zext <8 x i1> %v4 to <8 x i32>
-  store <8 x i32> %v5, <8 x i32>* undef, align 8
-  unreachable
+  store <8 x i32> %v5, <8 x i32>* @g0, align 8
+  ret void
 }
 
 attributes #0 = { noinline norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" }