Recommit r129383. PreRA scheduler heuristic fixes: VRegCycle, TokenFactor latency.

Additional fixes:
Do something reasonable for subtargets with generic itineraries by
handling node latency the same way as for an empty itinerary. Nodes
now default to unit latency unless an itinerary explicitly specifies
a zero-cycle stage or the node is a TokenFactor chain.
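
Roughly, the rule is the following (a minimal sketch with hypothetical
simplified types; Stage, Itinerary, Node and computeLatency are
illustrations, not the actual ScheduleDAG/InstrItinerary API):

  #include <vector>

  struct Stage { unsigned Cycles; };

  struct Itinerary {
    std::vector<Stage> Stages;              // empty for generic itineraries
  };

  struct Node {
    bool IsTokenFactorChain;                // pure chain node (ISD::TokenFactor)
    const Itinerary *Itin;                  // may be null or empty
  };

  // Default every node to unit latency; only an explicit zero-cycle stage or
  // a TokenFactor chain node yields zero latency.  (Deriving a larger latency
  // from a full itinerary is elided here.)
  unsigned computeLatency(const Node &N) {
    if (N.IsTokenFactorChain)
      return 0;
    if (!N.Itin || N.Itin->Stages.empty())  // empty or generic itinerary
      return 1;
    for (const Stage &S : N.Itin->Stages)
      if (S.Cycles == 0)
        return 0;                           // itinerary explicitly says zero cycles
    return 1;
  }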

Original fixes:
UnitsSharePred was a source of randomness in the scheduler: node
priority depended on the queue data structure. I rewrote the recent
VRegCycle heuristics to completely replace the old heuristic without
any randomness. To make the node latency adjustments work, I also
needed to do something a little more reasonable with TokenFactor. I
gave it zero latency to its consumers and now always schedule it as
low as possible.
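
A rough sketch of the determinism and TokenFactor parts, again with
hypothetical types rather than the real RegReduction priority-queue
code.  It assumes a bottom-up ready queue, where picking a node
earlier places it lower in the final order:

  struct SchedNode {
    unsigned ID;                // stable node number, used for tie-breaking
    bool IsTokenFactor;         // pure chain node
    unsigned Height;            // critical-path height in cycles
  };

  // Latency a def contributes along an edge to its consumer: TokenFactor
  // adds none, so chains no longer inflate the critical path.
  unsigned edgeLatency(const SchedNode &Def) {
    return Def.IsTokenFactor ? 0 : 1;
  }

  // Returns true if A should be picked before B.  TokenFactor is taken as
  // soon as it is available (so it ends up as low as possible); remaining
  // ties are broken by the stable node ID rather than by queue position,
  // which removes the randomness mentioned above.
  bool pickBefore(const SchedNode &A, const SchedNode &B) {
    if (A.IsTokenFactor != B.IsTokenFactor)
      return A.IsTokenFactor;
    if (A.Height != B.Height)
      return A.Height > B.Height;           // prefer the longer critical path
    return A.ID < B.ID;                     // deterministic tie-break
  }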

llvm-svn: 129421
diff --git a/llvm/test/CodeGen/ARM/memcpy-inline.ll b/llvm/test/CodeGen/ARM/memcpy-inline.ll
index e8a2a3b..5bae037 100644
--- a/llvm/test/CodeGen/ARM/memcpy-inline.ll
+++ b/llvm/test/CodeGen/ARM/memcpy-inline.ll
@@ -1,10 +1,8 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=linearscan -disable-post-ra | FileCheck %s
-; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=basic -disable-post-ra | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -regalloc=linearscan -disable-post-ra | FileCheck %s
 
 ; The ARM magic hinting works best with linear scan.
-; CHECK: ldmia
-; CHECK: stmia
-; CHECK: ldrh
+; CHECK: ldrd
+; CHECK: strd
 ; CHECK: ldrb
 
 %struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
diff --git a/llvm/test/CodeGen/ARM/neon_div.ll b/llvm/test/CodeGen/ARM/neon_div.ll
index e337970..de48fee 100644
--- a/llvm/test/CodeGen/ARM/neon_div.ll
+++ b/llvm/test/CodeGen/ARM/neon_div.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -pre-RA-sched=source | FileCheck %s
 
 define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vrecpe.f32
diff --git a/llvm/test/CodeGen/ARM/va_arg.ll b/llvm/test/CodeGen/ARM/va_arg.ll
index 7cb9762..bb40453 100644
--- a/llvm/test/CodeGen/ARM/va_arg.ll
+++ b/llvm/test/CodeGen/ARM/va_arg.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -pre-RA-sched=source | FileCheck %s
 ; Test that we correctly align elements when using va_arg
 
 ; CHECK: test1:
 ; CHECK-NOT: bfc
-; CHECK: add	r0, r0, #7
-; CHECK: bfc	r0, #0, #3
+; CHECK: add	[[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7
+; CHECK: bfc	[[REG]], #0, #3
 ; CHECK-NOT: bfc
 
 define i64 @test1(i32 %i, ...) nounwind optsize {
@@ -19,8 +19,8 @@
 
 ; CHECK: test2:
 ; CHECK-NOT: bfc
-; CHECK: add	r0, r0, #7
-; CHECK: bfc	r0, #0, #3
+; CHECK: add	[[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7
+; CHECK: bfc	[[REG]], #0, #3
 ; CHECK-NOT:	bfc
 ; CHECK: bx	lr
 
diff --git a/llvm/test/CodeGen/ARM/vfp.ll b/llvm/test/CodeGen/ARM/vfp.ll
index 390457f..49a6982 100644
--- a/llvm/test/CodeGen/ARM/vfp.ll
+++ b/llvm/test/CodeGen/ARM/vfp.ll
@@ -40,8 +40,8 @@
 define void @test_ext_round(float* %P, double* %D) {
 ;CHECK: test_ext_round:
 	%a = load float* %P		; <float> [#uses=1]
-;CHECK: vcvt.f32.f64
 ;CHECK: vcvt.f64.f32
+;CHECK: vcvt.f32.f64
 	%b = fpext float %a to double		; <double> [#uses=1]
 	%A = load double* %D		; <double> [#uses=1]
 	%B = fptrunc double %A to float		; <float> [#uses=1]
diff --git a/llvm/test/CodeGen/Mips/o32_cc_vararg.ll b/llvm/test/CodeGen/Mips/o32_cc_vararg.ll
index 6601d25..1f71ed2 100644
--- a/llvm/test/CodeGen/Mips/o32_cc_vararg.ll
+++ b/llvm/test/CodeGen/Mips/o32_cc_vararg.ll
@@ -1,12 +1,12 @@
-; RUN: llc -march=mipsel -mcpu=mips2 < %s | FileCheck %s
-; RUN: llc -march=mipsel -mcpu=mips2 < %s -regalloc=basic | FileCheck %s
+; RUN: llc -march=mipsel -mcpu=mips2 -pre-RA-sched=source < %s | FileCheck %s
+; RUN: llc -march=mipsel -mcpu=mips2 -pre-RA-sched=source < %s -regalloc=basic | FileCheck %s
 
 
 ; All test functions do the same thing - they return the first variable
 ; argument.
 
-; All CHECK's do the same thing - they check whether variable arguments from 
-; registers are placed on correct stack locations, and whether the first 
+; All CHECK's do the same thing - they check whether variable arguments from
+; registers are placed on correct stack locations, and whether the first
 ; variable argument is returned from the correct stack location.
 
 
@@ -31,14 +31,14 @@
 
 ; CHECK: va1:
 ; CHECK: addiu   $sp, $sp, -32
-; CHECK: sw      $5, 36($sp)
-; CHECK: sw      $6, 40($sp)
 ; CHECK: sw      $7, 44($sp)
+; CHECK: sw      $6, 40($sp)
+; CHECK: sw      $5, 36($sp)
 ; CHECK: lw      $2, 36($sp)
 }
 
-; check whether the variable double argument will be accessed from the 8-byte 
-; aligned location (i.e. whether the address is computed by adding 7 and 
+; check whether the variable double argument will be accessed from the 8-byte
+; aligned location (i.e. whether the address is computed by adding 7 and
 ; clearing lower 3 bits)
 define double @va2(i32 %a, ...) nounwind {
 entry:
@@ -57,10 +57,10 @@
 
 ; CHECK: va2:
 ; CHECK: addiu   $sp, $sp, -40
-; CHECK: addiu   $[[R0:[0-9]+]], $sp, 44
-; CHECK: sw      $5, 44($sp)
-; CHECK: sw      $6, 48($sp)
 ; CHECK: sw      $7, 52($sp)
+; CHECK: sw      $6, 48($sp)
+; CHECK: sw      $5, 44($sp)
+; CHECK: addiu   $[[R0:[0-9]+]], $sp, 44
 ; CHECK: addiu   $[[R1:[0-9]+]], $[[R0]], 7
 ; CHECK: addiu   $[[R2:[0-9]+]], $zero, -8
 ; CHECK: and     $[[R3:[0-9]+]], $[[R1]], $[[R2]]
@@ -85,8 +85,8 @@
 
 ; CHECK: va3:
 ; CHECK: addiu   $sp, $sp, -40
-; CHECK: sw      $6, 48($sp)
 ; CHECK: sw      $7, 52($sp)
+; CHECK: sw      $6, 48($sp)
 ; CHECK: lw      $2, 48($sp)
 }
 
@@ -108,8 +108,8 @@
 
 ; CHECK: va4:
 ; CHECK: addiu   $sp, $sp, -48
-; CHECK: sw      $6, 56($sp)
 ; CHECK: sw      $7, 60($sp)
+; CHECK: sw      $6, 56($sp)
 ; CHECK: addiu   $[[R0:[0-9]+]], $sp, 56
 ; CHECK: addiu   $[[R1:[0-9]+]], $[[R0]], 7
 ; CHECK: addiu   $[[R2:[0-9]+]], $zero, -8
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-uxtb.ll b/llvm/test/CodeGen/Thumb2/thumb2-uxtb.ll
index 2074f98..35914b1 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-uxtb.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-uxtb.ll
@@ -128,9 +128,9 @@
 
 ; ARMv7M: test10
 ; ARMv7M: mov.w r1, #16253176
+; ARMv7M: mov.w r2, #458759
 ; ARMv7M: and.w r0, r1, r0, lsr #7
-; ARMv7M: mov.w r1, #458759
-; ARMv7M: and.w r1, r1, r0, lsr #5
+; ARMv7M: and.w r1, r2, r0, lsr #5
 ; ARMv7M: orrs r0, r1
 	%tmp1 = lshr i32 %p0, 7		; <i32> [#uses=1]
 	%tmp2 = and i32 %tmp1, 16253176		; <i32> [#uses=2]