Recommit r129383. PreRA scheduler heuristic fixes: VRegCycle, TokenFactor latency.
Additional fixes:
Do something reasonable for subtargets with generic
itineraries by handling node latency the same as for an empty
itinerary. Now nodes default to unit latency unless an itinerary
explicitly specifies a zero cycle stage or it is a TokenFactor chain.
Original fixes:
UnitsSharePred was a source of randomness in the scheduler: node
priority depended on the queue data structure. I rewrote the recent
VRegCycle heuristics to completely replace the old heuristic without
any randomness. To make the node latency adjustments work, I also
needed to do something a little more reasonable with TokenFactor. I
gave it zero latency to its consumers and always schedule it as low as
possible.
llvm-svn: 129421
diff --git a/llvm/test/CodeGen/ARM/memcpy-inline.ll b/llvm/test/CodeGen/ARM/memcpy-inline.ll
index e8a2a3b..5bae037 100644
--- a/llvm/test/CodeGen/ARM/memcpy-inline.ll
+++ b/llvm/test/CodeGen/ARM/memcpy-inline.ll
@@ -1,10 +1,8 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=linearscan -disable-post-ra | FileCheck %s
-; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=basic -disable-post-ra | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -regalloc=linearscan -disable-post-ra | FileCheck %s
; The ARM magic hinting works best with linear scan.
-; CHECK: ldmia
-; CHECK: stmia
-; CHECK: ldrh
+; CHECK: ldrd
+; CHECK: strd
; CHECK: ldrb
%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
diff --git a/llvm/test/CodeGen/ARM/neon_div.ll b/llvm/test/CodeGen/ARM/neon_div.ll
index e337970..de48fee 100644
--- a/llvm/test/CodeGen/ARM/neon_div.ll
+++ b/llvm/test/CodeGen/ARM/neon_div.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -pre-RA-sched=source | FileCheck %s
define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vrecpe.f32
diff --git a/llvm/test/CodeGen/ARM/va_arg.ll b/llvm/test/CodeGen/ARM/va_arg.ll
index 7cb9762..bb40453 100644
--- a/llvm/test/CodeGen/ARM/va_arg.ll
+++ b/llvm/test/CodeGen/ARM/va_arg.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -pre-RA-sched=source | FileCheck %s
; Test that we correctly align elements when using va_arg
; CHECK: test1:
; CHECK-NOT: bfc
-; CHECK: add r0, r0, #7
-; CHECK: bfc r0, #0, #3
+; CHECK: add [[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7
+; CHECK: bfc [[REG]], #0, #3
; CHECK-NOT: bfc
define i64 @test1(i32 %i, ...) nounwind optsize {
@@ -19,8 +19,8 @@
; CHECK: test2:
; CHECK-NOT: bfc
-; CHECK: add r0, r0, #7
-; CHECK: bfc r0, #0, #3
+; CHECK: add [[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7
+; CHECK: bfc [[REG]], #0, #3
; CHECK-NOT: bfc
; CHECK: bx lr
diff --git a/llvm/test/CodeGen/ARM/vfp.ll b/llvm/test/CodeGen/ARM/vfp.ll
index 390457f..49a6982 100644
--- a/llvm/test/CodeGen/ARM/vfp.ll
+++ b/llvm/test/CodeGen/ARM/vfp.ll
@@ -40,8 +40,8 @@
define void @test_ext_round(float* %P, double* %D) {
;CHECK: test_ext_round:
%a = load float* %P ; <float> [#uses=1]
-;CHECK: vcvt.f32.f64
;CHECK: vcvt.f64.f32
+;CHECK: vcvt.f32.f64
%b = fpext float %a to double ; <double> [#uses=1]
%A = load double* %D ; <double> [#uses=1]
%B = fptrunc double %A to float ; <float> [#uses=1]
diff --git a/llvm/test/CodeGen/Mips/o32_cc_vararg.ll b/llvm/test/CodeGen/Mips/o32_cc_vararg.ll
index 6601d25..1f71ed2 100644
--- a/llvm/test/CodeGen/Mips/o32_cc_vararg.ll
+++ b/llvm/test/CodeGen/Mips/o32_cc_vararg.ll
@@ -1,12 +1,12 @@
-; RUN: llc -march=mipsel -mcpu=mips2 < %s | FileCheck %s
-; RUN: llc -march=mipsel -mcpu=mips2 < %s -regalloc=basic | FileCheck %s
+; RUN: llc -march=mipsel -mcpu=mips2 -pre-RA-sched=source < %s | FileCheck %s
+; RUN: llc -march=mipsel -mcpu=mips2 -pre-RA-sched=source < %s -regalloc=basic | FileCheck %s
; All test functions do the same thing - they return the first variable
; argument.
-; All CHECK's do the same thing - they check whether variable arguments from
-; registers are placed on correct stack locations, and whether the first
+; All CHECK's do the same thing - they check whether variable arguments from
+; registers are placed on correct stack locations, and whether the first
; variable argument is returned from the correct stack location.
@@ -31,14 +31,14 @@
; CHECK: va1:
; CHECK: addiu $sp, $sp, -32
-; CHECK: sw $5, 36($sp)
-; CHECK: sw $6, 40($sp)
; CHECK: sw $7, 44($sp)
+; CHECK: sw $6, 40($sp)
+; CHECK: sw $5, 36($sp)
; CHECK: lw $2, 36($sp)
}
-; check whether the variable double argument will be accessed from the 8-byte
-; aligned location (i.e. whether the address is computed by adding 7 and
+; check whether the variable double argument will be accessed from the 8-byte
+; aligned location (i.e. whether the address is computed by adding 7 and
; clearing lower 3 bits)
define double @va2(i32 %a, ...) nounwind {
entry:
@@ -57,10 +57,10 @@
; CHECK: va2:
; CHECK: addiu $sp, $sp, -40
-; CHECK: addiu $[[R0:[0-9]+]], $sp, 44
-; CHECK: sw $5, 44($sp)
-; CHECK: sw $6, 48($sp)
; CHECK: sw $7, 52($sp)
+; CHECK: sw $6, 48($sp)
+; CHECK: sw $5, 44($sp)
+; CHECK: addiu $[[R0:[0-9]+]], $sp, 44
; CHECK: addiu $[[R1:[0-9]+]], $[[R0]], 7
; CHECK: addiu $[[R2:[0-9]+]], $zero, -8
; CHECK: and $[[R3:[0-9]+]], $[[R1]], $[[R2]]
@@ -85,8 +85,8 @@
; CHECK: va3:
; CHECK: addiu $sp, $sp, -40
-; CHECK: sw $6, 48($sp)
; CHECK: sw $7, 52($sp)
+; CHECK: sw $6, 48($sp)
; CHECK: lw $2, 48($sp)
}
@@ -108,8 +108,8 @@
; CHECK: va4:
; CHECK: addiu $sp, $sp, -48
-; CHECK: sw $6, 56($sp)
; CHECK: sw $7, 60($sp)
+; CHECK: sw $6, 56($sp)
; CHECK: addiu $[[R0:[0-9]+]], $sp, 56
; CHECK: addiu $[[R1:[0-9]+]], $[[R0]], 7
; CHECK: addiu $[[R2:[0-9]+]], $zero, -8
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-uxtb.ll b/llvm/test/CodeGen/Thumb2/thumb2-uxtb.ll
index 2074f98..35914b1 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-uxtb.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-uxtb.ll
@@ -128,9 +128,9 @@
; ARMv7M: test10
; ARMv7M: mov.w r1, #16253176
+; ARMv7M: mov.w r2, #458759
; ARMv7M: and.w r0, r1, r0, lsr #7
-; ARMv7M: mov.w r1, #458759
-; ARMv7M: and.w r1, r1, r0, lsr #5
+; ARMv7M: and.w r1, r2, r0, lsr #5
; ARMv7M: orrs r0, r1
%tmp1 = lshr i32 %p0, 7 ; <i32> [#uses=1]
%tmp2 = and i32 %tmp1, 16253176 ; <i32> [#uses=2]