Mark the SPU 'lr' instruction as never having side effects.
This allows the fast register allocator to remove redundant
register moves.
Update the set of tests that depend on the register allocator
being linear scan so that they request it explicitly.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106420 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/CodeGen/CellSPU/call.ll b/test/CodeGen/CellSPU/call.ll
index 960d2fe..eb7cf2c 100644
--- a/test/CodeGen/CellSPU/call.ll
+++ b/test/CodeGen/CellSPU/call.ll
@@ -1,7 +1,8 @@
-; RUN: llc < %s -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu -regalloc=linearscan > %t1.s
 ; RUN: grep brsl    %t1.s | count 1
 ; RUN: grep brasl   %t1.s | count 1
 ; RUN: grep stqd    %t1.s | count 80
+; RUN: llc < %s -march=cellspu | FileCheck %s
 
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"
@@ -16,6 +17,8 @@
 declare void @extern_stub_1(i32, i32)
 
 define i32 @stub_1(i32 %x, float %y) {
+ ; CHECK: il $3, 0
+ ; CHECK: bi $lr 
 entry:
   ret i32 0
 }
diff --git a/test/CodeGen/CellSPU/call_indirect.ll b/test/CodeGen/CellSPU/call_indirect.ll
index 08dad74..d94d77c 100644
--- a/test/CodeGen/CellSPU/call_indirect.ll
+++ b/test/CodeGen/CellSPU/call_indirect.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=cellspu -asm-verbose=0 > %t1.s
-; RUN: llc < %s -march=cellspu -mattr=large_mem -asm-verbose=0 > %t2.s
+; RUN: llc < %s -march=cellspu -asm-verbose=0 -regalloc=linearscan > %t1.s
+; RUN: llc < %s -march=cellspu -mattr=large_mem -asm-verbose=0 -regalloc=linearscan > %t2.s
 ; RUN: grep bisl    %t1.s | count 7
 ; RUN: grep ila     %t1.s | count 1
 ; RUN: grep rotqby  %t1.s | count 5
diff --git a/test/CodeGen/CellSPU/jumptable.ll b/test/CodeGen/CellSPU/jumptable.ll
index d7d1ef4..42b41b3 100644
--- a/test/CodeGen/CellSPU/jumptable.ll
+++ b/test/CodeGen/CellSPU/jumptable.ll
@@ -2,9 +2,9 @@
 ; This is to check that emitting jumptables doesn't crash llc
 define i32 @test(i32 %param) {
 entry:
-;CHECK:        ai      $4, $3, -1
-;CHECK:        clgti   $5, $4, 3
-;CHECK:        brnz    $5,.LBB0_2
+;CHECK:        ai      {{\$.}}, $3, -1
+;CHECK:        clgti   {{\$., \$.}}, 3
+;CHECK:        brnz    {{\$.}},.LBB0_2
   switch i32 %param, label %bb1 [
     i32 1, label %bb3
     i32 2, label %bb2
diff --git a/test/CodeGen/CellSPU/loads.ll b/test/CodeGen/CellSPU/loads.ll
index c46bcd1..4284c22 100644
--- a/test/CodeGen/CellSPU/loads.ll
+++ b/test/CodeGen/CellSPU/loads.ll
@@ -22,13 +22,15 @@
 
 declare <4 x i32>* @getv4f32ptr()
 define <4 x i32> @func() {
-        ;CHECK: brasl
-        ;CHECK: lr	{{\$[0-9]*, \$3}}
-        ;CHECK: brasl
-        %rv1 = call <4 x i32>* @getv4f32ptr()
-        %rv2 = call <4 x i32>* @getv4f32ptr()
-        %rv3 = load <4 x i32>* %rv1
-        ret <4 x i32> %rv3
+	;CHECK: brasl
+	; we need to have some instruction to move the result to safety.
+	; which instruction (lr, stqd...) depends on the regalloc
+	;CHECK: {{.*}}
+	;CHECK: brasl
+	%rv1 = call <4 x i32>* @getv4f32ptr()
+	%rv2 = call <4 x i32>* @getv4f32ptr()
+	%rv3 = load <4 x i32>* %rv1
+	ret <4 x i32> %rv3
 }
 
 define <4 x float> @load_undef(){