This patch fixes 8 out of 20 unexpected failures in "make check"
when run on an Intel Atom processor. The failures have arisen due
to changes elsewhere in the trunk over the past 8 weeks or so.
These failures were not detected by the Atom buildbot because the
CPU on the Atom buildbot was not being detected as an Atom CPU.
The fix for this problem is in Host.cpp and X86Subtarget.cpp, but it
shall remain commented out until the current set of Atom test failures
has been fixed.
Patch by Andy Zhang and Tyler Nowicki!
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160451 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/CodeGen/X86/atom-lea-sp.ll b/test/CodeGen/X86/atom-lea-sp.ll
index 5942788..19482e1 100644
--- a/test/CodeGen/X86/atom-lea-sp.ll
+++ b/test/CodeGen/X86/atom-lea-sp.ll
@@ -1,15 +1,15 @@
-; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck -check-prefix=atom %s
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck -check-prefix=ATOM %s
; RUN: llc < %s -mcpu=core2 -mtriple=i686-linux | FileCheck %s
declare void @use_arr(i8*)
declare void @many_params(i32, i32, i32, i32, i32, i32)
define void @test1() nounwind {
-; atom: test1:
-; atom: leal -1052(%esp), %esp
-; atom-NOT: sub
-; atom: call
-; atom: leal 1052(%esp), %esp
+; ATOM: test1:
+; ATOM: leal -1052(%esp), %esp
+; ATOM-NOT: sub
+; ATOM: call
+; ATOM: leal 1052(%esp), %esp
; CHECK: test1:
; CHECK: subl
@@ -22,10 +22,10 @@
}
define void @test2() nounwind {
-; atom: test2:
-; atom: leal -28(%esp), %esp
-; atom: call
-; atom: leal 28(%esp), %esp
+; ATOM: test2:
+; ATOM: leal -28(%esp), %esp
+; ATOM: call
+; ATOM: leal 28(%esp), %esp
; CHECK: test2:
; CHECK-NOT: lea
@@ -34,9 +34,9 @@
}
define void @test3() nounwind {
-; atom: test3:
-; atom: leal -8(%esp), %esp
-; atom: leal 8(%esp), %esp
+; ATOM: test3:
+; ATOM: leal -8(%esp), %esp
+; ATOM: leal 8(%esp), %esp
; CHECK: test3:
; CHECK-NOT: lea
diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll
index 655ab29..1344cdc 100644
--- a/test/CodeGen/X86/full-lsr.ll
+++ b/test/CodeGen/X86/full-lsr.ll
@@ -1,9 +1,17 @@
-; RUN: llc < %s -march=x86 >%t
-
-; RUN: grep "addl \$4," %t | count 3
-; RUN: not grep ",%" %t
+; RUN: llc < %s -march=x86 -mcpu=generic | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=atom | FileCheck -check-prefix=ATOM %s
define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind {
+; ATOM: foo
+; ATOM: addl
+; ATOM: leal
+; ATOM: leal
+
+; CHECK: foo
+; CHECK: addl
+; CHECK: addl
+; CHECK: addl
+
entry:
%0 = icmp sgt i32 %N, 0 ; <i1> [#uses=1]
br i1 %0, label %bb, label %return
diff --git a/test/CodeGen/X86/phys-reg-local-regalloc.ll b/test/CodeGen/X86/phys-reg-local-regalloc.ll
index c565684..37eca1c 100644
--- a/test/CodeGen/X86/phys-reg-local-regalloc.ll
+++ b/test/CodeGen/X86/phys-reg-local-regalloc.ll
@@ -1,6 +1,7 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin9 -regalloc=fast -optimize-regalloc=0 | FileCheck %s
-; RUN: llc -O0 < %s -march=x86 -mtriple=i386-apple-darwin9 -regalloc=fast | FileCheck %s
-; CHECKed instructions should be the same with or without -O0.
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin9 -mcpu=generic -regalloc=fast -optimize-regalloc=0 | FileCheck %s
+; RUN: llc -O0 < %s -march=x86 -mtriple=i386-apple-darwin9 -mcpu=generic -regalloc=fast | FileCheck %s
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin9 -mcpu=atom -regalloc=fast -optimize-regalloc=0 | FileCheck -check-prefix=ATOM %s
+; CHECKed instructions should be the same with or without -O0 except on Intel Atom due to instruction scheduling.
@.str = private constant [12 x i8] c"x + y = %i\0A\00", align 1 ; <[12 x i8]*> [#uses=1]
@@ -15,6 +16,19 @@
; CHECK: movl %ebx, 40(%esp)
; CHECK-NOT: movl
; CHECK: addl %ebx, %eax
+
+; On Intel Atom the scheduler moves a movl instruction
+; used for the printf call to follow movl 24(%esp), %eax
+; ATOM: movl 24(%esp), %eax
+; ATOM: movl
+; ATOM: movl %eax, 36(%esp)
+; ATOM-NOT: movl
+; ATOM: movl 28(%esp), %ebx
+; ATOM-NOT: movl
+; ATOM: movl %ebx, 40(%esp)
+; ATOM-NOT: movl
+; ATOM: addl %ebx, %eax
+
%retval = alloca i32 ; <i32*> [#uses=2]
%"%ebx" = alloca i32 ; <i32*> [#uses=1]
%"%eax" = alloca i32 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/X86/v-binop-widen2.ll b/test/CodeGen/X86/v-binop-widen2.ll
index ae3f55a..569586a 100644
--- a/test/CodeGen/X86/v-binop-widen2.ll
+++ b/test/CodeGen/X86/v-binop-widen2.ll
@@ -1,9 +1,16 @@
-; RUN: llc -march=x86 -mattr=+sse < %s | FileCheck %s
+; RUN: llc -march=x86 -mcpu=generic -mattr=+sse < %s | FileCheck %s
+; RUN: llc -march=x86 -mcpu=atom -mattr=+sse < %s | FileCheck -check-prefix=ATOM %s
%vec = type <6 x float>
; CHECK: divss
; CHECK: divss
; CHECK: divps
+
+; Scheduler causes a different instruction order to be produced on Intel Atom
+; ATOM: divps
+; ATOM: divss
+; ATOM: divss
+
define %vec @vecdiv( %vec %p1, %vec %p2)
{
%result = fdiv %vec %p1, %p2
diff --git a/test/CodeGen/X86/vshift-1.ll b/test/CodeGen/X86/vshift-1.ll
index 4955156..e775750 100644
--- a/test/CodeGen/X86/vshift-1.ll
+++ b/test/CodeGen/X86/vshift-1.ll
@@ -16,7 +16,7 @@
entry:
; CHECK: shift1b:
; CHECK: movd
-; CHECK-NEXT: psllq
+; CHECK: psllq
%0 = insertelement <2 x i64> undef, i64 %amt, i32 0
%1 = insertelement <2 x i64> %0, i64 %amt, i32 1
%shl = shl <2 x i64> %val, %1
@@ -38,7 +38,7 @@
entry:
; CHECK: shift2b:
; CHECK: movd
-; CHECK-NEXT: pslld
+; CHECK: pslld
%0 = insertelement <4 x i32> undef, i32 %amt, i32 0
%1 = insertelement <4 x i32> %0, i32 %amt, i32 1
%2 = insertelement <4 x i32> %1, i32 %amt, i32 2
diff --git a/test/CodeGen/X86/vshift-2.ll b/test/CodeGen/X86/vshift-2.ll
index 9a9b419..9496893 100644
--- a/test/CodeGen/X86/vshift-2.ll
+++ b/test/CodeGen/X86/vshift-2.ll
@@ -16,7 +16,7 @@
entry:
; CHECK: shift1b:
; CHECK: movd
-; CHECK-NEXT: psrlq
+; CHECK: psrlq
%0 = insertelement <2 x i64> undef, i64 %amt, i32 0
%1 = insertelement <2 x i64> %0, i64 %amt, i32 1
%lshr = lshr <2 x i64> %val, %1
@@ -37,7 +37,7 @@
entry:
; CHECK: shift2b:
; CHECK: movd
-; CHECK-NEXT: psrld
+; CHECK: psrld
%0 = insertelement <4 x i32> undef, i32 %amt, i32 0
%1 = insertelement <4 x i32> %0, i32 %amt, i32 1
%2 = insertelement <4 x i32> %1, i32 %amt, i32 2
@@ -63,7 +63,7 @@
; CHECK: shift3b:
; CHECK: movzwl
; CHECK: movd
-; CHECK-NEXT: psrlw
+; CHECK: psrlw
%0 = insertelement <8 x i16> undef, i16 %amt, i32 0
%1 = insertelement <8 x i16> %0, i16 %amt, i32 1
%2 = insertelement <8 x i16> %0, i16 %amt, i32 2
diff --git a/test/CodeGen/X86/vshift-3.ll b/test/CodeGen/X86/vshift-3.ll
index 8e8a9aa..b2b48b9 100644
--- a/test/CodeGen/X86/vshift-3.ll
+++ b/test/CodeGen/X86/vshift-3.ll
@@ -28,7 +28,7 @@
entry:
; CHECK: shift2b:
; CHECK: movd
-; CHECK-NEXT: psrad
+; CHECK: psrad
%0 = insertelement <4 x i32> undef, i32 %amt, i32 0
%1 = insertelement <4 x i32> %0, i32 %amt, i32 1
%2 = insertelement <4 x i32> %1, i32 %amt, i32 2
@@ -52,7 +52,7 @@
; CHECK: shift3b:
; CHECK: movzwl
; CHECK: movd
-; CHECK-NEXT: psraw
+; CHECK: psraw
%0 = insertelement <8 x i16> undef, i16 %amt, i32 0
%1 = insertelement <8 x i16> %0, i16 %amt, i32 1
%2 = insertelement <8 x i16> %0, i16 %amt, i32 2
diff --git a/test/CodeGen/X86/vshift-5.ll b/test/CodeGen/X86/vshift-5.ll
index cb254ae..f6c311d 100644
--- a/test/CodeGen/X86/vshift-5.ll
+++ b/test/CodeGen/X86/vshift-5.ll
@@ -6,7 +6,7 @@
entry:
; CHECK: shift5a:
; CHECK: movd
-; CHECK-NEXT: pslld
+; CHECK: pslld
%amt = load i32* %pamt
%tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
%shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -20,7 +20,7 @@
entry:
; CHECK: shift5b:
; CHECK: movd
-; CHECK-NEXT: psrad
+; CHECK: psrad
%amt = load i32* %pamt
%tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
%shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -34,7 +34,7 @@
entry:
; CHECK: shift5c:
; CHECK: movd
-; CHECK-NEXT: pslld
+; CHECK: pslld
%tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
%shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
%shl = shl <4 x i32> %val, %shamt
@@ -47,7 +47,7 @@
entry:
; CHECK: shift5d:
; CHECK: movd
-; CHECK-NEXT: psrad
+; CHECK: psrad
%tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
%shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
%shr = ashr <4 x i32> %val, %shamt
diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll
index d886f2c..ebdfea9 100644
--- a/test/CodeGen/X86/widen_cast-1.ll
+++ b/test/CodeGen/X86/widen_cast-1.ll
@@ -1,8 +1,15 @@
-; RUN: llc -march=x86 -mattr=+sse42 < %s | FileCheck %s
+; RUN: llc -march=x86 -mcpu=generic -mattr=+sse42 < %s | FileCheck %s
+; RUN: llc -march=x86 -mcpu=atom -mattr=+sse42 < %s | FileCheck -check-prefix=ATOM %s
+
; CHECK: paddd
; CHECK: movl
; CHECK: movlpd
+; Scheduler causes a different instruction order to be produced on Intel Atom
+; ATOM: movl
+; ATOM: paddd
+; ATOM: movlpd
+
; bitcast a v4i16 to v2i32
define void @convert(<2 x i32>* %dst, <4 x i16>* %src) nounwind {