[X86][Haswell] Updating HSW instruction scheduling information
This patch completely replaces the instruction scheduling information for the Haswell architecture target by modifying the file X86SchedHaswell.td located under the X86 Target.
We used scheduling information obtained from the Haswell architects to replace and modify the existing scheduling information.
The patch continues the scheduling replacement effort started with the SNB target in r307529 and r310792.
The information includes the latency, the number of micro-ops, and the ports used by each HSW instruction.
Please expect some performance fluctuations due to code alignment effects.
Reviewers: RKSimon, zvi, aymanmus, craig.topper, m_zuckerman, igorb, dim, chandlerc, aaboud
Differential Revision: https://reviews.llvm.org/D36663
llvm-svn: 311879
diff --git a/llvm/test/CodeGen/X86/sse-schedule.ll b/llvm/test/CodeGen/X86/sse-schedule.ll
index 2ddefa1..8eb7b3f 100644
--- a/llvm/test/CodeGen/X86/sse-schedule.ll
+++ b/llvm/test/CodeGen/X86/sse-schedule.ll
@@ -37,8 +37,8 @@
; HASWELL-LABEL: test_addps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_addps:
; BTVER2: # BB#0:
@@ -85,8 +85,8 @@
; HASWELL-LABEL: test_addss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_addss:
; BTVER2: # BB#0:
@@ -137,8 +137,8 @@
; HASWELL-LABEL: test_andps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_andps:
; BTVER2: # BB#0:
@@ -193,8 +193,8 @@
; HASWELL-LABEL: test_andnotps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_andnotps:
; BTVER2: # BB#0:
@@ -251,9 +251,9 @@
; HASWELL-LABEL: test_cmpps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; HASWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; HASWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cmpps:
; BTVER2: # BB#0:
@@ -306,7 +306,7 @@
; HASWELL: # BB#0:
; HASWELL-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cmpss:
; BTVER2: # BB#0:
@@ -399,7 +399,7 @@
; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25]
; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_comiss:
; BTVER2: # BB#0:
@@ -470,7 +470,7 @@
; HASWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
; HASWELL-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cvtsi2ss:
; BTVER2: # BB#0:
@@ -523,10 +523,10 @@
;
; HASWELL-LABEL: test_cvtsi2ssq:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
+; HASWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cvtsi2ssq:
; BTVER2: # BB#0:
@@ -580,9 +580,9 @@
; HASWELL-LABEL: test_cvtss2si:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00]
-; HASWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [8:1.00]
+; HASWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [4:1.00]
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cvtss2si:
; BTVER2: # BB#0:
@@ -639,9 +639,9 @@
; HASWELL-LABEL: test_cvtss2siq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00]
-; HASWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [8:1.00]
+; HASWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [4:1.00]
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cvtss2siq:
; BTVER2: # BB#0:
@@ -698,9 +698,9 @@
; HASWELL-LABEL: test_cvttss2si:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00]
-; HASWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [8:1.00]
+; HASWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [4:1.00]
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cvttss2si:
; BTVER2: # BB#0:
@@ -754,9 +754,9 @@
; HASWELL-LABEL: test_cvttss2siq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00]
-; HASWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [8:1.00]
+; HASWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [4:1.00]
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cvttss2siq:
; BTVER2: # BB#0:
@@ -805,9 +805,9 @@
;
; HASWELL-LABEL: test_divps:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
-; HASWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
+; HASWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_divps:
; BTVER2: # BB#0:
@@ -853,9 +853,9 @@
;
; HASWELL-LABEL: test_divss:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
-; HASWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
+; HASWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_divss:
; BTVER2: # BB#0:
@@ -902,8 +902,8 @@
; HASWELL-LABEL: test_ldmxcsr:
; HASWELL: # BB#0:
; HASWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; HASWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [6:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_ldmxcsr:
; BTVER2: # BB#0:
@@ -952,8 +952,8 @@
; HASWELL-LABEL: test_maxps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_maxps:
; BTVER2: # BB#0:
@@ -1001,8 +1001,8 @@
; HASWELL-LABEL: test_maxss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_maxss:
; BTVER2: # BB#0:
@@ -1050,8 +1050,8 @@
; HASWELL-LABEL: test_minps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_minps:
; BTVER2: # BB#0:
@@ -1099,8 +1099,8 @@
; HASWELL-LABEL: test_minss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_minss:
; BTVER2: # BB#0:
@@ -1151,10 +1151,10 @@
;
; HASWELL-LABEL: test_movaps:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [4:0.50]
+; HASWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movaps:
; BTVER2: # BB#0:
@@ -1207,7 +1207,7 @@
; HASWELL-LABEL: test_movhlps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movhlps:
; BTVER2: # BB#0:
@@ -1257,10 +1257,10 @@
;
; HASWELL-LABEL: test_movhps:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00]
+; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movhps:
; BTVER2: # BB#0:
@@ -1316,7 +1316,7 @@
; HASWELL: # BB#0:
; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movlhps:
; BTVER2: # BB#0:
@@ -1365,10 +1365,10 @@
;
; HASWELL-LABEL: test_movlps:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00]
+; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movlps:
; BTVER2: # BB#0:
@@ -1419,7 +1419,7 @@
; HASWELL-LABEL: test_movmskps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movmskps:
; BTVER2: # BB#0:
@@ -1465,7 +1465,7 @@
; HASWELL-LABEL: test_movntps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movntps:
; BTVER2: # BB#0:
@@ -1511,10 +1511,10 @@
;
; HASWELL-LABEL: test_movss_mem:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [4:0.50]
+; HASWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50]
; HASWELL-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movss_mem:
; BTVER2: # BB#0:
@@ -1565,7 +1565,7 @@
; HASWELL-LABEL: test_movss_reg:
; HASWELL: # BB#0:
; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movss_reg:
; BTVER2: # BB#0:
@@ -1611,10 +1611,10 @@
;
; HASWELL-LABEL: test_movups:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [4:0.50]
+; HASWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movups:
; BTVER2: # BB#0:
@@ -1663,8 +1663,8 @@
; HASWELL-LABEL: test_mulps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_mulps:
; BTVER2: # BB#0:
@@ -1711,8 +1711,8 @@
; HASWELL-LABEL: test_mulss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_mulss:
; BTVER2: # BB#0:
@@ -1763,8 +1763,8 @@
; HASWELL-LABEL: test_orps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_orps:
; BTVER2: # BB#0:
@@ -1816,8 +1816,8 @@
;
; HASWELL-LABEL: test_prefetchnta:
; HASWELL: # BB#0:
-; HASWELL-NEXT: prefetchnta (%rdi) # sched: [4:0.50]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: prefetchnta (%rdi) # sched: [1:0.50]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_prefetchnta:
; BTVER2: # BB#0:
@@ -1867,9 +1867,9 @@
; HASWELL-LABEL: test_rcpps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [9:1.00]
+; HASWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_rcpps:
; BTVER2: # BB#0:
@@ -1929,11 +1929,11 @@
;
; HASWELL-LABEL: test_rcpss:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
-; HASWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [9:1.00]
+; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
+; HASWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_rcpss:
; BTVER2: # BB#0:
@@ -1994,9 +1994,9 @@
; HASWELL-LABEL: test_rsqrtps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [9:1.00]
+; HASWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_rsqrtps:
; BTVER2: # BB#0:
@@ -2057,10 +2057,10 @@
; HASWELL-LABEL: test_rsqrtss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
+; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
; HASWELL-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_rsqrtss:
; BTVER2: # BB#0:
@@ -2116,8 +2116,8 @@
;
; HASWELL-LABEL: test_sfence:
; HASWELL: # BB#0:
-; HASWELL-NEXT: sfence # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: sfence # sched: [1:0.33]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_sfence:
; BTVER2: # BB#0:
@@ -2165,8 +2165,8 @@
; HASWELL-LABEL: test_shufps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
-; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [5:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_shufps:
; BTVER2: # BB#0:
@@ -2217,10 +2217,10 @@
;
; HASWELL-LABEL: test_sqrtps:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [15:1.00]
-; HASWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [19:1.00]
+; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
+; HASWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [14:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_sqrtps:
; BTVER2: # BB#0:
@@ -2280,11 +2280,11 @@
;
; HASWELL-LABEL: test_sqrtss:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [19:1.00]
-; HASWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [4:0.50]
-; HASWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [19:1.00]
+; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00]
+; HASWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [1:0.50]
+; HASWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_sqrtss:
; BTVER2: # BB#0:
@@ -2336,9 +2336,9 @@
;
; HASWELL-LABEL: test_stmxcsr:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
-; HASWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [4:0.50]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; HASWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:0.50]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_stmxcsr:
; BTVER2: # BB#0:
@@ -2387,8 +2387,8 @@
; HASWELL-LABEL: test_subps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_subps:
; BTVER2: # BB#0:
@@ -2435,8 +2435,8 @@
; HASWELL-LABEL: test_subss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_subss:
; BTVER2: # BB#0:
@@ -2524,7 +2524,7 @@
; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25]
; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_ucomiss:
; BTVER2: # BB#0:
@@ -2593,8 +2593,8 @@
; HASWELL-LABEL: test_unpckhps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_unpckhps:
; BTVER2: # BB#0:
@@ -2645,8 +2645,8 @@
; HASWELL-LABEL: test_unpcklps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [5:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_unpcklps:
; BTVER2: # BB#0:
@@ -2697,8 +2697,8 @@
; HASWELL-LABEL: test_xorps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: retq # sched: [1:1.00]
+; HASWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_xorps:
; BTVER2: # BB#0: